In [4]:
import pandas as pd
import numpy as np

In [61]:
# Path of the qualifying feature table, resolved relative to the notebook's working directory.
FEATURES_QUALI = 'features_quali_complete_P3.csv'

# index_col=False tells pandas not to use the first CSV column as the row index.
quali_features = pd.read_csv(
    filepath_or_buffer=FEATURES_QUALI,
    index_col=False,
)

In [62]:
# Print the column names, non-null counts and dtypes of the freshly loaded frame.
quali_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1603 entries, 0 to 1602
Data columns (total 52 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Time                     1603 non-null   object 
 1   Driver                   1603 non-null   object 
 2   DriverNumber             1603 non-null   int64  
 3   Team                     1603 non-null   object 
 4   LapTime                  1603 non-null   float64
 5   LapNumber                1603 non-null   float64
 6   Stint                    1603 non-null   float64
 7   Sector1Time              1603 non-null   float64
 8   Sector2Time              1603 non-null   float64
 9   Sector3Time              1603 non-null   float64
 10  Compound                 1601 non-null   object 
 11  TyreLife                 1601 non-null   float64
 12  FreshTyre                1603 non-null   bool   
 13  AirTemp                  1603 non-null   float64
 14  TrackTemp               

In [63]:
# Binary flag: 1 when the row's Quali_Rank is 10 or better, otherwise 0
# (a missing rank compares False and therefore yields 0).
quali_features['in_q3'] = quali_features['Quali_Rank'].le(10).astype('int64')

def map_compound(row):
    """Return the value stored in the row's column that matches its tyre label.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) exposing 'Compound'
            plus the 'Soft', 'Medium' and 'Hard' entries.

    Returns:
        ``row['Soft']``, ``row['Medium']`` or ``row['Hard']`` when
        ``row['Compound']`` is 'SOFT', 'MEDIUM' or 'HARD' respectively;
        ``None`` for any other label.
    """
    label_to_column = (
        ('SOFT', 'Soft'),
        ('MEDIUM', 'Medium'),
        ('HARD', 'Hard'),
    )
    for label, column in label_to_column:
        if row['Compound'] == label:
            return row[column]
    return None
    
# Resolve each row's SOFT/MEDIUM/HARD label to the value held in the matching
# Soft/Medium/Hard column (None when the label is none of those three).
compound_labels = quali_features.apply(map_compound, axis=1)

# Numeric encoding of the resolved labels; anything without an entry here,
# including the None produced above, becomes 0.
compound_map = {'C6' : 6, 'C5' : 5, 'C4' : 4, 'C3' : 3, 'C2' : 2, 'C1' : 1} # wet = 0
quali_features['Compound'] = compound_labels.map(compound_map).fillna(0)

In [64]:
# --- 2. Driver_Avg_Quali_Pos ---
# Mean Quali_Rank per driver, joined back onto every row of that driver.
# NOTE(review): the mean is taken over every row currently in the frame, so each
# row's value also reflects that driver's later sessions — confirm this is
# intended before using the column as a predictive feature.
avg_quali_position = quali_features.groupby('Driver')['Quali_Rank'].mean().reset_index()
# The target name used to carry a leading space (' Driver_Avg_Quali_Pos'), which
# made the column unreachable under its intended name.
avg_quali_position = avg_quali_position.rename(columns={'Quali_Rank': 'Driver_Avg_Quali_Pos'})
# Remove a copy left by a previous run of this cell; otherwise the merge would
# produce Driver_Avg_Quali_Pos_x / _y instead of a single column.
quali_features = quali_features.drop(columns=['Driver_Avg_Quali_Pos'], errors='ignore')
quali_features = pd.merge(quali_features, avg_quali_position, on='Driver', how='left')

In [65]:
# --- 3. Race_Weekend_Index  ---
# One row per distinct (Year, EventDate, Location), ordered chronologically
# within each year.
event_order = (
    quali_features[['Year', 'EventDate', 'Location']]
    .drop_duplicates()
    .sort_values(by=['Year', 'EventDate'])
    .reset_index(drop=True)
)

# 1-based position of each event inside its own year.
event_order['Race_Weekend_Index'] = event_order.groupby('Year').cumcount() + 1

# Attach the index to every row of the matching (Year, Location).
weekend_lookup = event_order[['Year', 'Location', 'Race_Weekend_Index']]
quali_features = quali_features.merge(weekend_lookup, on=['Year', 'Location'], how='left')

In [66]:
# --- 4. Driver_Quali_Gap_To_Max_EMA ---
# Fastest LapTime recorded for each (Year, Location).
pole_times = (
    quali_features.groupby(['Year', 'Location'])['LapTime']
    .min()
    .reset_index()
    .rename(columns={'LapTime': 'Pole_Time'})
)

# Temporary columns: the event's fastest time and each row's gap to it.
quali_features = quali_features.merge(pole_times, on=['Year', 'Location'], how='left')
quali_features['Quali_Gap_To_Pole'] = quali_features['LapTime'].sub(quali_features['Pole_Time'])

# The exponentially weighted mean below walks rows in order, so lay the frame
# out per driver by EventDate first.
quali_features = quali_features.sort_values(by=['Driver', 'EventDate']).reset_index(drop=True)

ema_span = 5
gap_by_driver = quali_features.groupby('Driver')['Quali_Gap_To_Pole']
# shift(1) makes each row carry the EMA as of the driver's previous row, so a
# driver's first row is NaN and a row never includes its own gap.
quali_features['Driver_Quali_Gap_To_Max_EMA'] = gap_by_driver.transform(
    lambda gaps: gaps.ewm(span=ema_span, adjust=False).mean().shift(1)
)

# The helper columns were only needed to build the EMA.
quali_features = quali_features.drop(columns=['Pole_Time', 'Quali_Gap_To_Pole'])

In [67]:
# Inspect the frame after the per-driver EMA column has been added.
quali_features

Unnamed: 0,Time,Driver,DriverNumber,Team,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,...,Peak_Front_Load_N,Peak_Rear_Load_N,Team_Pace_EMA,Driver_Teammate_Gap_EMA,Driver_Consistency_EMA,Degradation_EMA,in_q3,Driver_Avg_Quali_Pos,Race_Weekend_Index,Driver_Quali_Gap_To_Max_EMA
0,0 days 00:50:37.212000,ALB,23,Williams,92.664,12.0,5.0,29.564,39.897,23.203,...,15094.561030,23008.193641,1.351977,-0.385488,1.410489,0.157463,0,13.013514,1,
1,0 days 00:39:32.934000,ALB,23,Williams,90.492,8.0,2.0,33.167,28.975,28.350,...,16334.826738,24520.603770,1.476061,-0.908578,1.465950,0.055753,0,13.013514,2,2.106000
2,0 days 00:29:21.848000,ALB,23,Williams,80.135,7.0,2.0,27.401,18.067,34.667,...,15688.066317,23074.050464,1.124655,-0.717575,1.537510,-0.004889,0,13.013514,3,2.168000
3,0 days 00:29:34.514000,ALB,23,Williams,91.266,7.0,2.0,30.641,34.810,25.815,...,17478.415292,25773.690431,0.924819,-0.755314,2.268193,-0.035858,0,13.013514,5,2.201000
4,0 days 00:24:43.480000,ALB,23,Williams,81.645,2.0,1.0,22.570,30.748,28.327,...,16361.365931,24028.375298,1.112601,-0.295465,1.787791,0.039706,0,13.013514,6,2.290667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1598,0 days 00:29:27.866000,ZHO,24,Kick Sauber,78.072,8.0,3.0,27.746,30.217,20.109,...,17443.246536,26804.431118,0.952431,0.075067,1.122661,-0.003617,0,16.174603,19,2.465954
1599,0 days 00:42:08.263000,ZHO,24,Kick Sauber,92.263,11.0,2.0,23.896,48.959,19.408,...,13464.939348,20916.968636,0.977886,0.062018,1.785797,-0.079070,0,16.174603,20,2.352636
1600,0 days 00:54:08.938000,ZHO,24,Kick Sauber,93.566,16.0,4.0,26.355,31.266,35.945,...,18240.715519,27486.964054,0.822273,-0.251906,1.600517,-0.025989,0,16.174603,21,4.521091
1601,0 days 00:54:44.374000,ZHO,24,Kick Sauber,81.501,18.0,4.0,29.872,27.533,24.096,...,14912.742183,23875.063645,0.749633,-0.540741,1.524763,-0.076680,0,16.174603,22,3.432060


In [None]:
# --- 5. Team_Track_Score (historical team–circuit compatibility) ---
# Mean Quali_Rank for each (Team, Location) pair, joined back onto every
# matching row. This needs 'Quali_Rank', so it must run before the cell that
# drops that column.
# The cell previously read and wrote `df`, a name not defined in any cell shown
# here; it now works on quali_features like the other feature cells, so the new
# column reaches the frame that is encoded later.
team_track_score = quali_features.groupby(['Team', 'Location'])['Quali_Rank'].mean().reset_index()
team_track_score = team_track_score.rename(columns={'Quali_Rank': 'Team_Track_Score'})
# Remove a copy left by a previous run so the merge does not create _x / _y duplicates.
quali_features = quali_features.drop(columns=['Team_Track_Score'], errors='ignore')
quali_features = pd.merge(quali_features, team_track_score, on=['Team', 'Location'], how='left')

In [50]:
# Inspect the per-(Year, Location) minimum LapTime table.
pole_times

Unnamed: 0,Year,Location,Pole_Time
0,2022,Austin,94.421
1,2022,Barcelona,78.750
2,2022,Budapest,77.377
3,2022,Imola,78.793
4,2022,Jeddah,88.200
...,...,...,...
79,2025,Silverstone,84.892
80,2025,Spa-Francorchamps,100.562
81,2025,Spielberg,63.971
82,2025,Suzuka,86.983


In [42]:
# Inspect the chronological event table and its per-year Race_Weekend_Index.
event_order

Unnamed: 0,Year,EventDate,Location,Race_Weekend_Index
0,2022,2022-03-20,Sakhir,1
1,2022,2022-03-27,Jeddah,2
2,2022,2022-04-10,Melbourne,3
3,2022,2022-04-24,Imola,4
4,2022,2022-05-08,Miami,5
...,...,...,...,...
79,2025,2025-08-31,Zandvoort,15
80,2025,2025-09-07,Monza,16
81,2025,2025-09-21,Baku,17
82,2025,2025-10-05,Marina Bay,18


In [45]:
# Inspect the current feature frame.
# NOTE(review): the output recorded under this cell has no
# Driver_Quali_Gap_To_Max_EMA column and is not ordered by driver, so it appears
# to come from a run made before the EMA cell — re-run to refresh it.
quali_features

Unnamed: 0,Time,Driver,DriverNumber,Team,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,...,Peak_Drag_N,Peak_Front_Load_N,Peak_Rear_Load_N,Team_Pace_EMA,Driver_Teammate_Gap_EMA,Driver_Consistency_EMA,Degradation_EMA,in_q3,Driver_Avg_Quali_Pos,Race_Weekend_Index
0,0 days 01:14:47.968000,LEC,16,Ferrari,90.558,14.0,5.0,29.115,38.702,22.741,...,8124.642361,15967.697325,23405.277478,-1.606421,-0.418709,1.294945,0.138100,1,4.609756,1
1,0 days 01:15:15.614000,VER,1,Red Bull Racing,90.681,13.0,4.0,28.970,38.832,22.879,...,8330.321373,15830.695983,23927.015267,-1.454666,-0.214258,1.176590,0.131253,1,2.855422,1
2,0 days 01:07:34.650000,SAI,55,Ferrari,90.687,11.0,4.0,29.036,38.842,22.809,...,8073.624325,15139.076301,23294.986842,-1.606421,0.418709,1.285464,0.088300,1,6.092105,1
3,0 days 01:15:21.593000,PER,11,Red Bull Racing,90.921,17.0,6.0,29.180,38.894,22.847,...,8382.142843,16030.054736,24011.176938,-1.454666,0.214258,1.278144,0.114015,1,7.737705,1
4,0 days 00:55:33.904000,HAM,44,Mercedes,91.048,10.0,3.0,29.100,39.038,22.910,...,8073.624325,14955.892800,23263.427286,-0.951462,-0.340234,1.172743,0.100051,1,7.172840,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1598,0 days 00:42:17.817000,COL,43,Alpine,94.039,7.0,3.0,25.323,37.874,30.842,...,8330.321373,16022.029189,23903.996256,0.809274,0.023433,1.185255,-0.037586,0,15.750000,19
1599,0 days 00:41:21.876000,BOR,5,Kick Sauber,94.125,7.0,3.0,25.326,38.036,30.763,...,8382.142843,15790.023815,24004.686968,0.304365,0.300834,1.148753,-0.009591,0,13.833333,19
1600,0 days 00:41:40.433000,OCO,31,Haas F1 Team,94.136,7.0,3.0,25.431,37.931,30.774,...,8434.125000,15701.891150,24134.756086,0.429606,0.458798,1.294687,-0.027581,0,12.734177,19
1601,0 days 00:32:21.571000,STR,18,Aston Martin,94.540,3.0,2.0,25.614,38.008,30.918,...,8382.142843,15360.052484,24036.061922,0.294133,0.250399,1.204013,-0.036862,0,13.987179,19


In [None]:
# Columns removed before encoding: raw timing values, identifiers, the
# per-compound lookup columns and the rank that in_q3 was derived from.
drop_columns = [
    'Time', 'DriverNumber', 'Soft', 'Medium', 'Hard', 'Country', 'EventDate',
    'LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'Quali_Rank',
    'LapNumber', 'TyreLife', 'FreshTyre', 'Stint',
]
quali_features = quali_features.drop(drop_columns, axis=1)

# One-hot encode the categorical identifiers, keeping every level
# (drop_first=False).
features_encoded = pd.get_dummies(
    quali_features,
    columns=['Driver', 'Team', 'Location', 'Year'],
    drop_first=False,
)
# Second name bound to the same encoded frame.
quali_features_encoded = features_encoded