In [3]:
import pandas as pd
import numpy as np
import sklearn
import fastf1

In [4]:
pip install tqdm

Note: you may need to restart the kernel to use updated packages.


In [5]:
from tqdm import tqdm

In [6]:
def get_session_data(year, session_type, event_list=None):
    all_data = []

    # Use full schedule if available
    if event_list is None:
        schedule = fastf1.get_event_schedule(year)
        event_list = schedule['EventName'].tolist()

    for gp_name in tqdm(event_list, desc=f"{year} {session_type}"):
        try:
            session = fastf1.get_session(year, gp_name, session_type)
            session.load()

            laps = session.laps
            if laps.empty:
                continue

            laps['Year'] = year
            laps['EventName'] = gp_name
            laps['SessionType'] = session_type
            all_data.append(laps)

        except Exception as e:
            print(f"Skipped {year} {gp_name} {session_type}: {e}")
            continue

    return pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame()


In [7]:
df_q_2024 = get_session_data(2024, 'Q')
df_r_2024 = get_session_data(2024, 'R')

core           INFO 	Loading data for Singapore Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '44', '63', '81', '27', '14', '22', '16', '55', '23', '43', '11', '20', '31', '3', '18', '10', '77', '24']
2024 Q:   4%|█▍                                  | 1/25 [00:01<00:37,  1.56s/it]core           INFO 	Loadi

In [8]:
completed_2025_races = [
    'Bahrain', 'Saudi Arabia', 'Australia', 'Japan',
    'China', 'Miami', 'Emilia Romagna', 'Monaco',
    'Canada', 'Spain', 'Austria', 'Great Britain'
]

In [9]:
df_q_2025 = get_session_data(2025, 'Q', completed_2025_races)
df_r_2025 = get_session_data(2025, 'R', completed_2025_races)

2025 Q:   0%|                                            | 0/12 [00:00<?, ?it/s]core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.6.0]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching

In [12]:
df_all = pd.concat([df_q_2024, df_r_2024, df_q_2025, df_r_2025], ignore_index=True)

In [13]:
df_all.to_csv('f1_2024_2025_q_r.csv', index=False)

In [14]:
df_all.head()

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,...,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,Year,EventName,SessionType
0,0 days 00:17:36.482000,NOR,4,NaT,1.0,1.0,0 days 00:15:24.715000,NaT,NaT,0 days 00:00:51.613000,...,2024-09-21 13:01:38.552,1,,False,,False,False,2024,Pre-Season Testing,Q
1,0 days 00:19:07.206000,NOR,4,0 days 00:01:30.724000,2.0,1.0,NaT,NaT,0 days 00:00:26.863000,0 days 00:00:38.252000,...,2024-09-21 13:03:50.319,1,,False,,False,True,2024,Pre-Season Testing,Q
2,0 days 00:21:48.149000,NOR,4,NaT,3.0,1.0,NaT,0 days 00:21:12.899000,0 days 00:00:36.760000,0 days 00:00:53.201000,...,2024-09-21 13:05:21.043,1,,False,,False,False,2024,Pre-Season Testing,Q
3,0 days 00:26:55.559000,NOR,4,NaT,4.0,2.0,0 days 00:24:57.106000,NaT,NaT,0 days 00:00:50.524000,...,2024-09-21 13:08:01.986,1,,False,,False,False,2024,Pre-Season Testing,Q
4,0 days 00:28:25.561000,NOR,4,0 days 00:01:30.002000,5.0,2.0,NaT,NaT,0 days 00:00:26.735000,0 days 00:00:37.824000,...,2024-09-21 13:13:09.396,1,,False,,False,True,2024,Pre-Season Testing,Q


In [15]:
df_all.describe()

Unnamed: 0,Time,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,...,Sector3SessionTime,SpeedI1,SpeedI2,SpeedFL,SpeedST,TyreLife,LapStartTime,LapStartDate,Position,Year
count,52923,48723,52923.0,52477.0,4892,4887,49200,52679,51917,49108,...,51917,46331.0,52655.0,47964.0,49456.0,52439.0,52923,52897,41253.0,52923.0
mean,0 days 01:31:48.330765640,0 days 00:01:32.216075139,26.108308,2.249671,0 days 00:57:30.586132665,0 days 01:01:23.275675670,0 days 00:00:28.491929817,0 days 00:00:35.216044799,0 days 00:00:28.735686942,0 days 01:33:54.150987782,...,0 days 01:32:19.661339330,248.693618,243.654544,270.551539,287.589959,12.25666,0 days 01:29:57.130230995,2024-10-27 13:33:34.880198912,9.740698,2024.326399
min,0 days 00:05:49.596000,0 days 00:01:03.971000,1.0,1.0,0 days 00:04:17.809000,0 days 00:14:54.121000,0 days 00:00:16.251000,0 days 00:00:16.915000,0 days 00:00:17.205000,0 days 00:06:17.920000,...,0 days 00:05:49.775000,45.0,26.0,1.0,25.0,1.0,0 days 00:04:05.608000,2024-03-01 16:00:11.226000,1.0,2024.0
25%,0 days 01:05:40.739000,0 days 00:01:20.989000,10.0,1.0,0 days 00:27:14.425750,0 days 00:32:52.987500,0 days 00:00:23.794000,0 days 00:00:29.549000,0 days 00:00:22.987000,0 days 01:08:49.408750,...,0 days 01:06:14.610000,213.0,207.0,255.0,281.0,4.0,0 days 01:03:56.730000,2024-06-23 13:24:16.952999936,5.0,2024.0
50%,0 days 01:32:05.128000,0 days 00:01:30.763000,22.0,2.0,0 days 00:49:57.763000,0 days 00:55:18.517000,0 days 00:00:28.334000,0 days 00:00:33.624000,0 days 00:00:26.949000,0 days 01:34:53.600000,...,0 days 01:32:55.727000,260.0,254.0,276.0,299.0,9.0,0 days 01:30:28.315000,2024-09-22 13:06:04.368999936,10.0,2024.0
75%,0 days 02:01:22.254500,0 days 00:01:38.692000,41.0,3.0,0 days 01:18:56.424500,0 days 01:20:22.716000,0 days 00:00:31.588250,0 days 00:00:41.483500,0 days 00:00:32.512000,0 days 02:02:25.925000,...,0 days 02:01:56.392000,287.0,278.0,286.0,312.0,18.0,0 days 01:59:52.701500,2025-04-12 16:13:01.144999936,14.0,2025.0
max,0 days 03:20:09.121000,0 days 00:42:06.253000,78.0,8.0,0 days 03:11:24.136000,0 days 03:10:59.688000,0 days 00:01:39.178000,0 days 00:01:30.308000,0 days 00:01:39.438000,0 days 03:19:06.894000,...,0 days 03:20:09.129000,357.0,344.0,357.0,362.0,78.0,0 days 03:18:45.420000,2025-06-29 14:41:45.733000,20.0,2025.0
std,0 days 00:38:24.282467585,0 days 00:00:45.870915412,18.705277,1.238152,0 days 00:35:00.357642438,0 days 00:32:46.884695668,0 days 00:00:07.768660355,0 days 00:00:07.973906271,0 days 00:00:08.516293985,0 days 00:37:26.874190516,...,0 days 00:38:25.359294147,47.480897,46.526827,30.923368,43.991541,10.899986,0 days 00:38:47.478208862,,5.405077,0.4689


In [16]:
df_all.drop(columns=['SpeedI1','SpeedI2', 'SpeedST', 'SpeedFL'], inplace=True)


In [17]:
df_all.dtypes

Time                  timedelta64[ns]
Driver                         object
DriverNumber                   object
LapTime               timedelta64[ns]
LapNumber                     float64
Stint                         float64
PitOutTime            timedelta64[ns]
PitInTime             timedelta64[ns]
Sector1Time           timedelta64[ns]
Sector2Time           timedelta64[ns]
Sector3Time           timedelta64[ns]
Sector1SessionTime    timedelta64[ns]
Sector2SessionTime    timedelta64[ns]
Sector3SessionTime    timedelta64[ns]
IsPersonalBest                 object
Compound                       object
TyreLife                      float64
FreshTyre                        bool
Team                           object
LapStartTime          timedelta64[ns]
LapStartDate           datetime64[ns]
TrackStatus                    object
Position                      float64
Deleted                          bool
DeletedReason                  object
FastF1Generated                  bool
IsAccurate  

In [18]:
df_all.drop(columns=['LapNumber','Stint','PitOutTime','PitInTime','Sector1Time',
                     'Sector2Time','Sector3Time','Sector1SessionTime','Sector2SessionTime',
                     'Sector3SessionTime','FreshTyre','TrackStatus','Deleted','DeletedReason',
                     'FastF1Generated','IsAccurate','LapStartTime','LapStartDate'],inplace=True)

In [19]:
df_all.drop(columns=['Time','DriverNumber','IsPersonalBest'],inplace=True)

In [20]:
df_all.dtypes

Driver                  object
LapTime        timedelta64[ns]
Compound                object
TyreLife               float64
Team                    object
Position               float64
Year                     int64
EventName               object
SessionType             object
dtype: object

In [21]:
df_all['LapTimeSeconds'] = df_all['LapTime'].dt.total_seconds()
df_all.drop(columns=['LapTime'], inplace=True)

In [22]:
driver_dummies = pd.get_dummies(df_all['Driver'], prefix='driver')
df_all = pd.concat([df_all, driver_dummies], axis=1)

In [23]:
team_dummies = pd.get_dummies(df_all['Team'], prefix='team')
df_all = pd.concat([df_all, team_dummies], axis=1)

In [24]:
compound_dummies = pd.get_dummies(df_all['Compound'], prefix='compound')
df_all = pd.concat([df_all, compound_dummies], axis=1)

In [25]:
sessiontype_dummies = pd.get_dummies(df_all['SessionType'], prefix='sessiontype')
df_all = pd.concat([df_all, sessiontype_dummies], axis=1)

In [26]:
df_all.dtypes

Driver                    object
Compound                  object
TyreLife                 float64
Team                      object
Position                 float64
Year                       int64
EventName                 object
SessionType               object
LapTimeSeconds           float64
driver_ALB                  bool
driver_ALO                  bool
driver_ANT                  bool
driver_BEA                  bool
driver_BOR                  bool
driver_BOT                  bool
driver_COL                  bool
driver_DOO                  bool
driver_GAS                  bool
driver_HAD                  bool
driver_HAM                  bool
driver_HUL                  bool
driver_LAW                  bool
driver_LEC                  bool
driver_MAG                  bool
driver_NOR                  bool
driver_OCO                  bool
driver_PER                  bool
driver_PIA                  bool
driver_RIC                  bool
driver_RUS                  bool
driver_SAI

In [27]:
df_all.drop(['Driver'],axis=1,inplace=True)
df_all.drop(['Compound'],axis=1,inplace=True)
df_all.drop(['SessionType'],axis=1,inplace=True)
df_all.drop(['Team'],axis=1,inplace=True)

In [28]:
from sklearn.preprocessing import LabelEncoder

In [29]:
le_event = LabelEncoder()
df_all['EventEncoded'] = le_event.fit_transform(df_all['EventName'])

In [30]:
df_all.drop(columns=['EventName'],axis=1, inplace=True)

In [31]:
df_all.dtypes

TyreLife                 float64
Position                 float64
Year                       int64
LapTimeSeconds           float64
driver_ALB                  bool
driver_ALO                  bool
driver_ANT                  bool
driver_BEA                  bool
driver_BOR                  bool
driver_BOT                  bool
driver_COL                  bool
driver_DOO                  bool
driver_GAS                  bool
driver_HAD                  bool
driver_HAM                  bool
driver_HUL                  bool
driver_LAW                  bool
driver_LEC                  bool
driver_MAG                  bool
driver_NOR                  bool
driver_OCO                  bool
driver_PER                  bool
driver_PIA                  bool
driver_RIC                  bool
driver_RUS                  bool
driver_SAI                  bool
driver_SAR                  bool
driver_STR                  bool
driver_TSU                  bool
driver_VER                  bool
driver_ZHO

In [32]:

bool_cols = df_all.select_dtypes(include='bool').columns
df_all[bool_cols] = df_all[bool_cols].astype(int)


In [33]:
df_all.head()

Unnamed: 0,TyreLife,Position,Year,LapTimeSeconds,driver_ALB,driver_ALO,driver_ANT,driver_BEA,driver_BOR,driver_BOT,...,compound_HARD,compound_INTERMEDIATE,compound_MEDIUM,compound_None,compound_SOFT,compound_WET,compound_nan,sessiontype_Q,sessiontype_R,EventEncoded
0,1.0,,2024,,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,28
1,2.0,,2024,90.724,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,28
2,3.0,,2024,,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,28
3,4.0,,2024,,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,28
4,5.0,,2024,90.002,0,0,0,0,0,0,...,0,0,0,0,1,0,0,1,0,28


In [34]:
df_all = df_all.dropna(subset=['Position'])

In [35]:
df_all = df_all.dropna(subset=['LapTimeSeconds'])

In [47]:
df_all = df_all.dropna(subset=['TyreLife'])

In [48]:
df_all.isna().sum()

TyreLife                 0
Position                 0
Year                     0
LapTimeSeconds           0
driver_ALB               0
driver_ALO               0
driver_ANT               0
driver_BEA               0
driver_BOR               0
driver_BOT               0
driver_COL               0
driver_DOO               0
driver_GAS               0
driver_HAD               0
driver_HAM               0
driver_HUL               0
driver_LAW               0
driver_LEC               0
driver_MAG               0
driver_NOR               0
driver_OCO               0
driver_PER               0
driver_PIA               0
driver_RIC               0
driver_RUS               0
driver_SAI               0
driver_SAR               0
driver_STR               0
driver_TSU               0
driver_VER               0
driver_ZHO               0
team_Alpine              0
team_Aston Martin        0
team_Ferrari             0
team_Haas F1 Team        0
team_Kick Sauber         0
team_McLaren             0
t

In [72]:
driver_cols = [col for col in df_all.columns if col.startswith('driver_')]
df_all['Driver'] = df_all[driver_cols].idxmax(axis=1)


In [73]:
def clean_race_laps(df):
    df = df[df['SessionType'] == 'R']
    df = df[df['LapTimeSeconds'] > 60] 
    return df.groupby(['Year', 'EventEncoded', 'Driver']).agg({
        'LapTimeSeconds': 'mean',
        'TyreLife': 'mean',
        'Compound': 'last',
        'Team': 'last'
    }).reset_index()

In [74]:
le = LabelEncoder()
df_final_pos['DriverEncoded'] = le.fit_transform(df_final_pos['Driver'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final_pos['DriverEncoded'] = le.fit_transform(df_final_pos['Driver'])


In [75]:
df_final_pos = df_all.groupby(['Year', 'EventEncoded', 'Driver']).tail(1)
X = df_final_pos.drop(columns=['Position','Driver'])
y = df_final_pos['Position']

In [76]:
df_final_pos['PositionClass'] = df_final_pos['Position'].apply(lambda x: 1 if x==1 else (2 if x==2 else (3 if x==3 else 4)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final_pos['PositionClass'] = df_final_pos['Position'].apply(lambda x: 1 if x==1 else (2 if x==2 else (3 if x==3 else 4)))


In [77]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score

In [78]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [79]:
model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)

model.fit(X_train, y_train)

0,1,2
,loss,'log_loss'
,learning_rate,0.1
,n_estimators,100
,subsample,1.0
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,3
,min_impurity_decrease,0.0


In [80]:
y_pred = model.predict(X_test)

In [81]:
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Classification Report:\n", classification_report(y_test, y_pred, digits=3))
print("\n📉 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

✅ Accuracy: 0.20279720279720279

📊 Classification Report:
               precision    recall  f1-score   support

         1.0      0.364     0.571     0.444         7
         2.0      0.250     0.125     0.167         8
         3.0      0.125     0.125     0.125         8
         4.0      0.400     0.500     0.444         8
         5.0      0.200     0.250     0.222         8
         6.0      0.000     0.000     0.000         8
         7.0      0.333     0.250     0.286         8
         8.0      0.222     0.250     0.235         8
         9.0      0.286     0.250     0.267         8
        10.0      0.125     0.125     0.125         8
        11.0      0.059     0.125     0.080         8
        12.0      0.200     0.125     0.154         8
        13.0      0.333     0.125     0.182         8
        14.0      0.250     0.375     0.300         8
        15.0      0.200     0.143     0.167         7
        16.0      0.167     0.143     0.154         7
        17.0      0.20

In [82]:
import joblib

In [83]:
fastf1.Cache.enable_cache('cache_dir')

session = fastf1.get_session(2025, 'British Grand Prix', 'Q')
session.load()

laps = session.laps
drivers = laps['Driver'].unique()

quickest_laps = []
for drv in drivers:
    drv_laps = laps[laps['Driver'] == drv]
    drv_laps = drv_laps.dropna(subset=['LapTime']) 
    if not drv_laps.empty:
        fastest = drv_laps.loc[drv_laps['LapTime'].idxmin()]
        quickest_laps.append(fastest)

df = pd.DataFrame(quickest_laps)

df = df[['Driver', 'Team', 'Compound', 'TyreLife', 'LapTime']].copy()
df['LapTimeSeconds'] = df['LapTime'].dt.total_seconds()
df['SessionType'] = 1  
df['Year'] = 2025
df['EventName'] = session.event['EventName']

model = joblib.load('gbc_model.pkl')
event_encoder = joblib.load('label_encoder_eventname.pkl')
train_columns = joblib.load('train_columns.pkl')

df['EventEncoded'] = event_encoder.transform([df['EventName'].iloc[0]])[0]

df.drop(columns=['LapTime', 'EventName'], inplace=True)

df_encoded = pd.get_dummies(df, columns=['Driver', 'Team', 'Compound'], drop_first=False)

for col in train_columns:
    if col not in df_encoded.columns:
        df_encoded[col] = 0
df_encoded = df_encoded[train_columns]

y_pred = model.predict(df_encoded)
df['PredictedPosition'] = y_pred

podium = df.sort_values(by='PredictedPosition').head(3)
print("🏁 Predicted Podium for 2025 British Grand Prix:")
print(podium[['Driver', 'Team', 'PredictedPosition']])



core           INFO 	Loading data for British Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '81', '4', '63', '44', '16', '12', '87', '14', '10', '55', '22', '6', '23', '31', '30', '5', '18', '27', '43']


🏁 Predicted Podium for 2025 British Grand Prix:
    Driver             Team  PredictedPosition
34     PIA          McLaren                  3
16     VER  Red Bull Racing                  4
271    STR     Aston Martin                  4
