In [7]:
import fastf1
import numpy as np
import pandas as pd
import sklearn
from tqdm import tqdm

In [8]:
def get_session_data(year, session_type, event_list=None):
    all_data = []

    if event_list is None:
        schedule = fastf1.get_event_schedule(year)
        event_list = schedule['EventName'].tolist()

    for gp_name in tqdm(event_list, desc=f"{year} {session_type}"):
        try:
            session = fastf1.get_session(year, gp_name, session_type)
            session.load()

            laps = session.laps
            if laps.empty:
                continue

            laps['Year'] = year
            laps['EventName'] = gp_name
            laps['SessionType'] = session_type
            all_data.append(laps)

        except Exception as e:
            print(f"Skipped {year} {gp_name} {session_type}: {e}")
            continue

    return pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame()

In [9]:
df_q_2024 = get_session_data(2024, 'Q')
df_r_2024 = get_session_data(2024, 'R')

core           INFO 	Loading data for Singapore Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '44', '63', '81', '27', '14', '22', '16', '55', '23', '43', '11', '20', '31', '3', '18', '10', '77', '24']
2024 Q:   4%|█▍                                  | 1/25 [00:00<00:18,  1.31it/s]core           INFO 	Loadi

In [10]:
completed_2025_races = [
    'Bahrain', 'Saudi Arabia', 'Australia', 'Japan','China', 'Miami', 'Emilia Romagna', 'Monaco',
    'Canada', 'Spain', 'Austria', 'Great Britain','Belgium','Hungary'
]

In [12]:
df_q_2025 = get_session_data(2025, 'Q', completed_2025_races)
df_r_2025 = get_session_data(2025, 'R', completed_2025_races)

2025 Q:   0%|                                            | 0/14 [00:00<?, ?it/s]core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '16', '12', '10', '4', '1', '55', '44', '22', '7', '6', '14', '31', '23', '27', '30', '5', '18', '87']
2025 Q:   7%|██▌              

In [13]:
df = pd.concat([df_q_2024, df_r_2024, df_q_2025, df_r_2025], ignore_index=True)

In [14]:
df.to_csv('formula1.csv', index=False)

In [15]:
df_all = df.copy()

In [16]:
df.head()

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,...,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,Year,EventName,SessionType
0,0 days 00:17:36.482000,NOR,4,NaT,1.0,1.0,0 days 00:15:24.715000,NaT,NaT,0 days 00:00:51.613000,...,2024-09-21 13:01:38.552,1,,False,,False,False,2024,Pre-Season Testing,Q
1,0 days 00:19:07.206000,NOR,4,0 days 00:01:30.724000,2.0,1.0,NaT,NaT,0 days 00:00:26.863000,0 days 00:00:38.252000,...,2024-09-21 13:03:50.319,1,,False,,False,True,2024,Pre-Season Testing,Q
2,0 days 00:21:48.149000,NOR,4,NaT,3.0,1.0,NaT,0 days 00:21:12.899000,0 days 00:00:36.760000,0 days 00:00:53.201000,...,2024-09-21 13:05:21.043,1,,False,,False,False,2024,Pre-Season Testing,Q
3,0 days 00:26:55.559000,NOR,4,NaT,4.0,2.0,0 days 00:24:57.106000,NaT,NaT,0 days 00:00:50.524000,...,2024-09-21 13:08:01.986,1,,False,,False,False,2024,Pre-Season Testing,Q
4,0 days 00:28:25.561000,NOR,4,0 days 00:01:30.002000,5.0,2.0,NaT,NaT,0 days 00:00:26.735000,0 days 00:00:37.824000,...,2024-09-21 13:13:09.396,1,,False,,False,True,2024,Pre-Season Testing,Q


In [17]:
df.drop(columns=['SpeedI1','SpeedI2', 'SpeedST', 'SpeedFL','Time','DriverNumber','LapNumber','PitOutTime',
                     'PitInTime','PitOutTime','Sector1Time','Sector2Time','Sector3Time','Sector1SessionTime',
                     'Sector2SessionTime','Sector3SessionTime','IsPersonalBest','TyreLife','FreshTyre',
                      'LapStartTime','LapStartDate','TrackStatus','Deleted','DeletedReason','FastF1Generated',
                     'IsAccurate'], inplace=True)

In [18]:
df.dtypes

Driver                  object
LapTime        timedelta64[ns]
Stint                  float64
Compound                object
Team                    object
Position               float64
Year                     int64
EventName               object
SessionType             object
dtype: object

In [19]:
df = df[~df['Driver'].isin(['MAG', 'PER','RIC','SAR'])]

In [20]:
df = df[df['EventName'] != 'Pre-Season Testing']

In [21]:
df = df[~df['Compound'].isin(['None', 'nan'])]

In [22]:
mapping = {
    'Australia': 'Australian Grand Prix',
    'Austria': 'Austrian Grand Prix',
    'Bahrain': 'Bahrain Grand Prix',
    'Monaco': 'Monaco Grand Prix',
    'Japan': 'Japanese Grand Prix',
    'Canada': 'Canadian Grand Prix',
    'Spain': 'Spanish Grand Prix',
    'France': 'French Grand Prix',
    'Saudi Arabia': 'Saudi Arabian Grand Prix',
    'China':'Chinese Grand Prix',
    'Miami': 'Miami Grand Prix',
    'Emilia Romagna':'Emilia Romagna Grand Prix',
    'Great Britain': 'British Grand Prix',
    'Belgium': 'Belgian Grand Prix',
    'Hungary':'Hungarian Grand Prix'
}

df['EventName'] = df['EventName'].replace(mapping)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['EventName'] = df['EventName'].replace(mapping)


In [23]:
df['LapTimeSeconds'] = df['LapTime'].dt.total_seconds()
df.drop(columns=['LapTime'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['LapTimeSeconds'] = df['LapTime'].dt.total_seconds()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['LapTime'], inplace=True)


In [24]:
df['Lap_Time'] = df.groupby(['EventName', 'Year', 'Driver'])['LapTimeSeconds'] .transform(lambda x: x.fillna(x.mean()))

In [25]:
fastest = (
    df[df['SessionType'] == 'Q']
    .groupby(['EventName', 'Year', 'Driver'], as_index=False)['Lap_Time']
    .min()
)

In [26]:
fastest['QualifyingTime']= fastest.groupby(['EventName', 'Year'])['Lap_Time'].transform('min')
fastest['StartPosition'] = fastest.groupby(['EventName', 'Year'])['Lap_Time'].rank(method='min')

In [27]:
df = df.merge(
    fastest[['EventName', 'Year', 'Driver', 'StartPosition','QualifyingTime']],
    on=['EventName', 'Year', 'Driver'],
    how='left'
)

In [28]:
df.loc[df['SessionType'] != 'R', 'StartPosition'] = None
df.loc[df['SessionType'] != 'R', 'QualifyingTime'] = None

In [29]:
df.drop(columns=['LapTimeSeconds'], inplace=True)

In [30]:
df.dtypes

Driver             object
Stint             float64
Compound           object
Team               object
Position          float64
Year                int64
EventName          object
SessionType        object
Lap_Time          float64
StartPosition     float64
QualifyingTime    float64
dtype: object

In [31]:
df.head(-50)

Unnamed: 0,Driver,Stint,Compound,Team,Position,Year,EventName,SessionType,Lap_Time,StartPosition,QualifyingTime
0,VER,1.0,SOFT,Red Bull Racing,,2024,Bahrain Grand Prix,Q,97.523176,,
1,VER,1.0,SOFT,Red Bull Racing,,2024,Bahrain Grand Prix,Q,90.031000,,
2,VER,1.0,SOFT,Red Bull Racing,,2024,Bahrain Grand Prix,Q,128.726000,,
3,VER,2.0,SOFT,Red Bull Racing,,2024,Bahrain Grand Prix,Q,97.523176,,
4,VER,2.0,SOFT,Red Bull Racing,,2024,Bahrain Grand Prix,Q,90.160000,,
...,...,...,...,...,...,...,...,...,...,...,...
48369,GAS,2.0,MEDIUM,Alpine,16.0,2025,Hungarian Grand Prix,R,84.516000,17.0,74.89
48370,GAS,2.0,MEDIUM,Alpine,17.0,2025,Hungarian Grand Prix,R,84.101000,17.0,74.89
48371,GAS,2.0,MEDIUM,Alpine,17.0,2025,Hungarian Grand Prix,R,82.529000,17.0,74.89
48372,GAS,2.0,MEDIUM,Alpine,17.0,2025,Hungarian Grand Prix,R,82.675000,17.0,74.89


In [32]:
df = df[df['SessionType'] == 'Q'] 

In [33]:
df['QualifyingTime'] = df.groupby(['EventName', 'Year', 'Driver'])['Lap_Time'].transform('min')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['QualifyingTime'] = df.groupby(['EventName', 'Year', 'Driver'])['Lap_Time'].transform('min')


In [34]:
df = df[df['Lap_Time'] == df['QualifyingTime']]

In [35]:
df = df.drop(columns=['Lap_Time'])

In [36]:
df['StartPosition'] = df.groupby(['EventName', 'Year'])['QualifyingTime'].rank(method='first', ascending=True).astype(int)

In [37]:
df = df_all[df_all['SessionType'] == 'Q'].copy()

df['LapTimeSeconds'] = df['LapTime'].dt.total_seconds()
df.drop(columns=['LapTime'], inplace=True)

df['Lap_Time'] = df.groupby(['EventName', 'Year', 'Driver'])['LapTimeSeconds'] .transform(lambda x: x.fillna(x.mean()))

df['QualifyingTime'] = df.groupby(['EventName','Year','Driver'])['Lap_Time'].transform('min')
df = df[df['Lap_Time'] == df['QualifyingTime']]
df = df.drop(columns=['Lap_Time'])

df['StartPosition'] = df.groupby(['EventName','Year'])['QualifyingTime'].rank(method='first', ascending=True).astype(int)

In [39]:
race_results = df_all[df_all['SessionType'] == 'R'].groupby(['EventName','Year','Driver'])['Position'].last().reset_index()

In [40]:
df = df.merge(race_results, on=['EventName','Year','Driver'], how='left').rename(columns={'Position':'RacePosition'})

In [41]:
df.head()

Unnamed: 0,Time,Driver,DriverNumber,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,...,DeletedReason,FastF1Generated,IsAccurate,Year,EventName,SessionType,LapTimeSeconds,QualifyingTime,StartPosition,Position_y
0,0 days 01:28:47.747000,NOR,4,15.0,6.0,NaT,NaT,0 days 00:00:26.599000,0 days 00:00:37.630000,0 days 00:00:25.296000,...,,False,True,2024,Pre-Season Testing,Q,89.525,89.525,1,1.0
1,0 days 00:50:26.283000,VER,1,11.0,4.0,NaT,NaT,0 days 00:00:26.615000,0 days 00:00:37.663000,0 days 00:00:25.402000,...,,False,True,2024,Pre-Season Testing,Q,89.68,89.68,3,2.0
2,0 days 01:29:42.301000,HAM,44,15.0,6.0,NaT,NaT,0 days 00:00:26.626000,0 days 00:00:37.987000,0 days 00:00:25.228000,...,,False,True,2024,Pre-Season Testing,Q,89.841,89.841,5,6.0
3,0 days 01:29:58.242000,RUS,63,16.0,6.0,NaT,NaT,0 days 00:00:26.727000,0 days 00:00:37.913000,0 days 00:00:25.227000,...,,False,True,2024,Pre-Season Testing,Q,89.867,89.867,6,4.0
4,0 days 00:52:45.676000,PIA,81,11.0,4.0,NaT,NaT,0 days 00:00:26.603000,0 days 00:00:37.753000,0 days 00:00:25.284000,...,,False,True,2024,Pre-Season Testing,Q,89.64,89.64,2,3.0


In [42]:
df.drop(columns=['SpeedI1','SpeedI2', 'SpeedST', 'SpeedFL','Time','DriverNumber','LapNumber','PitOutTime',
                     'PitInTime','PitOutTime','Sector1Time','Sector2Time','Sector3Time','Sector1SessionTime',
                     'Sector2SessionTime','Sector3SessionTime','IsPersonalBest','TyreLife','FreshTyre',
                      'LapStartTime','LapStartDate','TrackStatus','Deleted','DeletedReason','FastF1Generated',
                     'IsAccurate'], inplace=True)

In [43]:
df = df[~df['Driver'].isin(['MAG', 'PER','RIC','SAR'])]

In [44]:
df = df[df['EventName'] != 'Pre-Season Testing']

In [45]:
df = df[~df['Compound'].isin(['None', 'nan'])]

In [46]:
mapping = {
    'Australia': 'Australian Grand Prix',
    'Austria': 'Austrian Grand Prix',
    'Bahrain': 'Bahrain Grand Prix',
    'Monaco': 'Monaco Grand Prix',
    'Japan': 'Japanese Grand Prix',
    'Canada': 'Canadian Grand Prix',
    'Spain': 'Spanish Grand Prix',
    'France': 'French Grand Prix',
    'Saudi Arabia': 'Saudi Arabian Grand Prix',
    'China':'Chinese Grand Prix',
    'Miami': 'Miami Grand Prix',
    'Emilia Romagna':'Emilia Romagna Grand Prix',
    'Great Britain': 'British Grand Prix',
    'Belgium': 'Belgian Grand Prix',
    'Hungary':'Hungarian Grand Prix'
}

df['EventName'] = df['EventName'].replace(mapping)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['EventName'] = df['EventName'].replace(mapping)


In [47]:
df.head(-50)

Unnamed: 0,Driver,Stint,Compound,Team,Position_x,Year,EventName,SessionType,LapTimeSeconds,QualifyingTime,StartPosition,Position_y
20,VER,6.0,SOFT,Red Bull Racing,,2024,Bahrain Grand Prix,Q,89.179,89.179,2,1.0
21,LEC,5.0,SOFT,Ferrari,,2024,Bahrain Grand Prix,Q,89.165,89.165,1,4.0
22,RUS,6.0,SOFT,Mercedes,,2024,Bahrain Grand Prix,Q,89.485,89.485,3,5.0
23,SAI,6.0,SOFT,Ferrari,,2024,Bahrain Grand Prix,Q,89.507,89.507,4,3.0
25,ALO,5.0,SOFT,Aston Martin,,2024,Bahrain Grand Prix,Q,89.542,89.542,6,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...
724,RUS,6.0,SOFT,Mercedes,,2025,British Grand Prix,Q,64.763,64.763,5,5.0
725,LAW,6.0,SOFT,Racing Bulls,,2025,British Grand Prix,Q,64.926,64.926,9,6.0
726,VER,4.0,SOFT,Red Bull Racing,,2025,British Grand Prix,Q,64.836,64.836,6,
727,BOR,5.0,SOFT,Kick Sauber,,2025,British Grand Prix,Q,64.846,64.846,7,8.0


In [48]:
df = df.drop(columns=['SessionType'])

In [49]:
df = df.drop(columns=['LapTimeSeconds'])

In [50]:
df = df.drop(columns=['Position_x'])

In [51]:
df.dropna(subset=['Position_y'])

Unnamed: 0,Driver,Stint,Compound,Team,Year,EventName,QualifyingTime,StartPosition,Position_y
20,VER,6.0,SOFT,Red Bull Racing,2024,Bahrain Grand Prix,89.179,2,1.0
21,LEC,5.0,SOFT,Ferrari,2024,Bahrain Grand Prix,89.165,1,4.0
22,RUS,6.0,SOFT,Mercedes,2024,Bahrain Grand Prix,89.485,3,5.0
23,SAI,6.0,SOFT,Ferrari,2024,Bahrain Grand Prix,89.507,4,3.0
25,ALO,5.0,SOFT,Aston Martin,2024,Bahrain Grand Prix,89.542,6,9.0
...,...,...,...,...,...,...,...,...,...
776,TSU,2.0,SOFT,Red Bull Racing,2025,Hungarian Grand Prix,75.899,16,18.0
777,GAS,2.0,SOFT,Alpine,2025,Hungarian Grand Prix,75.966,17,17.0
778,OCO,2.0,SOFT,Haas F1 Team,2025,Hungarian Grand Prix,76.023,18,16.0
779,HUL,2.0,SOFT,Kick Sauber,2025,Hungarian Grand Prix,76.081,19,13.0


In [52]:
df.rename(columns={'Position_y': 'Position'}, inplace=True)

In [53]:
df.head()

Unnamed: 0,Driver,Stint,Compound,Team,Year,EventName,QualifyingTime,StartPosition,Position
20,VER,6.0,SOFT,Red Bull Racing,2024,Bahrain Grand Prix,89.179,2,1.0
21,LEC,5.0,SOFT,Ferrari,2024,Bahrain Grand Prix,89.165,1,4.0
22,RUS,6.0,SOFT,Mercedes,2024,Bahrain Grand Prix,89.485,3,5.0
23,SAI,6.0,SOFT,Ferrari,2024,Bahrain Grand Prix,89.507,4,3.0
25,ALO,5.0,SOFT,Aston Martin,2024,Bahrain Grand Prix,89.542,6,9.0


In [54]:
from sklearn.preprocessing import LabelEncoder

In [55]:
le_driver = LabelEncoder()
df['DriverEncoded'] = le_driver.fit_transform(df['Driver'])

In [56]:
le_compound = LabelEncoder()
df['CompoundEncoded'] = le_compound.fit_transform(df['Compound'])

In [57]:
le_team = LabelEncoder()
df['TeamEncoded'] = le_team.fit_transform(df['Team'])

In [58]:
le_event = LabelEncoder()
df['EventEncoded'] = le_event.fit_transform(df['EventName'])

In [59]:
df.drop(columns=['EventName'],axis=1, inplace=True)
df.drop(columns=['Driver'],axis=1, inplace=True)
df.drop(columns=['Compound'],axis=1, inplace=True)
df.drop(columns=['Team'],axis=1, inplace=True)

In [60]:
df.dtypes

Stint              float64
Year                 int64
QualifyingTime     float64
StartPosition        int64
Position           float64
DriverEncoded        int64
CompoundEncoded      int64
TeamEncoded          int64
EventEncoded         int64
dtype: object

In [61]:
df['Position'].isna().sum()

np.int64(21)

In [62]:
df = df.dropna(subset=["Position"])

In [63]:
df.to_csv("cleaned_formula1.csv", index=False)

In [64]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [65]:
X = df.drop(columns=['Position'])

In [66]:
y = df['Position']

In [67]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [68]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X, y)


0,1,2
,n_estimators,100
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [69]:
y_pred_model = model.predict(X_test)

In [71]:
print("Random Forest Results:")
print("MSE:", mean_squared_error(y_test, y_pred_model))
print("R²:", r2_score(y_test, y_pred_model))

Random Forest Results:
MSE: 1.358794696969697
R²: 0.9535366372619246


In [72]:
import pickle

In [80]:
with open("formula1_model.pkl", "rb") as f:model = pickle.load(f)

In [81]:
driver_enc   = LabelEncoder()
team_enc     = LabelEncoder()
compound_enc = LabelEncoder()
event_enc    = LabelEncoder()

In [82]:
train_df = pd.read_csv("formula1.csv")
driver_enc.fit(train_df["Driver"])
team_enc.fit(train_df["Team"])
compound_enc.fit(train_df["Compound"])
event_enc.fit(train_df["EventName"])

In [83]:
df_ref = pd.read_csv("formula1.csv")

In [84]:
fastf1.Cache.enable_cache("f1_cache")
session = fastf1.get_session(2025, "Canadian Grand Prix", "Q")
session.load()
res = session.results.copy()

core           INFO 	Loading data for Canadian Grand Prix - Qualifying [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['63', '1', '81', '12', '44', '14', '4', '16', '6', '23', '22', '43', '27', '87', '31', '5', '55', '18', '30', '10']


In [85]:
laps = session.laps

In [86]:
idx_fast = laps.groupby("Driver")["LapTime"].idxmin()

In [87]:
fast_laps = laps.loc[idx_fast, ["Driver","LapTime","Compound","Stint"]].copy()
fast_laps["QualifyingTime"] = fast_laps["LapTime"].dt.total_seconds()

In [90]:
quali = pd.DataFrame({"Driver": res["Abbreviation"],"Team": res["TeamName"],}).merge(fast_laps
    [["Driver","QualifyingTime","Compound","Stint"]], on="Driver", how="left")

In [91]:
quali["StartPosition"] = quali["QualifyingTime"].rank(method="first").astype(int)

In [92]:
quali["Year"] = 2025
quali["EventName"] = "Canadian Grand Prix"

In [93]:
quali["DriverEncoded"]   = driver_enc.transform(quali["Driver"])
quali["TeamEncoded"]     = team_enc.transform(quali["Team"])
quali["CompoundEncoded"] = compound_enc.transform(quali["Compound"])
quali["EventEncoded"]    = event_enc.transform(quali["EventName"])

In [94]:
with open("formula1_model.pkl", "rb") as f:model = pickle.load(f)

In [95]:
feature_cols = ["Stint","Year","QualifyingTime","StartPosition", "DriverEncoded","CompoundEncoded","TeamEncoded","EventEncoded"]

In [96]:
X_pred = quali[feature_cols]
quali["PredictedPosition"] = model.predict(X_pred)

In [99]:
quali["Podium"] = quali["PredictedPosition"].rank(method="first").astype(int)
podium = quali.nsmallest(3, "Podium")

In [103]:
print('🏁Canadian Grand Prix:')
print(podium[["Driver","Team","StartPosition","Podium"]])

🏁Canadian Grand Prix:
  Driver             Team  StartPosition  Podium
1    VER  Red Bull Racing              2       1
0    RUS         Mercedes              1       2
2    PIA          McLaren              3       3
