### Import Libraries

In [26]:
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error
from sklearn.ensemble import RandomForestRegressor

import warnings
warnings.filterwarnings('ignore')
import logging
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from sklearn.ensemble import RandomForestRegressor

# Suppress FastF1 logging to only show errors
#logging.getLogger("fastf1").setLevel(logging.ERROR)


### Load Data 

In [2]:
def get_race_data(year,location,event):
    session_2024 = fastf1.get_session(year, location, event)
    print(f"Getting {location}'s {year} data")
    session_2024.load()
    session_2024_df = session_2024.laps[["Driver","Team", "LapTime"]].copy()
    session_2024_df.dropna(inplace=True)
    # Convert times to seconds
    for col in ["LapTime"]:
        session_2024_df[f"{col}_(s)_{location}"] = session_2024_df[col].dt.total_seconds()
    session_2024_df = session_2024_df.groupby(["Driver","Team"])[f"LapTime_(s)_{location}"].mean().reset_index()
    return session_2024_df

### 2022 Data

In [3]:
Bah_2022_df = get_race_data(2022, "Bahrain", "R")
SA_2022_df = get_race_data(2022, "Saudi Arabia", "R")
Aus_2022_df = get_race_data(2022, "Australia", "R")
Jap_2022_df = get_race_data(2022, "Japan", "R")
Aze_2022_df = get_race_data(2022, "Azerbaijan", "R")
Mia_2022_df = get_race_data(2022, "Miami", "R")
Emi_2022_df = get_race_data(2022, "Emilia-Romagna", "R")

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Bahrain's 2022 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '55', '44', '63', '20', '77', '31', '22', '14', '24', '47', '18', '23', '3', '4', '6', '27', '11', '1', '10']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Saudi Arabia's 2022 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '55', '11', '63', '31', '4', '10', '20', '44', '24', '27', '18', '23', '77', '14', '3', '6', '22', '47']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_d

Getting Australia's 2022 data


req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '11', '63', '44', '4', '3', '31', '77', '10', '23', '24', '18', '47', '20', '22', '6', '14', '1', '5', '55']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data


Getting Japan's 2022 data


req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '31', '44', '5', '14', '63', '6', '4', '3', '18', '22', '20', '77', '24', '47', '10', '55', '23']
core           INFO 	Loading data for Azerbaijan Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req    

Getting Azerbaijan's 2022 data


req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '63', '44', '10', '5', '14', '3', '4', '31', '77', '23', '22', '47', '6', '18', '20', '24', '16', '55']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data


Getting Miami's 2022 data


req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '16', '55', '11', '63', '44', '77', '31', '23', '18', '14', '22', '3', '6', '47', '20', '5', '10', '4', '24']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_tim

Getting Emilia-Romagna's 2022 data


req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '4', '63', '77', '16', '22', '5', '20', '18', '23', '10', '44', '31', '24', '6', '47', '3', '14', '55']


In [4]:
merged_data_2022 = Bah_2022_df.merge(SA_2022_df, on= ['Driver','Team'],how='outer').merge(Aus_2022_df, on= ['Driver','Team'],how='outer')\
.merge(Jap_2022_df, on= ['Driver','Team'],how='outer').merge(Aze_2022_df, on= ['Driver','Team'],how='outer').merge(Mia_2022_df, on= ['Driver','Team'],how='outer')\
.merge(Emi_2022_df, on= ['Driver','Team'],how='outer')
merged_data_2022

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_Azerbaijan,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna
0,ALB,Williams,103.640632,102.143717,92.171207,,112.230959,98.999768,88.391049
1,ALO,Alpine,103.087263,98.455062,91.784,111.57175,111.64402,98.934732,124.153167
2,BOT,Alfa Romeo,102.977246,98.269281,90.960368,113.641667,111.455396,97.828582,88.431222
3,GAS,AlphaTauri,100.781023,100.232854,91.095351,114.168083,111.43078,97.572667,88.389016
4,HAM,Mercedes,102.864193,100.373896,90.18107,111.226708,111.07112,97.716218,89.4075
5,HUL,Aston Martin,103.814263,99.768277,,,,,
6,LAT,Williams,103.778579,98.876143,92.579491,112.907083,114.208694,98.210873,88.578164
7,LEC,Ferrari,100.697709,96.81834,88.37975,110.268083,112.38645,96.148093,88.635841
8,MAG,Haas F1 Team,102.953298,100.147688,91.368429,113.604167,112.623161,99.197436,88.940413
9,MSC,Haas F1 Team,103.26593,,91.402893,113.78825,113.076918,99.741464,88.876541


In [5]:
if pd.isna(merged_data_2022.at[19, 'LapTime_(s)_Bahrain']):
    merged_data_2022.at[19, 'LapTime_(s)_Bahrain'] = merged_data_2022.at[5, 'LapTime_(s)_Bahrain']

if pd.isna(merged_data_2022.at[19, 'LapTime_(s)_Saudi Arabia']):
    merged_data_2022.at[19, 'LapTime_(s)_Saudi Arabia'] = merged_data_2022.at[5, 'LapTime_(s)_Saudi Arabia']

merged_data_2022 = merged_data_2022.drop([5]).reset_index(drop=True)
merged_data_2022

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_Azerbaijan,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna
0,ALB,Williams,103.640632,102.143717,92.171207,,112.230959,98.999768,88.391049
1,ALO,Alpine,103.087263,98.455062,91.784,111.57175,111.64402,98.934732,124.153167
2,BOT,Alfa Romeo,102.977246,98.269281,90.960368,113.641667,111.455396,97.828582,88.431222
3,GAS,AlphaTauri,100.781023,100.232854,91.095351,114.168083,111.43078,97.572667,88.389016
4,HAM,Mercedes,102.864193,100.373896,90.18107,111.226708,111.07112,97.716218,89.4075
5,LAT,Williams,103.778579,98.876143,92.579491,112.907083,114.208694,98.210873,88.578164
6,LEC,Ferrari,100.697709,96.81834,88.37975,110.268083,112.38645,96.148093,88.635841
7,MAG,Haas F1 Team,102.953298,100.147688,91.368429,113.604167,112.623161,99.197436,88.940413
8,MSC,Haas F1 Team,103.26593,,91.402893,113.78825,113.076918,99.741464,88.876541
9,NOR,McLaren,103.682789,100.100188,90.651544,113.002667,112.519549,96.139538,88.29873


### 2024 Data

In [6]:
Bah_2024_df = get_race_data(2024, "Bahrain", "R")
SA_2024_df = get_race_data(2024, "Saudi Arabia", "R")
Aus_2024_df = get_race_data(2024, "Australia", "R")
Jap_2024_df = get_race_data(2024, "Japan", "R")
Chi_2024_df = get_race_data(2024, "China", "R")
Mia_2024_df = get_race_data(2024, "Miami", "R")
Emi_2024_df = get_race_data(2024, "Emilia-Romagna", "R")

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Bahrain's 2024 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Saudi Arabia's 2024 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '16', '81', '14', '63', '38', '4', '44', '27', '23', '20', '31', '2', '22', '3', '77', '24', '18', '10']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Australia's 2024 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Japan's 2024 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '4', '14', '63', '81', '44', '22', '27', '18', '20', '77', '31', '10', '2', '24', '3', '23']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting China's 2024 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '11', '16', '55', '63', '14', '81', '44', '27', '31', '23', '10', '24', '18', '20', '2', '3', '22', '77']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Miami's 2024 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '16', '11', '55', '44', '22', '63', '14', '31', '27', '10', '81', '24', '3', '77', '18', '23', '20', '2']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Emilia-Romagna's 2024 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '16', '81', '55', '44', '63', '11', '18', '22', '27', '20', '3', '31', '24', '10', '2', '77', '14', '23']


In [7]:
 merged_data_2024 = Bah_2024_df.merge(SA_2024_df, on= ['Driver','Team'],how='outer').merge(Aus_2024_df, on= ['Driver','Team'],how='outer')\
.merge(Jap_2024_df, on= ['Driver','Team'],how='outer').merge(Chi_2024_df, on= ['Driver','Team'],how='outer').merge(Mia_2024_df, on= ['Driver','Team'],how='outer')\
.merge(Emi_2024_df, on= ['Driver','Team'],how='outer')
merged_data_2024

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna
0,ALB,Williams,98.511214,96.972327,84.769333,,109.251071,96.946789,84.49362
1,ALO,Aston Martin,97.888228,94.55775,84.617845,98.639314,107.299648,96.26286,83.877032
2,BEA,Ferrari,,96.048245,,,,,
3,BOT,Kick Sauber,98.503745,97.688667,85.424018,101.448647,104.132632,96.530386,83.574113
4,GAS,Alpine,98.877839,121.453,85.312211,102.096196,109.317446,96.400596,83.46721
5,HAM,Mercedes,97.457298,94.955042,85.2284,98.922863,109.116786,95.902825,81.910413
6,HUL,Haas F1 Team,98.613821,95.764625,85.024069,100.18244,108.411909,96.327456,83.073048
7,LEC,Ferrari,97.270368,94.101208,83.262224,98.537588,106.020943,94.780893,81.478857
8,MAG,Haas F1 Team,98.447464,96.91802,84.753053,101.488706,108.536345,96.571211,83.09171
9,NOR,McLaren,97.424561,94.858021,83.323224,98.261392,107.444545,94.522071,81.364714


In [8]:
if pd.isna(merged_data_2024.at[15, 'LapTime_(s)_Bahrain']):
    merged_data_2024.at[15, 'LapTime_(s)_Bahrain'] = merged_data_2024.at[2, 'LapTime_(s)_Bahrain']

merged_data_2024 = merged_data_2024.drop([2]).reset_index(drop=True)
merged_data_2024

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna
0,ALB,Williams,98.511214,96.972327,84.769333,,109.251071,96.946789,84.49362
1,ALO,Aston Martin,97.888228,94.55775,84.617845,98.639314,107.299648,96.26286,83.877032
2,BOT,Kick Sauber,98.503745,97.688667,85.424018,101.448647,104.132632,96.530386,83.574113
3,GAS,Alpine,98.877839,121.453,85.312211,102.096196,109.317446,96.400596,83.46721
4,HAM,Mercedes,97.457298,94.955042,85.2284,98.922863,109.116786,95.902825,81.910413
5,HUL,Haas F1 Team,98.613821,95.764625,85.024069,100.18244,108.411909,96.327456,83.073048
6,LEC,Ferrari,97.270368,94.101208,83.262224,98.537588,106.020943,94.780893,81.478857
7,MAG,Haas F1 Team,98.447464,96.91802,84.753053,101.488706,108.536345,96.571211,83.09171
8,NOR,McLaren,97.424561,94.858021,83.323224,98.261392,107.444545,94.522071,81.364714
9,OCO,Alpine,98.860571,97.310542,85.628456,101.825667,109.202964,96.309158,83.373532


## Prepare Train Test Data

In [12]:
X_train = pd.get_dummies(
    merged_data_2022.drop(['Driver', 'LapTime_(s)_Emilia-Romagna'], axis=1).fillna(999),
    dtype=int
).copy().values

y_train = merged_data_2022[['LapTime_(s)_Emilia-Romagna']].fillna(999).values

X_test = pd.get_dummies(
    merged_data_2024.drop(['Driver', 'LapTime_(s)_Emilia-Romagna'], axis=1).fillna(999),
    dtype=int
).copy().values

y_test = merged_data_2024[['LapTime_(s)_Emilia-Romagna']].fillna(999).values

### Modelling Data Set 

In [24]:
model_result = {}

In [22]:
# Linear Regression

# Fit the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on new data
Y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, Y_pred)
print("MSE:", mse)

model_result['LinearRegression'] = mse
model_result

MSE: 136028446.5152621


In [27]:
# Random Foreacst
rf = RandomForestRegressor(random_state=42,n_estimators=1000)

# Fit the model
rf.fit(X_train, y_train)

# Predict on new data
Y_pred = rf.predict(X_test)
mse = mean_squared_error(y_test, Y_pred)
print("MSE:", mse)

model_result['Random Forest'] = mse
model_result

MSE: 19300.730130072137


In [31]:
# Gradient Boost

model = GradientBoostingRegressor(n_estimators=20000, learning_rate=0.01, random_state=38)
model.fit(X_train, y_train)

Y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, Y_pred)
print("MSE:", mse)

model_result['Gradient Boost'] = mse
model_result

MSE: 42004.30346567337


{'LinearRegression': 136028446.5152621,
 'Random Forest': 19300.730130072137,
 'Gradient Boost': 42004.30346567337}

In [50]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and fit SVR
svr = SVR(kernel='rbf')  # or try 'linear', 'poly'
svr.fit(X_train, y_train)

# Predict
Y_pred = svr.predict(X_test)


mse = mean_squared_error(y_test, Y_pred)
print("MSE:", mse)

model_result['SVR'] = mse
model_result

MSE: 0.6846240006813783


{'LinearRegression': 136028446.5152621,
 'Random Forest': 19300.730130072137,
 'Gradient Boost': 0.10633048777882441,
 'SVR': 0.6846240006813783}

### 2025 Data

In [33]:
Bah_2025_df = get_race_data(2025, "Bahrain", "R")
SA_2025_df = get_race_data(2025, "Saudi Arabia", "R")
Aus_2025_df = get_race_data(2025, "Australia", "R")
Jap_2025_df = get_race_data(2025, "Japan", "R")
Chi_2025_df = get_race_data(2025, "China", "R")
Mia_2025_df = get_race_data(2025, "Miami", "R")

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Bahrain's 2025 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '4', '16', '44', '1', '10', '31', '22', '87', '12', '23', '6', '7', '14', '30', '18', '5', '55', '27']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Saudi Arabia's 2025 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '1', '16', '4', '63', '12', '44', '55', '23', '6', '14', '30', '87', '31', '27', '18', '7', '5', '22', '10']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Australia's 2025 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '63', '12', '23', '18', '27', '16', '81', '44', '10', '22', '31', '87', '30', '5', '14', '55', '7', '6']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Japan's 2025 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '63', '12', '44', '6', '23', '87', '14', '22', '10', '55', '7', '27', '30', '31', '5', '18']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting China's 2025 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '63', '1', '31', '12', '23', '87', '18', '55', '6', '30', '7', '5', '27', '22', '14', '16', '44', '10']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Getting Miami's 2025 data


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '63', '1', '23', '12', '16', '44', '55', '22', '6', '31', '10', '27', '14', '18', '30', '5', '87', '7']


In [34]:
merged_data_new = Bah_2025_df.merge(SA_2025_df, on= ['Driver','Team'],how='outer').merge(Aus_2025_df, on= ['Driver','Team'],how='outer')\
.merge(Jap_2025_df, on= ['Driver' ,'Team'],how='outer').merge(Chi_2025_df, on= ['Driver','Team'],how='outer').merge(Mia_2025_df, on= ['Driver','Team'],how='outer')
merged_data_new

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami
0,ALB,Williams,100.663357,95.926667,104.672389,93.723585,98.416911,94.379895
1,ALO,Aston Martin,100.858357,96.135208,98.517379,94.043698,111.3935,95.581536
2,ANT,Mercedes,100.647446,95.124646,104.57937,93.314226,98.370964,94.510333
3,BEA,Haas F1 Team,101.526825,96.305375,103.870075,93.990792,98.505875,95.954556
4,BOR,Kick Sauber,100.971571,97.150894,102.655634,94.544906,99.377855,98.1153
5,DOO,Alpine,101.618263,97.136213,,94.49617,98.811196,
6,GAS,Alpine,101.323456,,104.889167,94.134057,98.611089,95.123368
7,HAD,Racing Bulls,101.679807,96.001021,,93.662585,98.819661,94.845421
8,HAM,Ferrari,100.272036,95.272271,103.967189,93.512547,97.864411,94.592509
9,HUL,Kick Sauber,101.629947,96.809021,104.740167,94.508302,99.550036,95.227536


In [37]:
if pd.isna(merged_data_new.at[10, 'LapTime_(s)_Australia']):
    merged_data_new.at[10, 'LapTime_(s)_Australia'] = merged_data_new.at[19, 'LapTime_(s)_Australia']

if pd.isna(merged_data_new.at[10, 'LapTime_(s)_China']):
    merged_data_new.at[10, 'LapTime_(s)_China'] = merged_data_new.at[19, 'LapTime_(s)_China']

if pd.isna(merged_data_new.at[20, 'LapTime_(s)_Australia']):
    merged_data_new.at[20, 'LapTime_(s)_Australia'] = merged_data_new.at[11, 'LapTime_(s)_Australia']

if pd.isna(merged_data_new.at[20, 'LapTime_(s)_China']):
    merged_data_new.at[20, 'LapTime_(s)_China'] = merged_data_new.at[11, 'LapTime_(s)_China']

merged_data_new = merged_data_new.drop([11,19]).reset_index(drop=True)

merged_data_new

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami
0,ALB,Williams,100.663357,95.926667,104.672389,93.723585,98.416911,94.379895
1,ALO,Aston Martin,100.858357,96.135208,98.517379,94.043698,111.3935,95.581536
2,ANT,Mercedes,100.647446,95.124646,104.57937,93.314226,98.370964,94.510333
3,BEA,Haas F1 Team,101.526825,96.305375,103.870075,93.990792,98.505875,95.954556
4,BOR,Kick Sauber,100.971571,97.150894,102.655634,94.544906,99.377855,98.1153
5,DOO,Alpine,101.618263,97.136213,,94.49617,98.811196,
6,GAS,Alpine,101.323456,,104.889167,94.134057,98.611089,95.123368
7,HAD,Racing Bulls,101.679807,96.001021,,93.662585,98.819661,94.845421
8,HAM,Ferrari,100.272036,95.272271,103.967189,93.512547,97.864411,94.592509
9,HUL,Kick Sauber,101.629947,96.809021,104.740167,94.508302,99.550036,95.227536


### Prepare for final data

In [43]:
X_train = pd.get_dummies(
    merged_data_2024.drop(['Driver', 'LapTime_(s)_Emilia-Romagna'], axis=1).fillna(999),
    dtype=int
).copy()

y_train = merged_data_2024[['LapTime_(s)_Emilia-Romagna']].fillna(999).values

X_final = pd.get_dummies(
    merged_data_new.drop(['Driver'], axis=1).fillna(999),
    dtype=int
).copy().values

In [73]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

# Initialize the model
rf = RandomForestRegressor(random_state=42)

# Define hyperparameter grid
param_grid = {
    'n_estimators': [100, 200, 400, 800, 1000, 1500, 2000],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 7],
    'min_samples_leaf': [1, 2, 3],
    'max_features': ['auto', 'sqrt']
}

# Initialize GridSearchCV
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,                      # 5-fold cross-validation
    scoring='neg_mean_squared_error',
    n_jobs=-1,                 # Use all cores
    verbose=2
)

# Fit to training data
grid_search.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", grid_search.best_params_)

# Use the best model
best_rf = grid_search.best_estimator_

# Predict and evaluate
Y_pred = best_rf.predict(X_test)


Fitting 5 folds for each of 504 candidates, totalling 2520 fits
Best Parameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}


NameError: name 'y_pred' is not defined

In [75]:
mse = mean_squared_error(y_test, Y_pred)
print("Test MSE:", mse)

Test MSE: 0.14803352550751941


In [76]:
# Predict and evaluate
Y_pred = best_rf.predict(X_final)

In [68]:
# Random Foreacst
rf = RandomForestRegressor(random_state=42,n_estimators=1000)

# Fit the model
rf.fit(X_train, y_train)

# Predict on new data
Y_pred = rf.predict(X_final)

In [77]:
merged_data_new["PredictedRaceTime (s) LR"] = Y_pred
merged_data_new

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,PredictedRaceTime (s) LR
0,ALB,Williams,100.663357,95.926667,104.672389,93.723585,98.416911,94.379895,82.535612
1,ALO,Aston Martin,100.858357,96.135208,98.517379,94.043698,111.3935,95.581536,82.800467
2,ANT,Mercedes,100.647446,95.124646,104.57937,93.314226,98.370964,94.510333,82.388554
3,BEA,Haas F1 Team,101.526825,96.305375,103.870075,93.990792,98.505875,95.954556,82.687648
4,BOR,Kick Sauber,100.971571,97.150894,102.655634,94.544906,99.377855,98.1153,82.96731
5,DOO,Alpine,101.618263,97.136213,,94.49617,98.811196,,82.970588
6,GAS,Alpine,101.323456,,104.889167,94.134057,98.611089,95.123368,82.600943
7,HAD,Racing Bulls,101.679807,96.001021,,93.662585,98.819661,94.845421,82.570931
8,HAM,Ferrari,100.272036,95.272271,103.967189,93.512547,97.864411,94.592509,82.392454
9,HUL,Kick Sauber,101.629947,96.809021,104.740167,94.508302,99.550036,95.227536,82.610629


In [78]:
# Rank drivers by predicted race time
final_result_Imola = merged_data_new.sort_values(by="PredictedRaceTime (s) LR").reset_index(drop=True)
final_result_Imola

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,PredictedRaceTime (s) LR
0,PIA,McLaren,99.524375,94.322646,102.084462,93.002113,97.411179,93.536614,82.293621
1,VER,Red Bull Racing,101.295263,94.357229,103.341151,92.961943,97.708607,94.237596,82.341584
2,NOR,McLaren,99.984,94.714229,103.428302,92.988792,97.58525,93.617842,82.383339
3,LEC,Ferrari,100.018643,94.547104,103.933528,93.26566,97.825661,94.537246,82.387497
4,ANT,Mercedes,100.647446,95.124646,104.57937,93.314226,98.370964,94.510333,82.388554
5,RUS,Mercedes,100.023418,94.932833,103.68634,93.289528,97.609339,94.197035,82.388554
6,HAM,Ferrari,100.272036,95.272271,103.967189,93.512547,97.864411,94.592509,82.392454
7,SAI,Williams,102.382955,95.821917,,94.360604,98.775232,94.599368,82.535612
8,ALB,Williams,100.663357,95.926667,104.672389,93.723585,98.416911,94.379895,82.535612
9,HAD,Racing Bulls,101.679807,96.001021,,93.662585,98.819661,94.845421,82.570931


### Constructor Points 

In [82]:
def calculate_constructor_points(df):
    # F1 point system for top 10 positions (optional)
    f1_points = [25, 18, 15, 12, 10, 8, 6, 4, 2, 1]

    # Get positions
    try:
        sai_pos = df[df['Driver'] == 'SAI'].index[0]
        alb_pos = df[df['Driver'] == 'ALB'].index[0] 
    except IndexError:
        return "One or both drivers not found in the DataFrame."

    # Calculate points based on F1 system (0 points if position > 9)
    sai_points = f1_points[sai_pos] if sai_pos < len(f1_points) else 0
    alb_points = f1_points[alb_pos] if alb_pos < len(f1_points) else 0

    # Total constructor points
    total_points = sai_points + alb_points

    print(f"Carlos's position: {sai_pos + 1}, points: {sai_points}")
    print(f"Alex's position: {alb_pos + 1}, points: {alb_points}")
    print(f"Total constructor points: {total_points}")

    return total_points

cp = calculate_constructor_points(final_result_Imola)

Carlos's position: 8, points: 4
Alex's position: 9, points: 2
Total constructor points: 6
