<a href="https://colab.research.google.com/github/mzainxo/F1WinnerPredictions/blob/main/ChineseGP_F1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
pip install fastf1 pandas numpy scikit-learn



In [23]:
# Necessary imports
import os
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [24]:
# Define cache directory.
# Defaults to the Colab path this notebook was written for; set the
# F1_CACHE_DIR environment variable to keep the cache somewhere else
# (e.g. on a machine where /content is not available or not writable).
cache_dir = os.environ.get("F1_CACHE_DIR", "/content/f1_cache2")

In [25]:
# Create the cache directory (and any missing parent directories).
# exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(cache_dir, exist_ok=True)

In [26]:
# Switch on FastF1's on-disk cache in cache_dir, then report the location in use
f1_cache = fastf1.Cache
f1_cache.enable_cache(cache_dir)
print("Cache enabled at:", cache_dir)

Cache enabled at: /content/f1_cache2


In [27]:
# Load the race ('R') session of the 2024 Chinese Grand Prix through FastF1
season, grand_prix, session_kind = 2024, "China", 'R'
session_2024 = fastf1.get_session(season, grand_prix, session_kind)
session_2024.load()

core           INFO 	Loading data for Chinese Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Chinese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core         

In [28]:
# Summarise the lap table: column names, dtypes and non-null counts
race_laps = session_2024.laps
race_laps.info()

<class 'fastf1.core.Laps'>
RangeIndex: 1032 entries, 0 to 1031
Data columns (total 31 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                1032 non-null   timedelta64[ns]
 1   Driver              1032 non-null   object         
 2   DriverNumber        1032 non-null   object         
 3   LapTime             1007 non-null   timedelta64[ns]
 4   LapNumber           1032 non-null   float64        
 5   Stint               1032 non-null   float64        
 6   PitOutTime          41 non-null     timedelta64[ns]
 7   PitInTime           41 non-null     timedelta64[ns]
 8   Sector1Time         1009 non-null   timedelta64[ns]
 9   Sector2Time         1030 non-null   timedelta64[ns]
 10  Sector3Time         1030 non-null   timedelta64[ns]
 11  Sector1SessionTime  1009 non-null   timedelta64[ns]
 12  Sector2SessionTime  1030 non-null   timedelta64[ns]
 13  Sector3SessionTime  1030 non-null   timedelt

In [29]:
# Keep the driver plus the lap/sector timing columns, discarding any lap
# that is missing one of those values
timing_columns = ["LapTime", "Sector1Time", "Sector2Time", "Sector3Time"]
laps_2024 = session_2024.laps[["Driver"] + timing_columns].copy()
laps_2024 = laps_2024.dropna()

# Add a float "<column>Seconds" companion for every timedelta column
seconds_columns = {
    f"{name}Seconds": laps_2024[name].dt.total_seconds()
    for name in timing_columns
}
laps_2024 = laps_2024.assign(**seconds_columns)

In [30]:
# Driver codes that still have at least one lap after the filtering above
laps_2024["Driver"].unique()

array(['VER', 'GAS', 'PER', 'ALO', 'LEC', 'STR', 'SAR', 'MAG', 'TSU',
       'ALB', 'ZHO', 'HUL', 'RIC', 'OCO', 'NOR', 'HAM', 'SAI', 'RUS',
       'BOT', 'PIA'], dtype=object)

In [31]:
# Average each driver's three sector times (in seconds) over their 2024 laps;
# reset_index() turns the "Driver" group key back into a regular column
sector_columns = ["Sector1TimeSeconds", "Sector2TimeSeconds", "Sector3TimeSeconds"]
sector_times_2024 = (
    laps_2024
    .groupby("Driver")[sector_columns]
    .mean()
    .reset_index()
)

In [32]:
# 2025 Chinese GP qualifying data as (driver full name, qualifying time in seconds) records
qualifying_results = [
    ("Oscar Piastri", 90.641),
    ("George Russell", 90.723),
    ("Lando Norris", 90.793),
    ("Max Verstappen", 90.817),
    ("Lewis Hamilton", 90.927),
    ("Charles Leclerc", 91.021),
    ("Yuki Tsunoda", 91.638),
    ("Alexander Albon", 91.706),
    ("Esteban Ocon", 91.625),
    ("Nico Hülkenberg", 91.632),
    ("Fernando Alonso", 91.688),
    ("Lance Stroll", 91.773),
    ("Carlos Sainz", 91.840),
    ("Pierre Gasly", 91.992),
]
qualifying_2025 = pd.DataFrame(qualifying_results, columns=["Driver", "QualifyingSeconds"])
qualifying_2025

Unnamed: 0,Driver,QualifyingSeconds
0,Oscar Piastri,90.641
1,George Russell,90.723
2,Lando Norris,90.793
3,Max Verstappen,90.817
4,Lewis Hamilton,90.927
5,Charles Leclerc,91.021
6,Yuki Tsunoda,91.638
7,Alexander Albon,91.706
8,Esteban Ocon,91.625
9,Nico Hülkenberg,91.632


In [33]:
# Lookup from a driver's full name to the three-letter code FastF1 uses in its "Driver" column
driver_mapping = dict([
    ("Oscar Piastri", "PIA"),
    ("George Russell", "RUS"),
    ("Lando Norris", "NOR"),
    ("Max Verstappen", "VER"),
    ("Lewis Hamilton", "HAM"),
    ("Charles Leclerc", "LEC"),
    ("Yuki Tsunoda", "TSU"),
    ("Alexander Albon", "ALB"),
    ("Esteban Ocon", "OCO"),
    ("Nico Hülkenberg", "HUL"),
    ("Fernando Alonso", "ALO"),
    ("Lance Stroll", "STR"),
    ("Carlos Sainz", "SAI"),
    ("Pierre Gasly", "GAS"),
])

In [34]:
# Translate each qualifier's full name into the three-letter driver code
driver_codes = qualifying_2025["Driver"].map(driver_mapping)
qualifying_2025["DriverCode"] = driver_codes

# Left-join the 2024 average sector times onto the 2025 qualifying rows,
# then drop every row that still contains a missing value
merged_data = pd.merge(
    qualifying_2025,
    sector_times_2024,
    how="left",
    left_on="DriverCode",
    right_on="Driver",
).dropna()
merged_data

Unnamed: 0,Driver_x,QualifyingSeconds,DriverCode,Driver_y,Sector1TimeSeconds,Sector2TimeSeconds,Sector3TimeSeconds
0,Oscar Piastri,90.641,PIA,PIA,28.937296,32.519426,46.923907
1,George Russell,90.723,RUS,RUS,28.795722,32.411685,46.822019
2,Lando Norris,90.793,NOR,NOR,28.553593,32.451481,46.493556
3,Max Verstappen,90.817,VER,VER,28.198173,31.471942,45.636635
4,Lewis Hamilton,90.927,HAM,HAM,29.151218,32.691655,47.230545
5,Charles Leclerc,91.021,LEC,LEC,28.025712,31.970673,46.015577
6,Yuki Tsunoda,91.638,TSU,TSU,30.24584,32.70168,48.11608
7,Alexander Albon,91.706,ALB,ALB,29.195855,32.869764,47.1834
8,Esteban Ocon,91.625,OCO,OCO,29.098545,32.6264,47.483582
9,Nico Hülkenberg,91.632,HUL,HUL,28.528833,32.721778,47.186685


In [35]:
# Features: the 2025 qualifying time plus the 2024 average sector times
feature_columns = ["QualifyingSeconds", "Sector1TimeSeconds", "Sector2TimeSeconds", "Sector3TimeSeconds"]
X = merged_data[feature_columns].fillna(0)

# Target: each driver's mean 2024 lap time, joined onto merged_data by driver code
# NOTE(review): the target is averaged from the same 2024 laps as the sector
# features, so it may be close to their sum — confirm this is intended.
mean_lap_time_2024 = laps_2024.groupby("Driver")["LapTimeSeconds"].mean()
y = merged_data.merge(mean_lap_time_2024, left_on="DriverCode", right_index=True)["LapTimeSeconds"]


In [36]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
split_parts = train_test_split(X, y, test_size=0.2, random_state=38)
X_train, X_test, y_train, y_test = split_parts

In [37]:
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.neighbors import KNeighborsRegressor

# Fit three regressors on the training split and compare their mean absolute
# error (MAE) on the held-out test split.

# K-nearest neighbours. k is capped at the number of training rows because
# predicting with n_neighbors larger than the fitted sample count raises a
# ValueError, and the training split here holds only a handful of drivers.
knn_model = KNeighborsRegressor(n_neighbors=min(5, len(X_train)))  # Try different values for n_neighbors
knn_model.fit(X_train, y_train)
y_pred_knn = knn_model.predict(X_test)

# Gradient Boosting (seeded so repeated runs give the same model)
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=38)
model.fit(X_train, y_train)
y_pred_gb = model.predict(X_test)

# Random Forest (seeded so repeated runs give the same model)
rf_model = RandomForestRegressor(n_estimators=100, random_state=38)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# Evaluate each model on the test split
mae_knn = mean_absolute_error(y_test, y_pred_knn)
mae_gb = mean_absolute_error(y_test, y_pred_gb)
mae_rf = mean_absolute_error(y_test, y_pred_rf)

print("KNN MAE:", mae_knn)
print("Gradient Boosting MAE:", mae_gb)
print("Random Forest MAE:", mae_rf)


KNN MAE: 0.8342604023463119
Gradient Boosting MAE: 0.9161113315856729
Random Forest MAE: 0.7016820425249127


In [38]:
# Show the shapes of the train/test features and targets
split_shapes = [part.shape for part in (X_train, y_train, X_test, y_test)]
print(*split_shapes)

(11, 4) (11,) (3, 4) (3,)


In [39]:
# Predict using 2025 qualifying times.
predicted_race_times = rf_model.predict(X)

# Attach the predictions by driver code instead of by row position.
# X comes from merged_data, which can hold fewer rows than qualifying_2025
# (rows with missing values were dropped) and keeps its original row order,
# whereas qualifying_2025 is re-sorted by a later cell. Keying on DriverCode
# keeps every prediction on the right driver even when this cell is re-run,
# and leaves NaN for drivers that have no prediction.
predictions_by_code = pd.Series(
    predicted_race_times,
    index=pd.Index(merged_data.loc[X.index, "DriverCode"]),
)
qualifying_2025["PredictedRaceTimeSeconds"] = qualifying_2025["DriverCode"].map(predictions_by_code)

In [40]:
# Order the drivers from the lowest to the highest predicted race time
qualifying_2025 = qualifying_2025.sort_values("PredictedRaceTimeSeconds", ascending=True)

In [46]:
# Print the heading, then display each driver alongside their predicted race time
print("\nPREDICTED CHINESE GRAND PRIX 2025 WINNER\n")
prediction_columns = ["Driver", "PredictedRaceTimeSeconds"]
qualifying_2025[prediction_columns]


PREDICTED CHINESE GRAND PRIX 2025 WINNER



Unnamed: 0,Driver,PredictedRaceTimeSeconds
3,Max Verstappen,105.581401
11,Lance Stroll,105.899113
5,Charles Leclerc,106.163415
12,Carlos Sainz,106.94288
10,Fernando Alonso,107.442834
2,Lando Norris,107.727861
1,George Russell,108.061496
0,Oscar Piastri,108.339404
9,Nico Hülkenberg,108.630635
4,Lewis Hamilton,109.028876


In [47]:
# Repeat the test-split MAE of the two tree-based models for reference
mae_summary = (
    ("Gradient Boosting MAE:", mae_gb),
    ("Random Forest MAE:", mae_rf),
)
for label, mae_value in mae_summary:
    print(label, mae_value)

Gradient Boosting MAE: 0.9161113315856729
Random Forest MAE: 0.7016820425249127
