In [3]:
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [41]:
# Load 2024 Chinese GP race session
session_2024 = fastf1.get_session(2024, "Bahrain", "R")
session_2024.load()

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
_api           INFO 	Fetching lap count data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No ca

In [42]:
type(session_2024)

fastf1.core.Session

In [43]:
# Extract lap and sector times
laps_2024 = session_2024.laps[["Driver", "LapTime", "Sector1Time", "Sector2Time", "Sector3Time"]].copy()
laps_2024.dropna(inplace=True)
laps_2024

Unnamed: 0,Driver,LapTime,Sector1Time,Sector2Time,Sector3Time
1,VER,0 days 00:01:36.296000,0 days 00:00:30.916000,0 days 00:00:41.661000,0 days 00:00:23.719000
2,VER,0 days 00:01:36.753000,0 days 00:00:30.999000,0 days 00:00:41.966000,0 days 00:00:23.788000
3,VER,0 days 00:01:36.647000,0 days 00:00:30.931000,0 days 00:00:41.892000,0 days 00:00:23.824000
4,VER,0 days 00:01:37.173000,0 days 00:00:31.255000,0 days 00:00:42.056000,0 days 00:00:23.862000
5,VER,0 days 00:01:37.092000,0 days 00:00:31.041000,0 days 00:00:42.187000,0 days 00:00:23.864000
...,...,...,...,...,...
1124,SAR,0 days 00:01:35.972000,0 days 00:00:30.781000,0 days 00:00:41.539000,0 days 00:00:23.652000
1125,SAR,0 days 00:01:35.987000,0 days 00:00:30.775000,0 days 00:00:41.440000,0 days 00:00:23.772000
1126,SAR,0 days 00:01:36.088000,0 days 00:00:30.798000,0 days 00:00:41.610000,0 days 00:00:23.680000
1127,SAR,0 days 00:01:39.614000,0 days 00:00:32.179000,0 days 00:00:43.748000,0 days 00:00:23.687000


In [44]:
laps_2024.Driver.unique()

array(['VER', 'PER', 'SAI', 'LEC', 'RUS', 'NOR', 'HAM', 'PIA', 'ALO',
       'STR', 'ZHO', 'MAG', 'RIC', 'TSU', 'ALB', 'HUL', 'OCO', 'GAS',
       'BOT', 'SAR'], dtype=object)

In [45]:
# Convert times to seconds
for col in ["LapTime", "Sector1Time", "Sector2Time", "Sector3Time"]:
    laps_2024[f"{col} (s)"] = laps_2024[col].dt.total_seconds()

In [46]:
# Group by driver to get average sector times per driver
sector_times_2024 = laps_2024.groupby("Driver")[["Sector1Time (s)", "Sector2Time (s)", "Sector3Time (s)"]].mean().reset_index()

In [47]:
# 2025 Qualifying Data Chinese GP
qualifying_2025 = pd.DataFrame({
    "Driver": ["Oscar Piastri", "George Russell", "Lando Norris", "Max Verstappen", "Lewis Hamilton",
               "Charles Leclerc", "Isack Hadjar", "Andrea Kimi Antonelli", "Yuki Tsunoda", "Alexander Albon",
               "Esteban Ocon", "Nico H√ºlkenberg", "Fernando Alonso", "Lance Stroll", "Carlos Sainz Jr.",
               "Pierre Gasly", "Oliver Bearman", "Jack Doohan", "Gabriel Bortoleto", "Liam Lawson"],
    "QualifyingTime (s)": [90.641, 90.723, 90.793, 90.817, 90.927,
                           91.021, 91.079, 91.103, 91.638, 91.706,
                           91.625, 91.632, 91.688, 91.773, 91.840,
                           91.992, 92.018, 92.092, 92.141, 92.174]
})

In [48]:
# Map full names to FastF1 3-letter codes
driver_mapping = {
    "Oscar Piastri": "PIA", "George Russell": "RUS", "Lando Norris": "NOR", "Max Verstappen": "VER",
    "Lewis Hamilton": "HAM", "Charles Leclerc": "LEC", "Isack Hadjar": "HAD", "Andrea Kimi Antonelli": "ANT",
    "Yuki Tsunoda": "TSU", "Alexander Albon": "ALB", "Esteban Ocon": "OCO", "Nico H√ºlkenberg": "HUL",
    "Fernando Alonso": "ALO", "Lance Stroll": "STR", "Carlos Sainz Jr.": "SAI", "Pierre Gasly": "GAS",
    "Oliver Bearman": "BEA", "Jack Doohan": "DOO", "Gabriel Bortoleto": "BOR", "Liam Lawson": "LAW"
}

In [49]:
qualifying_2025["DriverCode"] = qualifying_2025["Driver"].map(driver_mapping)

In [50]:
# Merge qualifying data with sector times
merged_data = qualifying_2025.merge(sector_times_2024, left_on="DriverCode", right_on="Driver", how="left")

In [51]:
# Define feature set (Qualifying + Sector Times)
X = merged_data[["QualifyingTime (s)", "Sector1Time (s)", "Sector2Time (s)", "Sector3Time (s)"]].fillna(0)
y = laps_2024.groupby("Driver")["LapTime (s)"].mean().reset_index()["LapTime (s)"]

In [53]:
# Train Gradient Boosting Model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=38)
model = GradientBoostingRegressor(n_estimators=200, learning_rate=0.1, random_state=38)
model.fit(X_train, y_train)

In [54]:
# Predict race times using 2025 qualifying and sector data
predicted_race_times = model.predict(X)
qualifying_2025["PredictedRaceTime (s)"] = predicted_race_times

In [55]:
# Rank drivers by predicted race time
qualifying_2025 = qualifying_2025.sort_values(by="PredictedRaceTime (s)").reset_index()

In [57]:
# Print final predictions
print("\nüèÅ Predicted 2025 Bahrain GP Winner with New Drivers and Sector Times üèÅ\n")
print(qualifying_2025[["Driver", "PredictedRaceTime (s)"]])


üèÅ Predicted 2025 Bahrain GP Winner with New Drivers and Sector Times üèÅ

                   Driver  PredictedRaceTime (s)
0            Esteban Ocon              96.913435
1            Isack Hadjar              97.252858
2          Lewis Hamilton              97.356152
3            Lance Stroll              97.362709
4         Nico H√ºlkenberg              97.469731
5         Charles Leclerc              97.664356
6          George Russell              97.820651
7          Oliver Bearman              97.995602
8        Carlos Sainz Jr.              98.279651
9             Liam Lawson              98.303578
10      Gabriel Bortoleto              98.303578
11           Lando Norris              98.318000
12        Fernando Alonso              98.327714
13  Andrea Kimi Antonelli              98.339929
14           Yuki Tsunoda              98.375038
15            Jack Doohan              98.376981
16          Oscar Piastri              98.413459
17        Alexander Albon             

In [58]:
# Evaluate Model
y_pred = model.predict(X_test)
print(f"\nüîç Model Error (MAE): {mean_absolute_error(y_test, y_pred):.2f} seconds")


üîç Model Error (MAE): 1.20 seconds


In [59]:
y_pred

array([98.27965115, 98.37503844, 97.66435566, 98.30357828])