In [None]:
import os

import fastf1
import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

# Record the library versions this notebook was executed with.
version_report = [
    f"FastF1 API version: {fastf1.__version__}",
    f"Pandas version: {pd.__version__}",
    f"Numpy version: {np.__version__}",
    f"Scikit-learn version: {sklearn.__version__}",
]
print("\n".join(version_report))

FastF1 API version: 3.5.3
Pandas version: 2.2.2
Numpy version: 2.0.2
Scikit-learn version: 1.6.1


In [None]:
# FastF1 does not create the cache directory itself and errors out when it is
# missing, so make sure it exists before enabling the cache. This keeps a fresh
# checkout runnable with "Restart Kernel -> Run All".
CACHE_DIR = 'fastf1-cache'
os.makedirs(CACHE_DIR, exist_ok=True)
fastf1.Cache.enable_cache(CACHE_DIR)

In [None]:
# Load the race session (2025 season, round 3) and keep only the driver and
# timing columns; laps with any missing timing value are discarded.
lap_columns = ["Driver", "LapTime", "Sector1Time", "Sector2Time", "Sector3Time"]

session = fastf1.get_session(2025, 3, "R")
session.load()
laps = session.laps[lap_columns].dropna().copy()

core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core       

In [None]:
# Preview the first race laps that survived the NaN filter.
laps.head(5)

Unnamed: 0,Driver,LapTime,Sector1Time,Sector2Time,Sector3Time
1,VER,0 days 00:01:33.943000,0 days 00:00:33.894000,0 days 00:00:42.026000,0 days 00:00:18.023000
2,VER,0 days 00:01:33.639000,0 days 00:00:33.567000,0 days 00:00:42.027000,0 days 00:00:18.045000
3,VER,0 days 00:01:33.744000,0 days 00:00:33.570000,0 days 00:00:42.140000,0 days 00:00:18.034000
4,VER,0 days 00:01:33.776000,0 days 00:00:33.486000,0 days 00:00:42.157000,0 days 00:00:18.133000
5,VER,0 days 00:01:33.646000,0 days 00:00:33.319000,0 days 00:00:42.252000,0 days 00:00:18.075000


In [None]:
# Load the qualifying session for the same event and reduce it to each driver's
# fastest lap among the laps kept by FastF1's quick-lap filter.
qualifying_session = fastf1.get_session(2025, 3, "Q")
qualifying_session.load()

quick_laps = qualifying_session.laps.pick_quicklaps()
qualifying_laps = (
    quick_laps.groupby("Driver")["LapTime"]
    .min()
    .reset_index()
    .rename(columns={"LapTime": "QualifyingTime"})
)

core           INFO 	Loading data for Japanese Grand Prix - Qualifying [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core           INFO 	Processing timing data...
INFO:fastf1.fastf1.core:Processing timing data...
req       

In [None]:
# Preview the per-driver best qualifying laps.
qualifying_laps.head(5)

Unnamed: 0,Driver,QualifyingTime
0,ALB,0 days 00:01:27.615000
1,ALO,0 days 00:01:27.897000
2,ANT,0 days 00:01:27.555000
3,BEA,0 days 00:01:27.711000
4,BOR,0 days 00:01:28.622000


In [None]:
# Sanity check: the race and qualifying data must cover exactly the same drivers,
# because later cells join the two tables on the driver code.
print(f"Drivers in Laps: {np.unique(laps.Driver)}")
print(f"Drivers in Qualifying: {np.unique(qualifying_laps.Driver)}")

# Compare as sets rather than element-wise on arrays: an element-wise `==` on
# arrays of different lengths either errors out or broadcasts, instead of
# producing a clean assertion failure.
race_drivers = set(laps["Driver"])
qualifying_drivers = set(qualifying_laps["Driver"])
assert race_drivers == qualifying_drivers, (
    "Driver mismatch between race and qualifying data: "
    f"{sorted(race_drivers ^ qualifying_drivers)}"
)

Drivers in Laps: ['ALB' 'ALO' 'ANT' 'BEA' 'BOR' 'DOO' 'GAS' 'HAD' 'HAM' 'HUL' 'LAW' 'LEC'
 'NOR' 'OCO' 'PIA' 'RUS' 'SAI' 'STR' 'TSU' 'VER']
Drivers in Qualifying: ['ALB' 'ALO' 'ANT' 'BEA' 'BOR' 'DOO' 'GAS' 'HAD' 'HAM' 'HUL' 'LAW' 'LEC'
 'NOR' 'OCO' 'PIA' 'RUS' 'SAI' 'STR' 'TSU' 'VER']


In [None]:
# Add float "<column> (s)" companions for every timedelta column so the
# timings can be used as numeric model inputs.
def _as_seconds(timedeltas):
    """Return a timedelta Series expressed as float seconds."""
    return timedeltas.dt.total_seconds()


timing_columns = ("LapTime", "Sector1Time", "Sector2Time", "Sector3Time")
for timing_column in timing_columns:
    laps[f"{timing_column} (s)"] = _as_seconds(laps[timing_column])

qualifying_laps["QualifyingTime (s)"] = _as_seconds(qualifying_laps["QualifyingTime"])

In [None]:
# Confirm the new "(s)" columns sit next to the original timedelta columns.
laps.head(5)

Unnamed: 0,Driver,LapTime,Sector1Time,Sector2Time,Sector3Time,LapTime (s),Sector1Time (s),Sector2Time (s),Sector3Time (s)
1,VER,0 days 00:01:33.943000,0 days 00:00:33.894000,0 days 00:00:42.026000,0 days 00:00:18.023000,93.943,33.894,42.026,18.023
2,VER,0 days 00:01:33.639000,0 days 00:00:33.567000,0 days 00:00:42.027000,0 days 00:00:18.045000,93.639,33.567,42.027,18.045
3,VER,0 days 00:01:33.744000,0 days 00:00:33.570000,0 days 00:00:42.140000,0 days 00:00:18.034000,93.744,33.57,42.14,18.034
4,VER,0 days 00:01:33.776000,0 days 00:00:33.486000,0 days 00:00:42.157000,0 days 00:00:18.133000,93.776,33.486,42.157,18.133
5,VER,0 days 00:01:33.646000,0 days 00:00:33.319000,0 days 00:00:42.252000,0 days 00:00:18.075000,93.646,33.319,42.252,18.075


In [None]:
# Confirm the qualifying time is now also available in seconds.
qualifying_laps.head(5)

Unnamed: 0,Driver,QualifyingTime,QualifyingTime (s)
0,ALB,0 days 00:01:27.615000,87.615
1,ALO,0 days 00:01:27.897000,87.897
2,ANT,0 days 00:01:27.555000,87.555
3,BEA,0 days 00:01:27.711000,87.711
4,BOR,0 days 00:01:28.622000,88.622


In [None]:
# Average each driver's three sector times over all of their retained race laps.
sector_columns = ["Sector1Time (s)", "Sector2Time (s)", "Sector3Time (s)"]
sector_times = laps.groupby("Driver", as_index=False)[sector_columns].mean()
sector_times

Unnamed: 0,Driver,Sector1Time (s),Sector2Time (s),Sector3Time (s)
0,ALB,33.124558,42.323481,18.139731
1,ALO,33.278077,42.360635,18.251058
2,ANT,32.952885,42.172712,18.079962
3,BEA,33.135769,42.441692,18.27325
4,BOR,33.460058,42.584077,18.263365
5,DOO,33.392096,42.564558,18.331712
6,GAS,33.258096,42.495788,18.220692
7,HAD,33.069423,42.279904,18.192846
8,HAM,33.022385,42.207192,18.152846
9,HUL,33.428365,42.662692,18.237442


In [None]:
# Full driver name -> three-letter driver code used in the timing data.
driver_mapping = dict(
    [
        ("Alexander Albon", "ALB"),
        ("Fernando Alonso", "ALO"),
        ("Andrea Kimi Antonelli", "ANT"),
        ("Oliver Bearman", "BEA"),
        ("Gabriel Bortoleto", "BOR"),
        ("Jack Doohan", "DOO"),
        ("Pierre Gasly", "GAS"),
        ("Isack Hadjar", "HAD"),
        ("Lewis Hamilton", "HAM"),
        ("Nico Hülkenberg", "HUL"),
        ("Liam Lawson", "LAW"),
        ("Charles Leclerc", "LEC"),
        ("Lando Norris", "NOR"),
        ("Esteban Ocon", "OCO"),
        ("Oscar Piastri", "PIA"),
        ("George Russell", "RUS"),
        ("Carlos Sainz Jr.", "SAI"),
        ("Lance Stroll", "STR"),
        ("Yuki Tsunoda", "TSU"),
        ("Max Verstappen", "VER"),
    ]
)

# Reverse lookup: driver code -> full driver name.
mapping = dict(zip(driver_mapping.values(), driver_mapping.keys()))

In [None]:
# Qualifying table: full driver name, driver code, and best qualifying time in seconds.
driver_codes = qualifying_laps["Driver"]
qualifying = pd.DataFrame(
    {
        "Driver": driver_codes.map(mapping),
        "DriverCode": driver_codes,
        "QualifyingTime (s)": qualifying_laps["QualifyingTime (s)"],
    }
)
display(qualifying)

Unnamed: 0,Driver,DriverCode,QualifyingTime (s)
0,Alexander Albon,ALB,87.615
1,Fernando Alonso,ALO,87.897
2,Andrea Kimi Antonelli,ANT,87.555
3,Oliver Bearman,BEA,87.711
4,Gabriel Bortoleto,BOR,88.622
5,Jack Doohan,DOO,88.877
6,Pierre Gasly,GAS,87.822
7,Isack Hadjar,HAD,87.569
8,Lewis Hamilton,HAM,87.61
9,Nico Hülkenberg,HUL,88.57


In [None]:
# Attach each driver's mean sector times to their qualifying row (joined on driver code).
data = pd.merge(
    qualifying,
    sector_times,
    how="left",
    left_on="DriverCode",
    right_on="Driver",
)
data.sample(n=5)

Unnamed: 0,Driver_x,DriverCode,QualifyingTime (s),Driver_y,Sector1Time (s),Sector2Time (s),Sector3Time (s)
14,Oscar Piastri,PIA,87.027,PIA,32.787,42.015385,18.132808
12,Lando Norris,NOR,86.995,NOR,32.795154,42.081212,18.059115
6,Pierre Gasly,GAS,87.822,GAS,33.258096,42.495788,18.220692
10,Liam Lawson,LAW,87.906,LAW,33.55025,42.527327,18.273462
18,Yuki Tsunoda,TSU,87.967,TSU,33.331673,42.331846,18.231135


In [None]:
# Tidy up after the merge: drop the duplicate driver-code column coming from
# `sector_times` and restore the plain "Driver" name for the full-name column.
# Written without in-place mutation and with errors="ignore" so the cell can be
# re-executed safely (an in-place drop raises KeyError on the second run because
# "Driver_y" is already gone).
data = (
    data
    .drop(columns=["Driver_y"], errors="ignore")
    .rename(columns={"Driver_x": "Driver"})
)
display(data)

Unnamed: 0,Driver,DriverCode,QualifyingTime (s),Sector1Time (s),Sector2Time (s),Sector3Time (s)
0,Alexander Albon,ALB,87.615,33.124558,42.323481,18.139731
1,Fernando Alonso,ALO,87.897,33.278077,42.360635,18.251058
2,Andrea Kimi Antonelli,ANT,87.555,32.952885,42.172712,18.079962
3,Oliver Bearman,BEA,87.711,33.135769,42.441692,18.27325
4,Gabriel Bortoleto,BOR,88.622,33.460058,42.584077,18.263365
5,Jack Doohan,DOO,88.877,33.392096,42.564558,18.331712
6,Pierre Gasly,GAS,87.822,33.258096,42.495788,18.220692
7,Isack Hadjar,HAD,87.569,33.069423,42.279904,18.192846
8,Lewis Hamilton,HAM,87.61,33.022385,42.207192,18.152846
9,Nico Hülkenberg,HUL,88.57,33.428365,42.662692,18.237442


In [None]:
# Features: qualifying time plus mean sector times (identifier columns removed).
X = data.drop(columns=["Driver", "DriverCode"])

# Target: each driver's mean race lap time, looked up by driver code so that
# every target row is guaranteed to belong to the same driver as the feature
# row with the same index. (Relying on two independently sorted tables lining
# up by position is fragile and fails silently if either ordering changes.)
mean_lap_time_by_driver = laps.groupby("Driver")["LapTime (s)"].mean()
y = data["DriverCode"].map(mean_lap_time_by_driver).rename("LapTime (s)")
assert y.notna().all(), "Some drivers in `data` have no race lap times to use as target"

# NOTE(review): the sector-time features and the lap-time target are both
# averaged from the same race laps, and a lap time is essentially the sum of
# its three sector times, so the target is almost directly encoded in the
# features. Treat the reported model error accordingly.

In [None]:
# Spot-check a few random feature rows.
X.sample(n=5)

Unnamed: 0,QualifyingTime (s),Sector1Time (s),Sector2Time (s),Sector3Time (s)
12,86.995,32.795154,42.081212,18.059115
7,87.569,33.069423,42.279904,18.192846
1,87.897,33.278077,42.360635,18.251058
9,88.57,33.428365,42.662692,18.237442
5,88.877,33.392096,42.564558,18.331712


In [None]:
# Spot-check a few random target values.
y.sample(n=5)

Unnamed: 0,LapTime (s)
14,92.935192
18,93.894654
0,93.587769
12,92.935481
5,94.288365


In [None]:
# Hold out 20% of the drivers for evaluation; the fixed seed keeps the split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Fit a gradient-boosted regressor on the training drivers.
gbr_params = dict(n_estimators=200, learning_rate=0.1, random_state=42)
model = GradientBoostingRegressor(**gbr_params)
model.fit(X_train, y_train)

In [None]:
# Predict a mean race lap time for every driver. Note that X contains the
# training rows as well, so most of these predictions are in-sample.
predicted_race_times = model.predict(X)

# Attach the predictions by index label rather than by position. `qualifying`
# is re-sorted below and keeps its name, so a positional assignment would
# silently pair predictions with the wrong drivers if this cell were run again.
# Assumes X carries the same 0..n-1 index labels as `qualifying`, which holds
# for the left merge that built `data` from `qualifying`.
qualifying["PredictedRaceTime (s)"] = pd.Series(predicted_race_times, index=X.index)
qualifying = qualifying.sort_values(by="PredictedRaceTime (s)")
display(qualifying)

Unnamed: 0,Driver,DriverCode,QualifyingTime (s),PredictedRaceTime (s)
19,Max Verstappen,VER,86.983,92.928038
14,Oscar Piastri,PIA,87.027,92.935192
12,Lando Norris,NOR,86.995,92.935481
11,Charles Leclerc,LEC,87.299,93.186712
2,Andrea Kimi Antonelli,ANT,87.555,93.205558
15,George Russell,RUS,87.318,93.224663
8,Lewis Hamilton,HAM,87.61,93.382423
0,Alexander Albon,ALB,87.615,93.527521
7,Isack Hadjar,HAD,87.569,93.542173
3,Oliver Bearman,BEA,87.711,93.850712


In [None]:
# Final ranking, fastest predicted mean lap time first. The headline names the
# Japanese GP because that is the event loaded above (2025 season, round 3);
# the previous text incorrectly said "Chinese GP".
print("\n🏁 Predicted 2025 Japanese GP Winner with New Drivers and Sector Times 🏁\n")
print(qualifying[["Driver", "PredictedRaceTime (s)"]])


🏁 Predicted 2025 Chinese GP Winner with New Drivers and Sector Times 🏁

                   Driver  PredictedRaceTime (s)
19         Max Verstappen              92.928038
14          Oscar Piastri              92.935192
12           Lando Norris              92.935481
11        Charles Leclerc              93.186712
2   Andrea Kimi Antonelli              93.205558
15         George Russell              93.224663
8          Lewis Hamilton              93.382423
0         Alexander Albon              93.527521
7            Isack Hadjar              93.542173
3          Oliver Bearman              93.850712
18           Yuki Tsunoda              93.894654
1         Fernando Alonso              93.895012
6            Pierre Gasly              93.974577
17           Lance Stroll              94.058733
16       Carlos Sainz Jr.              94.171269
5             Jack Doohan              94.288365
4       Gabriel Bortoleto              94.307500
9         Nico Hülkenberg              94.328

In [None]:
# Score the model on the held-out drivers using mean absolute error (in seconds).
y_pred = model.predict(X_test)
mae_seconds = mean_absolute_error(y_test, y_pred)
print(f"\n🔍 Model Error (MAE): {mae_seconds:.2f} seconds")


🔍 Model Error (MAE): 0.20 seconds
