In [None]:
pip install fastf1



In [None]:
import os
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [None]:
# Cache FastF1 downloads on disk to avoid re-downloading the data on every run
cache_dir = "f1_cache"

# Create the directory if it does not exist. exist_ok=True makes this a no-op
# when the directory is already present, so no separate existence check (and
# no window between the check and the creation) is needed.
os.makedirs(cache_dir, exist_ok=True)

# Enable caching
fastf1.Cache.enable_cache(cache_dir)

print("Cache enabled successfully!")

Cache enabled successfully!


In [None]:
# 2024 Australian GP race session: round 3 of the 2024 season, identifier "R" (race)
session_2024 = fastf1.get_session(year=2024, gp=3, identifier="R")

# Fetch the session data (served from the local cache when available)
session_2024.load()

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
DEBUG:fastf1.ergast:Failed to parse timestamp '-1:57:37.891' in Ergastresponse.
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timi

In [None]:
# Inspect the schema of the 2024 race results, then preview the first five rows
session_2024.results.info()
session_2024.results.head()

<class 'fastf1.core.SessionResults'>
Index: 19 entries, 55 to 1
Data columns (total 21 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   DriverNumber        19 non-null     object         
 1   BroadcastName       19 non-null     object         
 2   Abbreviation        19 non-null     object         
 3   DriverId            19 non-null     object         
 4   TeamName            19 non-null     object         
 5   TeamColor           19 non-null     object         
 6   TeamId              19 non-null     object         
 7   FirstName           19 non-null     object         
 8   LastName            19 non-null     object         
 9   FullName            19 non-null     object         
 10  HeadshotUrl         19 non-null     object         
 11  CountryCode         19 non-null     object         
 12  Position            19 non-null     float64        
 13  ClassifiedPosition  19 non-null     object 

Unnamed: 0,DriverNumber,BroadcastName,Abbreviation,DriverId,TeamName,TeamColor,TeamId,FirstName,LastName,FullName,...,CountryCode,Position,ClassifiedPosition,GridPosition,Q1,Q2,Q3,Time,Status,Points
55,55,C SAINZ,SAI,sainz,Ferrari,E80020,ferrari,Carlos,Sainz,Carlos Sainz,...,ESP,1.0,1,2.0,NaT,NaT,NaT,0 days 01:20:26.843000,Finished,25.0
16,16,C LECLERC,LEC,leclerc,Ferrari,E80020,ferrari,Charles,Leclerc,Charles Leclerc,...,MON,2.0,2,4.0,NaT,NaT,NaT,0 days 00:00:02.366000,Finished,19.0
4,4,L NORRIS,NOR,norris,McLaren,FF8000,mclaren,Lando,Norris,Lando Norris,...,GBR,3.0,3,3.0,NaT,NaT,NaT,0 days 00:00:05.904000,Finished,15.0
81,81,O PIASTRI,PIA,piastri,McLaren,FF8000,mclaren,Oscar,Piastri,Oscar Piastri,...,AUS,4.0,4,5.0,NaT,NaT,NaT,0 days 00:00:35.770000,Finished,12.0
11,11,S PEREZ,PER,perez,Red Bull Racing,3671C6,red_bull,Sergio,Perez,Sergio Perez,...,MEX,5.0,5,6.0,NaT,NaT,NaT,0 days 00:00:56.309000,Finished,10.0


In [None]:
# 2025 Qualifying Data, one record per driver:
# (driver full name, final qualifying lap as "M:SS.sss", 2025 grid position)
_qualifying_rows = [
    ("Lando Norris", "1:15.096", 1),
    ("Oscar Piastri", "1:15.180", 2),
    ("Max Verstappen", "1:15.481", 3),
    ("George Russell", "1:15.546", 4),
    ("Yuki Tsunoda", "1:15.670", 5),
    ("Alexander Albon", "1:15.737", 6),
    ("Charles Leclerc", "1:15.755", 7),
    ("Lewis Hamilton", "1:15.973", 8),
    ("Pierre Gasly", "1:15.980", 9),
    ("Carlos Sainz", "1:16.062", 10),
    ("Fernando Alonso", "1:16.453", 12),
    ("Lance Stroll", "1:16.483", 13),
]

qualifying_2025 = pd.DataFrame(
    _qualifying_rows,
    columns=["Driver", "FinalQualifyingTime", "GridPosition_2025"],
)

# Convert lap times to seconds
def convert_time_to_seconds(time_str):
    """Convert a colon-separated lap time string to seconds.

    Accepts "SS.sss", "M:SS.sss" and "H:MM:SS.sss" style values: each
    colon-separated field is weighted 60 times the field to its right.

    Parameters
    ----------
    time_str : str or missing value
        The lap time to convert, e.g. "1:15.096".

    Returns
    -------
    float or None
        The time in seconds, or None when ``time_str`` is missing
        (``pd.isna`` is true for it).

    Raises
    ------
    ValueError
        If any colon-separated field is not a valid number.
    """
    if pd.isna(time_str):
        return None

    # Fold the fields left to right so the same loop handles one, two or
    # three fields: "1:15.096" -> (1 * 60) + 15.096.
    total_seconds = 0.0
    for field in str(time_str).strip().split(":"):
        total_seconds = total_seconds * 60 + float(field)
    return total_seconds

# Swap the "M:SS.sss" string column for a numeric seconds column in one chained step
qualifying_2025 = (
    qualifying_2025
    .assign(**{
        "QualifyingTime (s)": lambda frame: frame["FinalQualifyingTime"].apply(convert_time_to_seconds)
    })
    .drop(columns=["FinalQualifyingTime"])  # the original string column is no longer needed
)

qualifying_2025

Unnamed: 0,Driver,GridPosition_2025,QualifyingTime (s)
0,Lando Norris,1,75.096
1,Oscar Piastri,2,75.18
2,Max Verstappen,3,75.481
3,George Russell,4,75.546
4,Yuki Tsunoda,5,75.67
5,Alexander Albon,6,75.737
6,Charles Leclerc,7,75.755
7,Lewis Hamilton,8,75.973
8,Pierre Gasly,9,75.98
9,Carlos Sainz,10,76.062


In [None]:
# Map full names to FastF1 3-letter codes
driver_mapping = {
    "Lando Norris": "NOR", "Oscar Piastri": "PIA", "Max Verstappen": "VER", "George Russell": "RUS",
    "Yuki Tsunoda": "TSU", "Alexander Albon": "ALB", "Charles Leclerc": "LEC", "Lewis Hamilton": "HAM",
    "Pierre Gasly": "GAS", "Carlos Sainz": "SAI", "Isack Hadjar": "HAD", "Fernando Alonso": "ALO",
    "Lance Stroll": "STR", "Jack Doohan": "DOO", "Gabriel Bortoleto": "BOR", "Andrea Kimi Antonelli": "ANT",
    "Nico Hulkenberg": "HUL", "Liam Lawson": "LAW", "Esteban Ocon": "OCO", "Oliver Bearman": "BEA"
}

qualifying_2025["DriverCode"] = qualifying_2025["Driver"].map(driver_mapping)

# Series.map leaves NaN for any name missing from the mapping. Fail here, with
# the offending names, instead of carrying a NaN join key into later merges.
unmapped_drivers = qualifying_2025.loc[qualifying_2025["DriverCode"].isna(), "Driver"].tolist()
assert not unmapped_drivers, "No FastF1 driver code for: {}".format(unmapped_drivers)

qualifying_2025

Unnamed: 0,Driver,GridPosition_2025,QualifyingTime (s),DriverCode
0,Lando Norris,1,75.096,NOR
1,Oscar Piastri,2,75.18,PIA
2,Max Verstappen,3,75.481,VER
3,George Russell,4,75.546,RUS
4,Yuki Tsunoda,5,75.67,TSU
5,Alexander Albon,6,75.737,ALB
6,Charles Leclerc,7,75.755,LEC
7,Lewis Hamilton,8,75.973,HAM
8,Pierre Gasly,9,75.98,GAS
9,Carlos Sainz,10,76.062,SAI


In [None]:
# Summarise the 2024 race laps table (columns, non-null counts, dtypes) before selecting features
session_2024.laps.info()

<class 'fastf1.core.Laps'>
RangeIndex: 998 entries, 0 to 997
Data columns (total 31 columns):
 #   Column              Non-Null Count  Dtype          
---  ------              --------------  -----          
 0   Time                998 non-null    timedelta64[ns]
 1   Driver              998 non-null    object         
 2   DriverNumber        998 non-null    object         
 3   LapTime             995 non-null    timedelta64[ns]
 4   LapNumber           998 non-null    float64        
 5   Stint               998 non-null    float64        
 6   PitOutTime          37 non-null     timedelta64[ns]
 7   PitInTime           37 non-null     timedelta64[ns]
 8   Sector1Time         977 non-null    timedelta64[ns]
 9   Sector2Time         996 non-null    timedelta64[ns]
 10  Sector3Time         995 non-null    timedelta64[ns]
 11  Sector1SessionTime  972 non-null    timedelta64[ns]
 12  Sector2SessionTime  996 non-null    timedelta64[ns]
 13  Sector3SessionTime  995 non-null    timedelta6

In [None]:
# Extract driver, lap time, lap number, and tire compound for each lap of the 2024 session
laps_2024 = session_2024.laps[["Driver", "LapTime", "LapNumber", "Compound"]].copy()

# Drop laps without a recorded time, then convert LapTime to seconds
laps_2024.dropna(subset=["LapTime"], inplace=True)
laps_2024["LapTime (s)"] = laps_2024["LapTime"].dt.total_seconds()

# Extract grid position from 2024 race results (renamed to keep it distinct
# from the 2025 grid position column)
grid_positions_2024 = session_2024.results[["Abbreviation", "GridPosition"]].rename(
    columns={"GridPosition": "GridPosition_2024"}
)

# Attach last year's grid position to every lap. validate="many_to_one" raises
# if a driver abbreviation appears more than once in the results, which would
# otherwise silently duplicate lap rows. "Abbreviation" duplicates "Driver"
# after the join, so it is dropped.
laps_2024 = laps_2024.merge(
    grid_positions_2024,
    left_on="Driver",
    right_on="Abbreviation",
    how="left",
    validate="many_to_one",
).drop(columns=["Abbreviation"])

# Merge 2025 Qualifying Data with 2024 Race Data (one row per 2024 lap of each 2025 driver)
merged_data = qualifying_2025.merge(laps_2024, left_on="DriverCode", right_on="Driver", how="left")

# The left join keeps a 2025 driver with no 2024 laps as a single row whose lap
# columns are all NaN. Drop such rows so a NaN lap time never reaches the model
# as a training target.
merged_data = merged_data.dropna(subset=["LapTime (s)"]).reset_index(drop=True)

# Display the updated dataframe
merged_data.head()

Unnamed: 0,Driver_x,GridPosition_2025,QualifyingTime (s),DriverCode,Driver_y,LapTime,LapNumber,Compound,LapTime (s),GridPosition_2024
0,Lando Norris,1,75.096,NOR,NOR,0 days 00:01:29.784000,1.0,MEDIUM,89.784,3.0
1,Lando Norris,1,75.096,NOR,NOR,0 days 00:01:23.183000,2.0,MEDIUM,83.183,3.0
2,Lando Norris,1,75.096,NOR,NOR,0 days 00:01:22.656000,3.0,MEDIUM,82.656,3.0
3,Lando Norris,1,75.096,NOR,NOR,0 days 00:01:22.609000,4.0,MEDIUM,82.609,3.0
4,Lando Norris,1,75.096,NOR,NOR,0 days 00:01:22.685000,5.0,MEDIUM,82.685,3.0


In [None]:
# GradientBoostingRegressor, train_test_split and mean_absolute_error are
# imported in the imports cell at the top of the notebook.

# Features: 2025 qualifying time and 2025 grid position. Target: 2024 race lap time.
feature_columns = ["QualifyingTime (s)", "GridPosition_2025"]
X = merged_data[feature_columns]
y = merged_data["LapTime (s)"]

# Train Gradient Boosting Model
# NOTE(review): both features are constant per driver, and this split is random
# per lap, so laps from the same driver land in both the train and test sets.
# The MAE below therefore does not measure accuracy on unseen drivers.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=39)
model = GradientBoostingRegressor(n_estimators=1000, learning_rate=0.01, random_state=39)
model.fit(X_train, y_train)

# Predict using 2025 data
predicted_lap_times = model.predict(qualifying_2025[feature_columns])
qualifying_2025["PredictedRaceTime (s)"] = predicted_lap_times

# Display final predictions, fastest predicted time first, so the printed
# table is an actual ranking and not the qualifying order.
predicted_standings = qualifying_2025.sort_values("PredictedRaceTime (s)").reset_index(drop=True)
print("\n Predicted 2025 Australian GP Standings\n")
print(predicted_standings[["Driver", "PredictedRaceTime (s)"]])

# Evaluate Model on the held-out laps
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print("\nModel Error (MAE): {:.2f} seconds".format(mae))


 Predicted 2025 Australian GP Standings

             Driver  PredictedRaceTime (s)
0      Lando Norris              82.713339
1     Oscar Piastri              84.328823
2    Max Verstappen              85.144804
3    George Russell              83.889971
4      Yuki Tsunoda              84.417393
5   Alexander Albon              84.642920
6   Charles Leclerc              83.080425
7    Lewis Hamilton              86.096218
8      Pierre Gasly              85.541001
9      Carlos Sainz              83.622659
10  Fernando Alonso              83.871622
11     Lance Stroll              85.286795

Model Error (MAE): 3.47 seconds
