## Australia 2025 Grand Prix Prediction

In [1]:
# Importing Necessary Libraries
import fastf1
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

In [2]:
# Loading Data through FastF1 API for 2024
session_2024 = fastf1.get_session(2024,3,'R')
session_2024.load()

core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 19 drivers: ['55', '16', '4', '81', '11', '18', '22', '14', '27', '20', '23', '3', '10', '77', '24', '31', '63', '44', '1']


In [3]:
# Lap Data for the Race Day
race_2024_df = session_2024.laps

In [4]:
# Selecting Necessary Columns from the DataSet
race_2024_df = race_2024_df[['Driver','LapTime']]

In [5]:
# Removing the Null Values
race_2024_df = race_2024_df.dropna()

In [6]:
# Converting the String to Datetime
race_2024_df ['LapTime (s)'] = race_2024_df['LapTime'].dt.total_seconds()

In [7]:
# Race Day Dataset
race_2024_df

Unnamed: 0,Driver,LapTime,LapTime (s)
0,VER,0 days 00:01:27.458000,87.458
1,VER,0 days 00:01:24.099000,84.099
2,VER,0 days 00:01:23.115000,83.115
4,GAS,0 days 00:01:37.304000,97.304
5,GAS,0 days 00:01:24.649000,84.649
...,...,...,...
993,PIA,0 days 00:01:20.199000,80.199
994,PIA,0 days 00:01:20.754000,80.754
995,PIA,0 days 00:01:20.357000,80.357
996,PIA,0 days 00:01:25.255000,85.255


In [8]:
# Loading Data through FastF1 API for 2025
session_2025 = fastf1.get_session(2025,1,'Qualifying')
session_2025.load()

core           INFO 	Loading data for Australian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '1', '63', '22', '23', '16', '44', '10', '55', '6', '14', '18', '7', '5', '12', '27', '30', '31', '87']


In [9]:
# Lap Data for the Race Day
quali_2025_df = session_2025.laps

In [10]:
# Selecting Necessary Columns from the DataSet
quali_2025_df = quali_2025_df[['Driver','LapTime']]

In [11]:
# Removing the Null Values
quali_2025_df = quali_2025_df.dropna()

In [12]:
# Converting the String to Datetime
quali_2025_df ['LapTime (s)'] = quali_2025_df['LapTime'].dt.total_seconds()

In [13]:
# Dropping the Columns not Required
quali_2025_df = quali_2025_df.drop('LapTime', axis = 1)
race_2024_df = race_2024_df.drop('LapTime', axis = 1)

In [14]:
# Qualifying Day Dataset
quali_2025_df

Unnamed: 0,Driver,LapTime (s)
0,NOR,110.421
1,NOR,76.003
2,NOR,122.350
3,NOR,116.260
4,NOR,75.912
...,...,...
288,OCO,77.517
290,OCO,129.804
291,OCO,77.147
293,OCO,130.240


In [15]:
race_2024_df

Unnamed: 0,Driver,LapTime (s)
0,VER,87.458
1,VER,84.099
2,VER,83.115
4,GAS,97.304
5,GAS,84.649
...,...,...
993,PIA,80.199
994,PIA,80.754
995,PIA,80.357
996,PIA,85.255


In [16]:
# Merging two different datasets on Driver Column
merged_data = pd.merge(race_2024_df,quali_2025_df, on = 'Driver')

In [17]:
merged_data

Unnamed: 0,Driver,LapTime (s)_x,LapTime (s)_y
0,VER,87.458,98.807
1,VER,87.458,76.018
2,VER,87.458,105.333
3,VER,87.458,100.522
4,VER,87.458,75.688
...,...,...,...
8168,PIA,124.363,111.838
8169,PIA,124.363,104.313
8170,PIA,124.363,76.147
8171,PIA,124.363,104.756


In [18]:
# Renaming the Column Names
merged_data.rename(columns = {'LapTime (s)_x':'LapTime (s)', 'LapTime (s)_y':'QualifyingTime (s)'}, inplace = True)

In [19]:
merged_data

Unnamed: 0,Driver,LapTime (s),QualifyingTime (s)
0,VER,87.458,98.807
1,VER,87.458,76.018
2,VER,87.458,105.333
3,VER,87.458,100.522
4,VER,87.458,75.688
...,...,...,...
8168,PIA,124.363,111.838
8169,PIA,124.363,104.313
8170,PIA,124.363,76.147
8171,PIA,124.363,104.756


In [20]:
# Model Building
X = merged_data['QualifyingTime (s)']
X_reshape = X.values.reshape(-1,1)
Y = merged_data['LapTime (s)']

In [21]:
# Splitting the Data into Training and Testing Datasets
X_train,X_test,Y_train,Y_test = train_test_split(X_reshape,Y, test_size = 0.2, random_state = 39)

In [22]:
# Using GradientBoostingRegressor Model
model = GradientBoostingRegressor(n_estimators = 100, learning_rate = 0.1, random_state = 39)
model.fit(X_train,Y_train)

In [23]:
# Predicting Laptimes
predict_laptimes = model.predict(quali_2025_df[['LapTime (s)']])
quali_2025_df['PredictedRaceTime (s)'] = predict_laptimes



In [24]:
# Sorting Values in the Dataset
quali_2025_df = quali_2025_df.sort_values(by = 'PredictedRaceTime (s)')

In [25]:
quali_2025_df[['Driver','PredictedRaceTime (s)']]

Unnamed: 0,Driver,PredictedRaceTime (s)
15,NOR,82.965730
11,NOR,82.996765
127,LEC,82.998905
62,RUS,83.171710
187,SAI,83.171710
...,...,...
164,GAS,85.151875
167,GAS,85.151930
132,HAM,85.168455
260,BOR,85.333776


In [26]:
y_pred = model.predict(X_test)
error_percentage = mean_absolute_error(Y_test, y_pred)
print(f'The Mean Absolute Error is : {error_percentage}')

The Mean Absolute Error is : 3.422286177462586
