In [2]:
pip install fastf1 pandas numpy scikit-learn xgboost


Collecting fastf1
  Downloading fastf1-3.5.3-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-24.1.3-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.1-py3-none-any.whl.metadata (5.6 kB)
Downloading fastf1-3.5.3-py3-none-any.whl (151 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [3]:
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error



import warnings
warnings.filterwarnings('ignore')
import logging
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from sklearn.ensemble import RandomForestRegressor

# Generic Functions

In [5]:
def get_race_data(year,location,event):
    session_2024 = fastf1.get_session(year, location, event)
    print(f"Getting {location}'s {year} data")
    session_2024.load()
    session_2024_df = session_2024.laps[["Driver","Team", "LapTime"]].copy()
    session_2024_df.dropna(inplace=True)
    # Convert times to seconds
    for col in ["LapTime"]:
        session_2024_df[f"{col}_(s)_{location}"] = session_2024_df[col].dt.total_seconds()
    session_2024_df = session_2024_df.groupby(["Driver","Team"])[f"LapTime_(s)_{location}"].mean().reset_index()
    return session_2024_df

In [6]:
def calculate_constructor_points(df):
    # F1 point system for top 10 positions (optional)
    f1_points = [25, 18, 15, 12, 10, 8, 6, 4, 2, 1]

    # Get positions
    try:
        sai_pos = df[df['Driver'] == 'SAI'].index[0]
        alb_pos = df[df['Driver'] == 'ALB'].index[0]
    except IndexError:
        return "One or both drivers not found in the DataFrame."

    # Calculate points based on F1 system (0 points if position > 9)
    sai_points = f1_points[sai_pos] if sai_pos < len(f1_points) else 0
    alb_points = f1_points[alb_pos] if alb_pos < len(f1_points) else 0

    # Total constructor points
    total_points = sai_points + alb_points

    print(f"Carlos's position: {sai_pos + 1}, points: {sai_points}")
    print(f"Alex's position: {alb_pos + 1}, points: {alb_points}")
    print(f"Total constructor points: {total_points}")

    return total_points

In [7]:
def get_avg_constructor_points(year: int, race_places: list) -> pd.DataFrame:
    constructor_points = {}

    for race in race_places:
        try:
            session = fastf1.get_session(year, race, 'R')
            session.load()
            results = session.results

            for _, row in results.iterrows():
                team = row['TeamName']
                points = row['Points']
                constructor_points.setdefault(team, []).append(points)

        except Exception as e:
            print(f"Error loading {race} {year}: {e}")
            continue

    # Compute average points per race for each constructor
    constructor_avg = {
        team: sum(points_list) / len(points_list)
        for team, points_list in constructor_points.items()
        if len(points_list) > 0
    }

    # Return as DataFrame
    df = pd.DataFrame([
        {"Team": team, "AvgConstructorPointsPerRace": avg}
        for team, avg in constructor_avg.items()
    ])

    return df.sort_values(by="AvgConstructorPointsPerRace", ascending=False)

# 2024 data

In [8]:
Bah_2024_df = get_race_data(2024, "Bahrain", "R")
SA_2024_df = get_race_data(2024, "Saudi Arabia", "R")
Aus_2024_df = get_race_data(2024, "Australia", "R")
Jap_2024_df = get_race_data(2024, "Japan", "R")
Chi_2024_df = get_race_data(2024, "China", "R")
Mia_2024_df = get_race_data(2024, "Miami", "R")
Emi_2024_df = get_race_data(2024, "Emilia-Romagna", "R")
mon_2024_df = get_race_data(2024,"Monaco","R")
spa_2024_df = get_race_data(2024, "Spain", "R")

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	No cached data found for session_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
INFO:fastf1.api:Fetching session info data...


Getting Bahrain's 2024 data


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
INFO:fastf1.fastf1.req:No cached data found for lap_count. Loading data...
_api           INFO 	F

Getting Saudi Arabia's 2024 data


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
INFO:fastf1.fastf1.req:No cached data found for lap_count. Loading data...
_api           INFO 	F

Getting Australia's 2024 data


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
DEBUG:fastf1.ergast:Failed to parse timestamp '-1:57:37.891' in Ergastresponse.
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
INFO:fastf1.fastf

Getting Japan's 2024 data


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
INFO:fastf1.fastf1.req:No cached data found for lap_count. Loading data...
_api           INFO 	F

Getting China's 2024 data


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
INFO:fastf1.fastf1.req:No cached data found for lap_count. Loading data...
_api           INFO 	F

Getting Miami's 2024 data


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
INFO:fastf1.fastf1.req:No cached data found for lap_count. Loading data...
_api           INFO 	F

Getting Emilia-Romagna's 2024 data


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
INFO:fastf1.fastf1.req:No cached data found for lap_count. Loading data...
_api           INFO 	F

Getting Monaco's 2024 data


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
INFO:fastf1.fastf1.req:No cached data found for lap_count. Loading data...
_api           INFO 	F

Getting Spain's 2024 data


req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
INFO:fastf1.fastf1.req:No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
INFO:fastf1.api:Fetching driver list...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
INFO:fastf1.fastf1.req:No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
INFO:fastf1.api:Fetching session status data...
req            INFO 	Data has been written to cache!
INFO:fastf1.fastf1.req:Data has been written to cache!
req            INFO 	No cached data found for lap_count. Loading data...
INFO:fastf1.fastf1.req:No cached data found for lap_count. Loading data...
_api           INFO 	F

In [54]:
merged_data_2024 = Bah_2024_df.merge(SA_2024_df, on= ['Driver','Team'],how='outer').merge(Aus_2024_df, on= ['Driver','Team'],how='outer')\
.merge(Jap_2024_df, on= ['Driver','Team'],how='outer').merge(Chi_2024_df, on= ['Driver','Team'],how='outer').merge(Mia_2024_df, on= ['Driver','Team'],how='outer')\
.merge(Emi_2024_df, on= ['Driver','Team'],how='outer').merge(mon_2024_df, on=['Driver','Team'], how='outer').merge(spa_2024_df, on=['Driver','Team'], how='outer')
merged_data_2024

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna,LapTime_(s)_Monaco,LapTime_(s)_Spain
0,ALB,Williams,98.511214,96.972327,84.769333,,109.251071,96.946789,84.49362,112.332545,82.436554
1,ALO,Aston Martin,97.888228,94.55775,84.617845,98.639314,107.299648,96.26286,83.877032,113.150105,81.688062
2,BEA,Ferrari,,96.048245,,,,,,,
3,BOT,Kick Sauber,98.503745,97.688667,85.424018,101.448647,104.132632,96.530386,83.574113,113.158447,82.362154
4,GAS,Alpine,98.877839,121.453,85.312211,102.096196,109.317446,96.400596,83.46721,112.412922,81.246242
5,HAM,Mercedes,97.457298,94.955042,85.2284,98.922863,109.116786,95.902825,81.910413,109.83961,80.576015
6,HUL,Haas F1 Team,98.613821,95.764625,85.024069,100.18244,108.411909,96.327456,83.073048,,81.430939
7,LEC,Ferrari,97.270368,94.101208,83.262224,98.537588,106.020943,94.780893,81.478857,109.32074,80.650545
8,MAG,Haas F1 Team,98.447464,96.91802,84.753053,101.488706,108.536345,96.571211,83.09171,,82.384846
9,NOR,McLaren,97.424561,94.858021,83.323224,98.261392,107.444545,94.522071,81.364714,109.565429,80.340091


In [55]:
merged_data_2024.loc[merged_data_2024["Driver"] == "SAI", "LapTime_(s)_Saudi Arabia"] = merged_data_2024.loc[merged_data_2024["Driver"] == "BEA", "LapTime_(s)_Saudi Arabia"].values[0]

# Drop BEA from the DataFrame
merged_data_2024_m = merged_data_2024[merged_data_2024["Driver"] != "BEA"]
merged_data_2024_m = merged_data_2024_m.reset_index(drop=True)
merged_data_2024_m

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna,LapTime_(s)_Monaco,LapTime_(s)_Spain
0,ALB,Williams,98.511214,96.972327,84.769333,,109.251071,96.946789,84.49362,112.332545,82.436554
1,ALO,Aston Martin,97.888228,94.55775,84.617845,98.639314,107.299648,96.26286,83.877032,113.150105,81.688062
2,BOT,Kick Sauber,98.503745,97.688667,85.424018,101.448647,104.132632,96.530386,83.574113,113.158447,82.362154
3,GAS,Alpine,98.877839,121.453,85.312211,102.096196,109.317446,96.400596,83.46721,112.412922,81.246242
4,HAM,Mercedes,97.457298,94.955042,85.2284,98.922863,109.116786,95.902825,81.910413,109.83961,80.576015
5,HUL,Haas F1 Team,98.613821,95.764625,85.024069,100.18244,108.411909,96.327456,83.073048,,81.430939
6,LEC,Ferrari,97.270368,94.101208,83.262224,98.537588,106.020943,94.780893,81.478857,109.32074,80.650545
7,MAG,Haas F1 Team,98.447464,96.91802,84.753053,101.488706,108.536345,96.571211,83.09171,,82.384846
8,NOR,McLaren,97.424561,94.858021,83.323224,98.261392,107.444545,94.522071,81.364714,109.565429,80.340091
9,OCO,Alpine,98.860571,97.310542,85.628456,101.825667,109.202964,96.309158,83.373532,,81.395697


In [56]:
race_places = ["Bahrain", "Saudi Arabia", "Australia", "Japan", "China", "Miami", "Emilia-Romagna","Monaco"]
year = 2024

avg_points_df = get_avg_constructor_points(year, race_places)
print(avg_points_df)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core         

              Team  AvgConstructorPointsPerRace
0  Red Bull Racing                    15.500000
1          Ferrari                    14.500000
3          McLaren                    11.000000
2         Mercedes                     5.500000
4     Aston Martin                     2.750000
7               RB                     1.125000
6     Haas F1 Team                     0.312500
8         Williams                     0.133333
9           Alpine                     0.125000
5      Kick Sauber                     0.000000


In [57]:
avg_points_df_2025 = get_avg_constructor_points(2025, race_places)
print(avg_points_df_2025)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core         

              Team  AvgConstructorPointsPerRace
0          McLaren                      18.5000
3  Red Bull Racing                       8.3750
1         Mercedes                       8.3125
2          Ferrari                       7.7500
6         Williams                       3.3750
5     Haas F1 Team                       1.6250
7     Racing Bulls                       1.1875
8     Aston Martin                       0.6250
4           Alpine                       0.3750
9      Kick Sauber                       0.3750


In [58]:
avg_points_df_2025['Team'] = avg_points_df_2025['Team'].replace('Racing Bulls', 'RB')
print(avg_points_df_2025)

              Team  AvgConstructorPointsPerRace
0          McLaren                      18.5000
3  Red Bull Racing                       8.3750
1         Mercedes                       8.3125
2          Ferrari                       7.7500
6         Williams                       3.3750
5     Haas F1 Team                       1.6250
7               RB                       1.1875
8     Aston Martin                       0.6250
4           Alpine                       0.3750
9      Kick Sauber                       0.3750


# 2025 data

In [14]:
Bah_2025_df = get_race_data(2025, "Bahrain", "R")
SA_2025_df = get_race_data(2025, "Saudi Arabia", "R")
Aus_2025_df = get_race_data(2025, "Australia", "R")
Jap_2025_df = get_race_data(2025, "Japan", "R")
Chi_2025_df = get_race_data(2025, "China", "R")
Mia_2025_df = get_race_data(2025, "Miami", "R")
Emi_2025_df = get_race_data(2025, "Emilia-Romagna", "R")
Mon_2025_df = get_race_data(2025, "Monaco", "R")


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data
core         

Getting Bahrain's 2025 data


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '4', '16', '44', '1', '10', '31', '22', '87', '12', '23', '6', '7', '14', '30', '18', '5', '55', '27']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['81', '63', '4', '16', '44', '1', '10', '31', '22', '87', '12', '23', '6', '7', '14', '30', '18', '5', '55', '27']
core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Saudi Arabian Grand Prix - Race [v3.5.3]
req            INF

Getting Saudi Arabia's 2025 data


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '1', '16', '4', '63', '12', '44', '55', '23', '6', '14', '30', '87', '31', '27', '18', '7', '5', '22', '10']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['81', '1', '16', '4', '63', '12', '44', '55', '23', '6', '14', '30', '87', '31', '27', '18', '7', '5', '22', '10']
core           INFO 	Loading data for Australian Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Australian Grand Prix - Race [v3.5.3]
req            INFO 	Usi

Getting Australia's 2025 data


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '63', '12', '23', '18', '27', '16', '81', '44', '10', '22', '31', '87', '30', '5', '14', '55', '7', '6']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['4', '1', '63', '12', '23', '18', '27', '16', '81', '44', '10', '22', '31', '87', '30', '5', '14', '55', '7', '6']
core           INFO 	Loading data for Japanese Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Japanese Grand Prix - Race [v3.5.3]
req            INFO 	Using c

Getting Japan's 2025 data


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '16', '63', '12', '44', '6', '23', '87', '14', '22', '10', '55', '7', '27', '30', '31', '5', '18']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['1', '4', '81', '16', '63', '12', '44', '6', '23', '87', '14', '22', '10', '55', '7', '27', '30', '31', '5', '18']
core           INFO 	Loading data for Chinese Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Chinese Grand Prix - Race [v3.5.3]
req            INFO 	Using cac

Getting China's 2025 data


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '63', '1', '31', '12', '23', '87', '18', '55', '6', '30', '7', '5', '27', '22', '14', '16', '44', '10']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['81', '4', '63', '1', '31', '12', '23', '87', '18', '55', '6', '30', '7', '5', '27', '22', '14', '16', '44', '10']
core           INFO 	Loading data for Miami Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Miami Grand Prix - Race [v3.5.3]
req            INFO 	Using cached 

Getting Miami's 2025 data


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '4', '63', '1', '23', '12', '16', '44', '55', '22', '6', '31', '10', '27', '14', '18', '30', '5', '87', '7']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['81', '4', '63', '1', '23', '12', '16', '44', '55', '22', '6', '31', '10', '27', '14', '18', '30', '5', '87', '7']
core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Emilia Romagna Grand Prix - Race [v3.5.3]
req            I

Getting Emilia-Romagna's 2025 data


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '81', '44', '23', '16', '63', '55', '6', '22', '14', '27', '10', '30', '18', '43', '87', '5', '12', '31']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['1', '4', '81', '44', '23', '16', '63', '55', '6', '22', '14', '27', '10', '30', '18', '43', '87', '5', '12', '31']
core           INFO 	Loading data for Monaco Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Monaco Grand Prix - Race [v3.5.3]
req            INFO 	Using cac

Getting Monaco's 2025 data


req            INFO 	Using cached data for car_data
INFO:fastf1.fastf1.req:Using cached data for car_data
req            INFO 	Using cached data for position_data
INFO:fastf1.fastf1.req:Using cached data for position_data
req            INFO 	Using cached data for weather_data
INFO:fastf1.fastf1.req:Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
INFO:fastf1.fastf1.req:Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '16', '81', '1', '44', '6', '31', '30', '23', '55', '63', '87', '43', '5', '18', '27', '22', '12', '14', '10']
INFO:fastf1.fastf1.core:Finished loading data for 20 drivers: ['4', '16', '81', '1', '44', '6', '31', '30', '23', '55', '63', '87', '43', '5', '18', '27', '22', '12', '14', '10']


In [59]:
merged_data_new = Bah_2025_df.merge(SA_2025_df, on= ['Driver','Team'],how='outer').merge(Aus_2025_df, on= ['Driver','Team'],how='outer')\
.merge(Jap_2025_df, on= ['Driver' ,'Team'],how='outer').merge(Chi_2025_df, on= ['Driver','Team'],how='outer').merge(Mia_2025_df, on= ['Driver','Team'],how='outer').merge(Emi_2025_df,on=['Driver','Team'],how='outer').merge(Mon_2025_df,on=['Driver','Team'],how='outer')
merged_data_new

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna,LapTime_(s)_Monaco
0,ALB,Williams,100.663357,95.926667,104.672389,93.723585,98.416911,94.379895,87.478476,79.548842
1,ALO,Aston Martin,100.858357,96.135208,98.517379,94.043698,111.3935,95.581536,87.626175,80.752833
2,ANT,Mercedes,100.647446,95.124646,104.57937,93.314226,98.370964,94.510333,83.857818,80.56336
3,BEA,Haas F1 Team,101.526825,96.305375,103.870075,93.990792,98.505875,95.954556,87.73027,80.112316
4,BOR,Kick Sauber,100.971571,97.150894,102.655634,94.544906,99.377855,98.1153,87.805889,80.292987
5,COL,Alpine,,,,,,,87.723968,80.144171
6,DOO,Alpine,101.618263,97.136213,,94.49617,98.811196,,,
7,GAS,Alpine,101.323456,,104.889167,94.134057,98.611089,95.123368,87.692429,93.944143
8,HAD,Racing Bulls,101.679807,96.001021,,93.662585,98.819661,94.845421,87.568016,79.206818
9,HAM,Ferrari,100.272036,95.272271,103.967189,93.512547,97.864411,94.592509,87.421508,78.015769


In [60]:
if pd.isna(merged_data_new.at[11, 'LapTime_(s)_Australia']):
    merged_data_new.at[11, 'LapTime_(s)_Australia'] = merged_data_new.at[20, 'LapTime_(s)_Australia']

if pd.isna(merged_data_new.at[11, 'LapTime_(s)_China']):
    merged_data_new.at[11, 'LapTime_(s)_China'] = merged_data_new.at[20, 'LapTime_(s)_China']

if pd.isna(merged_data_new.at[21, 'LapTime_(s)_Australia']):
    merged_data_new.at[21, 'LapTime_(s)_Australia'] = merged_data_new.at[12, 'LapTime_(s)_Australia']

if pd.isna(merged_data_new.at[21, 'LapTime_(s)_China']):
    merged_data_new.at[21, 'LapTime_(s)_China'] = merged_data_new.at[12, 'LapTime_(s)_China']

if pd.isna(merged_data_new.at[6, 'LapTime_(s)_Emilia-Romagna']):
    merged_data_new.at[6, 'LapTime_(s)_Emilia-Romagna'] = merged_data_new.at[5, 'LapTime_(s)_Emilia-Romagna']

merged_data_new = merged_data_new.drop([12,20,5]).reset_index(drop=True)
merged_data_new['Team'] = merged_data_new['Team'].replace('Racing Bulls', 'RB')
merged_data_new

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna,LapTime_(s)_Monaco
0,ALB,Williams,100.663357,95.926667,104.672389,93.723585,98.416911,94.379895,87.478476,79.548842
1,ALO,Aston Martin,100.858357,96.135208,98.517379,94.043698,111.3935,95.581536,87.626175,80.752833
2,ANT,Mercedes,100.647446,95.124646,104.57937,93.314226,98.370964,94.510333,83.857818,80.56336
3,BEA,Haas F1 Team,101.526825,96.305375,103.870075,93.990792,98.505875,95.954556,87.73027,80.112316
4,BOR,Kick Sauber,100.971571,97.150894,102.655634,94.544906,99.377855,98.1153,87.805889,80.292987
5,DOO,Alpine,101.618263,97.136213,,94.49617,98.811196,,87.723968,
6,GAS,Alpine,101.323456,,104.889167,94.134057,98.611089,95.123368,87.692429,93.944143
7,HAD,RB,101.679807,96.001021,,93.662585,98.819661,94.845421,87.568016,79.206818
8,HAM,Ferrari,100.272036,95.272271,103.967189,93.512547,97.864411,94.592509,87.421508,78.015769
9,HUL,Kick Sauber,101.629947,96.809021,104.740167,94.508302,99.550036,95.227536,87.674524,80.334039


# New Feature - avg constructor points per team per race

In [61]:
merged_data_new = merged_data_new.merge(
    avg_points_df_2025,
    on="Team",
    how="left"
)
merged_data_new

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna,LapTime_(s)_Monaco,AvgConstructorPointsPerRace
0,ALB,Williams,100.663357,95.926667,104.672389,93.723585,98.416911,94.379895,87.478476,79.548842,3.375
1,ALO,Aston Martin,100.858357,96.135208,98.517379,94.043698,111.3935,95.581536,87.626175,80.752833,0.625
2,ANT,Mercedes,100.647446,95.124646,104.57937,93.314226,98.370964,94.510333,83.857818,80.56336,8.3125
3,BEA,Haas F1 Team,101.526825,96.305375,103.870075,93.990792,98.505875,95.954556,87.73027,80.112316,1.625
4,BOR,Kick Sauber,100.971571,97.150894,102.655634,94.544906,99.377855,98.1153,87.805889,80.292987,0.375
5,DOO,Alpine,101.618263,97.136213,,94.49617,98.811196,,87.723968,,0.375
6,GAS,Alpine,101.323456,,104.889167,94.134057,98.611089,95.123368,87.692429,93.944143,0.375
7,HAD,RB,101.679807,96.001021,,93.662585,98.819661,94.845421,87.568016,79.206818,1.1875
8,HAM,Ferrari,100.272036,95.272271,103.967189,93.512547,97.864411,94.592509,87.421508,78.015769,7.75
9,HUL,Kick Sauber,101.629947,96.809021,104.740167,94.508302,99.550036,95.227536,87.674524,80.334039,0.375


In [62]:
merged_data_2024_m = merged_data_2024_m.merge(
    avg_points_df,
    on="Team",
    how="left"
)
merged_data_2024_m

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna,LapTime_(s)_Monaco,LapTime_(s)_Spain,AvgConstructorPointsPerRace
0,ALB,Williams,98.511214,96.972327,84.769333,,109.251071,96.946789,84.49362,112.332545,82.436554,0.133333
1,ALO,Aston Martin,97.888228,94.55775,84.617845,98.639314,107.299648,96.26286,83.877032,113.150105,81.688062,2.75
2,BOT,Kick Sauber,98.503745,97.688667,85.424018,101.448647,104.132632,96.530386,83.574113,113.158447,82.362154,0.0
3,GAS,Alpine,98.877839,121.453,85.312211,102.096196,109.317446,96.400596,83.46721,112.412922,81.246242,0.125
4,HAM,Mercedes,97.457298,94.955042,85.2284,98.922863,109.116786,95.902825,81.910413,109.83961,80.576015,5.5
5,HUL,Haas F1 Team,98.613821,95.764625,85.024069,100.18244,108.411909,96.327456,83.073048,,81.430939,0.3125
6,LEC,Ferrari,97.270368,94.101208,83.262224,98.537588,106.020943,94.780893,81.478857,109.32074,80.650545,14.5
7,MAG,Haas F1 Team,98.447464,96.91802,84.753053,101.488706,108.536345,96.571211,83.09171,,82.384846,0.3125
8,NOR,McLaren,97.424561,94.858021,83.323224,98.261392,107.444545,94.522071,81.364714,109.565429,80.340091,11.0
9,OCO,Alpine,98.860571,97.310542,85.628456,101.825667,109.202964,96.309158,83.373532,,81.395697,0.125


# Training

In [63]:
X_train = pd.get_dummies(
    merged_data_2024_m.drop(['Driver', 'LapTime_(s)_Spain'], axis=1).fillna(140),
    dtype=int
).copy()

y_train = merged_data_2024_m[['LapTime_(s)_Spain']].fillna(140).values

X_final = pd.get_dummies(
    merged_data_new.drop(['Driver'], axis=1).fillna(140),
    dtype=int
).copy().values

# XgBoost

In [64]:
# Initialize XGBoost Regressor
xg_reg = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)

# Define parameter grid for tuning
param_grid = {
    'n_estimators': [100, 300, 500],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.8, 1],
    'colsample_bytree': [0.8, 1]
}

# Set up GridSearchCV
grid_search = GridSearchCV(
    estimator=xg_reg,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
    verbose=2
)

# Fit GridSearchCV
grid_search.fit(X_train, y_train)

# Best parameters and model
print("Best parameters found: ", grid_search.best_params_)
best_model = grid_search.best_estimator_

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best parameters found:  {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 500, 'subsample': 0.8}


In [65]:
y_pred_rf_grid = best_model.predict(X_final)

In [66]:
merged_data_new["PredictedRaceTime (s) Spain"] = y_pred_rf_grid

In [67]:
merged_data_new.sort_values(by='PredictedRaceTime (s) Spain')

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna,LapTime_(s)_Monaco,AvgConstructorPointsPerRace,PredictedRaceTime (s) Spain
6,GAS,Alpine,101.323456,,104.889167,94.134057,98.611089,95.123368,87.692429,93.944143,0.375,81.764053
8,HAM,Ferrari,100.272036,95.272271,103.967189,93.512547,97.864411,94.592509,87.421508,78.015769,7.75,81.8302
15,RUS,Mercedes,100.023418,94.932833,103.68634,93.289528,97.609339,94.197035,87.543381,79.837987,8.3125,81.830597
2,ANT,Mercedes,100.647446,95.124646,104.57937,93.314226,98.370964,94.510333,83.857818,80.56336,8.3125,81.836746
14,PIA,McLaren,99.524375,94.322646,102.084462,93.002113,97.411179,93.536614,87.399286,77.403859,18.5,81.856384
19,VER,Red Bull Racing,101.295263,94.357229,103.341151,92.961943,97.708607,94.237596,87.193635,77.620705,8.375,81.856384
12,NOR,McLaren,99.984,94.714229,103.428302,92.988792,97.58525,93.617842,87.290603,77.356962,18.5,81.856537
11,LEC,Ferrari,100.018643,94.547104,103.933528,93.26566,97.825661,94.537246,87.523381,77.397103,7.75,81.856537
5,DOO,Alpine,101.618263,97.136213,,94.49617,98.811196,,87.723968,,0.375,81.917175
0,ALB,Williams,100.663357,95.926667,104.672389,93.723585,98.416911,94.379895,87.478476,79.548842,3.375,81.949326


# Random Forest

In [68]:
# Initialize the model
rf = RandomForestRegressor(random_state=42)

# Define hyperparameter grid
param_grid = {
    'n_estimators': [500, 1000, 1500],
    'max_depth': [10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [2,3],
    'max_features': ['auto', 'sqrt']
}

# Initialize GridSearchCV
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,                      # 5-fold cross-validation
    scoring='neg_mean_squared_error',
    n_jobs=-1,                 # Use all cores
    verbose=2
)

# Fit to training data
grid_search.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", grid_search.best_params_)

# Use the best model
best_rf = grid_search.best_estimator_

Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best Parameters: {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 500}


In [69]:
Y_pred = best_rf.predict(X_final)

In [70]:
merged_data_new["PredictedRaceTime (s) Spain"] = Y_pred

df_sorted = merged_data_new.sort_values(by='PredictedRaceTime (s) Spain')

In [71]:
df_sorted_final=df_sorted.reset_index(drop=True)

In [72]:
df_sorted_final

Unnamed: 0,Driver,Team,LapTime_(s)_Bahrain,LapTime_(s)_Saudi Arabia,LapTime_(s)_Australia,LapTime_(s)_Japan,LapTime_(s)_China,LapTime_(s)_Miami,LapTime_(s)_Emilia-Romagna,LapTime_(s)_Monaco,AvgConstructorPointsPerRace,PredictedRaceTime (s) Spain
0,NOR,McLaren,99.984,94.714229,103.428302,92.988792,97.58525,93.617842,87.290603,77.356962,18.5,81.218614
1,ANT,Mercedes,100.647446,95.124646,104.57937,93.314226,98.370964,94.510333,83.857818,80.56336,8.3125,81.221015
2,RUS,Mercedes,100.023418,94.932833,103.68634,93.289528,97.609339,94.197035,87.543381,79.837987,8.3125,81.221015
3,PIA,McLaren,99.524375,94.322646,102.084462,93.002113,97.411179,93.536614,87.399286,77.403859,18.5,81.224849
4,HAM,Ferrari,100.272036,95.272271,103.967189,93.512547,97.864411,94.592509,87.421508,78.015769,7.75,81.233499
5,VER,Red Bull Racing,101.295263,94.357229,103.341151,92.961943,97.708607,94.237596,87.193635,77.620705,8.375,81.238548
6,LEC,Ferrari,100.018643,94.547104,103.933528,93.26566,97.825661,94.537246,87.523381,77.397103,7.75,81.23885
7,TSU,Red Bull Racing,100.589036,165.662,101.752524,94.063849,98.860232,94.754754,87.613413,80.338053,8.375,81.39266
8,ALB,Williams,100.663357,95.926667,104.672389,93.723585,98.416911,94.379895,87.478476,79.548842,3.375,81.46138
9,SAI,Williams,102.382955,95.821917,,94.360604,98.775232,94.599368,87.557095,79.593092,3.375,81.46138


# Constructor Points

In [73]:
cp = calculate_constructor_points(df_sorted_final)

Carlos's position: 10, points: 1
Alex's position: 9, points: 2
Total constructor points: 3
