In [1]:
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

fastf1.Cache.enable_cache("f1_cache")


In [2]:
gp_rounds = {
    "Bahrain": 1,
    "Saudi Arabia": 2,
    "Australia": 3,
    "Japan": 4,
    "China": 5,
    "Miami": 6,
    "Imola": 7,
    "Monaco": 8,
    "Canada": 9,
    "Spain": 10
    # Add more as needed
}


In [3]:
def load_single_gp_laps(gp_name, year=2024):
    round_number = gp_rounds.get(gp_name)
    if round_number is None:
        return pd.DataFrame(columns=["Driver", "LapTime (s)", "Compound"])
    
    try:
        session = fastf1.get_session(year, round_number, "R")
        session.load()
        laps = session.laps[["Driver", "LapTime", "Compound"]].dropna()
        laps["LapTime (s)"] = laps["LapTime"].dt.total_seconds()
        return laps[["Driver", "LapTime (s)", "Compound"]]
    except:
        return pd.DataFrame(columns=["Driver", "LapTime (s)", "Compound"])


In [4]:
def get_qualifying_data(year, gp_name):
    round_number = gp_rounds.get(gp_name)
    if round_number is None:
        return pd.DataFrame()

    try:
        session = fastf1.get_session(year, round_number, "Q")
        session.load()
        laps = session.laps.pick_quicklaps().dropna(subset=["LapTime", "Compound"])

        df = laps.groupby("DriverNumber").apply(
            lambda x: x.sort_values("LapTime").iloc[0]
        ).reset_index(drop=True)

        df["QualifyingTime (s)"] = df["LapTime"].dt.total_seconds()

        return df[["Driver", "DriverNumber", "Compound", "QualifyingTime (s)"]].rename(
            columns={"DriverNumber": "DriverCode"}
        )
    except:
        return pd.DataFrame()


In [5]:
def predict_gp_leaderboard(gp_name, qualifying_2025_df):
    race_laps = load_single_gp_laps(gp_name, year=2024)
    if race_laps.empty or qualifying_2025_df.empty:
        return pd.DataFrame()

    session = fastf1.get_session(2024, gp_rounds[gp_name], "R")
    session.load()

    # Normalize compound casing
    qualifying_2025_df["Compound"] = qualifying_2025_df["Compound"].str.capitalize()
    race_laps["Compound"] = race_laps["Compound"].str.capitalize()

    # Get driver number mapping from the loaded session
    driver_map = session.results[["DriverNumber", "Abbreviation"]].rename(
        columns={"DriverNumber": "DriverCode", "Abbreviation": "Driver"}
    )

    # Merge to get DriverCode into race laps
    race_laps = race_laps.merge(driver_map, on="Driver", how="left")

    # Group average lap times by driver + compound
    avg_laps = race_laps.groupby(["DriverCode", "Compound"])["LapTime (s)"].mean().reset_index()

    # Merge with qualifying data using DriverCode and Compound
    merged_data = qualifying_2025_df.merge(
        avg_laps,
        on=["DriverCode", "Compound"],
        how="inner"
    )

    print("✅ Merged shape:", merged_data.shape)

    if merged_data.empty:
        return pd.DataFrame()

    X = merged_data[["QualifyingTime (s)", "Compound"]]
    y = merged_data["LapTime (s)"]

    preprocessor = ColumnTransformer([
        ("scale", StandardScaler(), ["QualifyingTime (s)"]),
        ("encode", OneHotEncoder(drop="first"), ["Compound"])
    ])

    pipeline = Pipeline([
        ("prep", preprocessor),
        ("model", GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=39))
    ])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=39)
    pipeline.fit(X_train, y_train)

    preds = pipeline.predict(qualifying_2025_df[["QualifyingTime (s)", "Compound"]])
    qualifying_2025_df["Predicted Race Time"] = preds
    qualifying_2025_df = qualifying_2025_df.sort_values(by="Predicted Race Time").reset_index(drop=True)

    # Return top 10 without Starting Grid column
    return qualifying_2025_df.rename(columns={
        "QualifyingTime (s)": "Qualifying Time"
    })[[
        "Driver", "Compound", "Qualifying Time", "Predicted Race Time"
    ]].head(10)



In [6]:
qualifying_2025_df = get_qualifying_data(2025, "Bahrain")  # Or 2024 if needed
leaderboard = predict_gp_leaderboard("Bahrain", qualifying_2025_df)
leaderboard

core           INFO 	Loading data for Australian Grand Prix - Qualifying [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '81', '1', '63', '22', '23', '16', '44', '10', '55', '6', '14', '18', '7', '5', '12', '27', '30', '31', '87']
  df = laps.groupby("DriverNumber").apply(
core           INFO 	Loading data for Bahrain Grand Prix - Race 

✅ Merged shape: (14, 5)


Unnamed: 0,Driver,Compound,Qualifying Time,Predicted Race Time
0,VER,Soft,75.481,96.419373
1,ANT,Soft,76.525,97.918135
2,BOR,Soft,76.516,97.918135
3,HUL,Soft,76.579,97.918135
4,RUS,Soft,75.546,97.991824
5,GAS,Soft,75.98,98.041857
6,SAI,Soft,75.931,98.041857
7,LEC,Soft,75.755,98.326286
8,NOR,Soft,75.096,98.458917
9,PIA,Soft,75.18,98.671338
