<a href="https://colab.research.google.com/github/perchedinthedark/formula1_predictions/blob/main/imola.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# scikit-learn is imported by the modelling cell (sklearn.*), so install it with the other dependencies.
! pip install fastf1 pandas numpy scikit-learn

Collecting fastf1
  Downloading fastf1-3.5.3-py3-none-any.whl.metadata (4.6 kB)
Collecting rapidfuzz (from fastf1)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting requests-cache>=1.0.0 (from fastf1)
  Downloading requests_cache-1.2.1-py3-none-any.whl.metadata (9.9 kB)
Collecting timple>=0.1.6 (from fastf1)
  Downloading timple-0.1.8-py3-none-any.whl.metadata (2.0 kB)
Collecting websockets<14,>=10.3 (from fastf1)
  Downloading websockets-13.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting cattrs>=22.2 (from requests-cache>=1.0.0->fastf1)
  Downloading cattrs-24.1.3-py3-none-any.whl.metadata (8.4 kB)
Collecting url-normalize>=1.4 (from requests-cache>=1.0.0->fastf1)
  Downloading url_normalize-2.2.1-py3-none-any.whl.metadata (5.6 kB)
Downloading fastf1-3.5.3-py3-none-any.whl (151 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [None]:
import fastf1
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

# Keep an on-disk FastF1 cache so repeated runs reuse already-downloaded data
cache_dir = "./cache"
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

# Load the FastF1 2024 Imola GP race session
session_2024 = fastf1.get_session(2024, "Imola", "R")
session_2024.load()

# Keep only the lap columns used below, discard laps without a recorded
# lap time, and expose the lap time as float seconds
laps_2024 = (
    session_2024.laps[["Driver", "LapTime", "Stint", "LapNumber"]]
    .dropna(subset=["LapTime"])
    .copy()
)
laps_2024["LapTime (s)"] = laps_2024["LapTime"].dt.total_seconds()


# 2025 Qualifying Data Imola GP
# One row per driver: full name and best qualifying lap in seconds.
qualifying_2025 = pd.DataFrame({
    "Driver": ["Oscar Piastri", "Max Verstappen", "George Russell", "Lando Norris", "Fernando Alonso",
               "Carlos Sainz Jr.", "Alexander Albon", "Lance Stroll", "Isack Hadjar", "Pierre Gasly",
               "Charles Leclerc", "Lewis Hamilton", "Andrea Kimi Antonelli", "Gabriel Bortoleto", "Franco Colapinto",
               "Liam Lawson", "Nico Hülkenberg", "Esteban Ocon", "Oliver Bearman", "Yuki Tsunoda"],
    "QualifyingTime (s)": [74.670, 74.704, 74.807, 74.962, 75.431,
                           75.198, 75.473, 75.497, 75.510, 75.505,
                           75.604, 75.765, 75.772, 76.260, 76.256,
                           76.379, 76.518, 76.613, 76.918, 0]
})

# A time of 0 s cannot be a real lap; it is treated here as "no time set".
# Left as 0, a tree-based model would read it as the fastest lap in the field,
# so substitute the slowest time that was actually recorded.
_time_col = "QualifyingTime (s)"
_no_time_set = qualifying_2025[_time_col] <= 0
qualifying_2025.loc[_no_time_set, _time_col] = qualifying_2025.loc[~_no_time_set, _time_col].max()

# Lookup table: driver full name (as written in qualifying_2025["Driver"])
# -> three-letter driver code used in the FastF1 lap data
driver_mapping = {
    "Oscar Piastri": "PIA",
    "George Russell": "RUS",
    "Lando Norris": "NOR",
    "Max Verstappen": "VER",
    "Lewis Hamilton": "HAM",
    "Charles Leclerc": "LEC",
    "Isack Hadjar": "HAD",
    "Andrea Kimi Antonelli": "ANT",
    "Yuki Tsunoda": "TSU",
    "Alexander Albon": "ALB",
    "Esteban Ocon": "OCO",
    "Nico Hülkenberg": "HUL",
    "Fernando Alonso": "ALO",
    "Lance Stroll": "STR",
    "Carlos Sainz Jr.": "SAI",
    "Pierre Gasly": "GAS",
    "Oliver Bearman": "BEA",
    "Franco Colapinto": "COL",
    "Gabriel Bortoleto": "BOR",
    "Liam Lawson": "LAW",
}

# 2025 driver championship positions, listed from 1st to 20th.
# These are example values - replace with real standings if available.
driver_standings_2025 = {
    "Oscar Piastri": 1,
    "Lando Norris": 2,
    "Max Verstappen": 3,
    "George Russell": 4,
    "Charles Leclerc": 5,
    "Andrea Kimi Antonelli": 6,
    "Lewis Hamilton": 7,
    "Alexander Albon": 8,
    "Esteban Ocon": 9,
    "Lance Stroll": 10,
    "Yuki Tsunoda": 11,
    "Pierre Gasly": 12,
    "Carlos Sainz Jr.": 13,
    "Nico Hülkenberg": 14,
    "Oliver Bearman": 15,
    "Isack Hadjar": 16,
    "Fernando Alonso": 17,
    "Liam Lawson": 18,
    "Gabriel Bortoleto": 19,
    "Franco Colapinto": 20,
}

# Flip the scale so a better championship position gives a larger feature
# value: the leader gets max_rank, the last-placed driver gets 1.
max_rank = max(driver_standings_2025.values())  # worst (largest) position number
driver_standings_2025_inverted = dict(
    zip(
        driver_standings_2025,
        (max_rank + 1 - rank for rank in driver_standings_2025.values()),
    )
)

# Attach the inverted standing to each qualifying row, matched on full driver name
standings_feature = qualifying_2025["Driver"].map(driver_standings_2025_inverted)
qualifying_2025["DriverStandings2025"] = standings_feature

# Feature: Tyre degradation (slope of lap times across a stint)
def calc_degradation(stint_laps):
    """Return the linear lap-time trend of one stint in seconds per lap.

    Fits a first-degree polynomial of ``"LapTime (s)"`` against
    ``"LapNumber"`` and returns its slope. A positive value means the
    laps got slower as the stint went on.

    Args:
        stint_laps: DataFrame holding the laps of a single stint, with
            ``"LapNumber"`` and ``"LapTime (s)"`` columns.

    Returns:
        The fitted slope, or ``np.nan`` when the stint has three laps or
        fewer (too few points for a meaningful trend).
    """
    if len(stint_laps) <= 3:
        return np.nan
    slope, _intercept = np.polyfit(stint_laps["LapNumber"], stint_laps["LapTime (s)"], 1)
    return slope

# One degradation slope per (driver, stint); stints that are too short yield NaN
stint_groups = laps_2024.groupby(["Driver", "Stint"])
degradation = stint_groups.apply(calc_degradation)

# Average the stint slopes into a single rate per driver code
driver_degradation = degradation.groupby(level="Driver").mean().rename("DegradationRate")

# Add the three-letter driver code so 2025 rows can be matched to 2024 lap data
qualifying_2025["DriverCode"] = qualifying_2025["Driver"].map(driver_mapping)

# Build the training frame: every 2024 race lap of a driver paired with that
# driver's 2025 qualifying row (inner join, so drivers without 2024 laps drop
# out), then the per-driver degradation rate joined on as an extra feature.
merged_data = (
    qualifying_2025
    .merge(laps_2024, left_on="DriverCode", right_on="Driver")
    .merge(driver_degradation, left_on="DriverCode", right_on="Driver", how="left")
)

# Features: qualifying time, inverted championship standing and degradation rate.
# Target: the individual 2024 race lap time in seconds.
feature_columns = ["QualifyingTime (s)", "DriverStandings2025", "DegradationRate"]
X = merged_data[feature_columns]
y = merged_data["LapTime (s)"]

# Fail early with a clear message if the merges produced no rows
if len(X) == 0:
    raise ValueError("Dataset is empty after preprocessing. Check data sources!")

# Hold out 20% of the laps for evaluation, then fit the gradient boosting model
# (fixed seeds keep the split and the fit reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=39,
)
model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=39,
)
model.fit(X_train, y_train)

# Predict using 2025 qualifying times
feature_columns = ["QualifyingTime (s)", "DriverStandings2025", "DegradationRate"]

# DegradationRate was only merged into the per-lap training frame, never into
# qualifying_2025, which is what raised
#   KeyError: "['DegradationRate'] not in index"
# Look the rate up per driver code (driver_degradation is indexed by that code).
qualifying_2025["DegradationRate"] = qualifying_2025["DriverCode"].map(driver_degradation)

# Drivers with no usable 2024 stint (e.g. not in the 2024 lap data) map to NaN.
# Fall back to the field-average rate so every driver still gets a prediction
# instead of passing NaN features to the model.
qualifying_2025["DegradationRate"] = qualifying_2025["DegradationRate"].fillna(
    driver_degradation.mean()
)

predicted_lap_times = model.predict(qualifying_2025[feature_columns])
qualifying_2025["PredictedRaceTime (s)"] = predicted_lap_times

# Rank drivers by predicted race time (fastest predicted lap first)
qualifying_2025 = qualifying_2025.sort_values(by="PredictedRaceTime (s)")

# Print final predictions
print("\n🏁 Predicted 2025 Imola GP Winner with no Change in ML Model🏁\n")
print(qualifying_2025[["Driver", "PredictedRaceTime (s)"]])

# Evaluate Model on the held-out 2024 laps
y_pred = model.predict(X_test)
print(f"\n🔍 Model Error (MAE): {mean_absolute_error(y_test, y_pred):.2f} seconds")

core           INFO 	Loading data for Emilia Romagna Grand Prix - Race [v3.5.3]
INFO:fastf1.fastf1.core:Loading data for Emilia Romagna Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
INFO:fastf1.fastf1.req:Using cached data for session_info
req            INFO 	Using cached data for driver_info
INFO:fastf1.fastf1.req:Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
INFO:fastf1.fastf1.req:Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
INFO:fastf1.fastf1.req:Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
INFO:fastf1.fastf1.req:Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
INFO:fastf1.fastf1.req:Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
INFO:fastf1.fastf1.req:Using cached data for timing_app_data

KeyError: "['DegradationRate'] not in index"