In [3]:
import fastf1
import pandas as pd
import numpy as np
import itertools
import joblib
import logging
from collections import defaultdict
from tabulate import tabulate

In [4]:
# Paths
data_path = r"C:\Users\Sran\Desktop\F1Project\data" # Set Path 
model_path = r"C:\Users\Sran\Desktop\F1Project\models" # Set Path 

# Load test features and model
train_features = pd.read_pickle(f"{data_path}/train_features.pkl")
test_features = pd.read_pickle(f"{data_path}/test_features.pkl")


rf_model = joblib.load(f"{model_path}/rf_model.pkl")
gbr_model = joblib.load(f"{model_path}/gbr_model.pkl")

In [5]:
def get_race_parameters(year, gp_name, session_type='R'):
    fastf1.Cache.enable_cache(r"\cache") # Set Path 
    session = fastf1.get_session(year, gp_name, session_type)
    session.load()
    driver_name = {drv: data["FullName"] for drv, data in session.results.iterrows()}
    laps = session.laps
    total_laps = int(laps.LapNumber.max())
    pit_events = laps[laps.PitInTime.notna() & laps.PitOutTime.notna()]
    avg_pit = (
        (pit_events.PitOutTime - pit_events.PitInTime)
        .dt.total_seconds()
        .mean()
        if not pit_events.empty else 23.0
    )
    return total_laps, avg_pit

logging.getLogger('fastf1').setLevel(logging.WARNING)

In [6]:
def load_artifacts():
    #train = pd.read_pickle("data/train_features.pkl")
    train = pd.read_pickle(f"{data_path}/train_features.pkl")
    #model = joblib.load("models/gbr_model.pkl")
    model = joblib.load(f"{model_path}/rf_model.pkl")

    # avg degradation per compound
    avg_deg = (
        train.groupby("Compound").Degradation.sum() /
        train.groupby("Compound").LapsInStint.sum()
    ).to_dict()

    # reconstruct your one-hot columns
    enc = pd.get_dummies(train, columns=["Driver","Compound"], drop_first=True)
    feat_cols     = [c for c in enc.columns if c!="LapTimeSeconds"]
    driver_cols   = [c for c in feat_cols if c.startswith("Driver_")]
    compound_cols = [c for c in feat_cols if c.startswith("Compound_")]

    return avg_deg, feat_cols, driver_cols, compound_cols, model

In [7]:
def simulate_strategy(driver, strategy,
                      total_laps, avg_deg,
                      feat_cols, driver_cols, compound_cols,
                      model, pit_penalty):
    """
    Build one DataFrame for all laps, call .predict once, sum + pit penalties.
    Returns (total_time, pit_laps_list).
    """
    rows = []
    lap_start = 1
    pit_laps = []
    drv_map = {dc: int(dc==f"Driver_{driver}") for dc in driver_cols}

    for stint_idx, (cmpd, length) in enumerate(strategy):
        cmp_map = {cc: int(cc==f"Compound_{cmpd}") for cc in compound_cols}
        if stint_idx>0:
            # pit on the first lap of this stint
            pit_lap = lap_start
            pit_laps.append(pit_lap)

        for i in range(length):
            lap = lap_start + i
            rows.append({
                "LapNumber":   lap,
                "LapsInStint": i+1,
                "Degradation": avg_deg.get(cmpd,0.0)*i,
                "PitEvent":    int(stint_idx>0 and i==0),
                "Flagged":     0,
                "LapPercent":  lap/total_laps,
                **drv_map, **cmp_map
            })
        lap_start += length

    df = pd.DataFrame(rows)[feat_cols]
    preds = model.predict(df)
    total_time = preds.sum() + pit_penalty * len(pit_laps)
    return total_time, pit_laps

In [8]:
def all_strategies_for_driver(driver, pref_cmpd,
                              total_laps, avg_deg,
                              feat_cols, driver_cols, compound_cols,
                              model, pit_penalty):
    """
    Enumerate all 1-stop & 2-stop strategies for this driver,
    returning list of (time, pit_laps, strat_tuple).
    """
    MAX_LIFE = {"SOFT":30,"MEDIUM":40,"HARD":50}
    strategies = []
    comps = list(MAX_LIFE)

    for stops in (1,2):
        stints = stops + 1
        for combo in itertools.permutations(comps, stints):
            if combo[0] != pref_cmpd:
                continue
            if stops == 1:
                c1,c2 = combo
                for l1 in range(1, min(MAX_LIFE[c1], total_laps-1)+1):
                    l2 = total_laps - l1
                    if l2 <= MAX_LIFE[c2]:
                        strat = [(c1,l1),(c2,l2)]
                        t, pits = simulate_strategy(
                            driver, strat, total_laps,
                            avg_deg, feat_cols, driver_cols, compound_cols,
                            model, pit_penalty
                        )
                        strategies.append((t, pits, strat))
            else:
                c1,c2,c3 = combo
                for l1 in range(1, min(MAX_LIFE[c1], total_laps-2)+1):
                    for l2 in range(1, min(MAX_LIFE[c2], total_laps-l1-1)+1):
                        l3 = total_laps - l1 - l2
                        if l3 <= MAX_LIFE[c3]:
                            strat = [(c1,l1),(c2,l2),(c3,l3)]
                            t, pits = simulate_strategy(
                                driver, strat, total_laps,
                                avg_deg, feat_cols, driver_cols, compound_cols,
                                model, pit_penalty
                            )
                            strategies.append((t, pits, strat))

    # sort by ascending total time
    strategies.sort(key=lambda x: x[0])
    return strategies

In [9]:
def format_time(sec):
    h, rem = divmod(sec, 3600)
    m, rem = divmod(rem, 60)
    s, ms  = divmod(rem, 1)
    return f"{int(h)}:{int(m):02d}:{int(s):02d}.{int(ms*1000):03d}"

In [10]:

driver_name = { "VER": "Max Verstappen", "HAM": "Lewis Hamilton", "PIA": "Oscar Piastri", "NOR": "Lando Norris", "LEC": "Charles Leclerc",  "RUS": "George Russell",
    "ALO": "Fernando Alonso", "SAI": "Carlos Sainz", "TSU": "Yuki Tsunoda", "ALB": "Alexander Albon", "HUL": "Nico Hülkenberg", "DOO": "Jack Doohan", "COL": "Logan Sargeant",
    "LAW": "Liam Lawson", "ZHO": "Guanyu Zhou", "GAS": "Pierre Gasly", "STR": "Lance Stroll", "BOT": "Valtteri Bottas", "MAG": "Kevin Magnussen"
}

In [11]:
def main():

    year    = 2024
    gp_name = "Abu Dhabi"

    
    #year    = int(input("Year (e.g. 2024): "))
    #gp_name = input("Grand Prix (e.g. Abu Dhabi): ")

    total_laps, avg_pit = get_race_parameters(year, gp_name)
    avg_deg, feat_cols, driver_cols, compound_cols, model = load_artifacts()

    #test = pd.read_pickle("data/test_features.pkl")
    test = pd.read_pickle(f"{data_path}/test_features.pkl")
    drivers = sorted(test.Driver.unique())
    preferred = {d: test[test.Driver==d].Compound.mode()[0] for d in drivers}


    pit_counts = defaultdict(int)

    rows = []
    for drv in drivers:
        cands = all_strategies_for_driver(
            drv, preferred[drv],
            total_laps, avg_deg,
            feat_cols, driver_cols, compound_cols,
            model, avg_pit
        )

        chosen = None
        for t, pits, strat in cands:
            if all(pit_counts[lap] < 5 for lap in pits):
                chosen = (t, pits, strat)
                break
        if chosen is None:
            chosen = cands[0]

        ttime, pit_laps, strat = chosen
        for lap in pit_laps:
            pit_counts[lap] += 1

        # format output
        entry = {
            "Driver": driver_name.get(drv, drv),
            "Total Race Time (s)": ttime
        }
        lap0 = 1
        for i, (cmpd, L) in enumerate(strat, 1):
            lap1, lap2 = lap0, lap0+L-1
            entry[f"Stint {i}"] = f"{lap1}–{lap2}"
            entry[f"Compound {i}"] = cmpd
            lap0 += L
        if len(strat) == 2:
            entry["Stint 3"] = ""
            entry["Compound"] = ""
        rows.append(entry)

    rows = sorted(rows, key=lambda x: x["Total Race Time (s)"])

    leader_time = rows[0]["Total Race Time (s)"]
    
    for row in rows:
        gap_sec = row["Total Race Time (s)"] - leader_time
        row["Gap"] = "Leader" if gap_sec == 0 else "+" + format_time(gap_sec)
        row["Total Race Time"] = format_time(row["Total Race Time (s)"])
        del row["Total Race Time (s)"]

    df_out = pd.DataFrame(rows)
    
    top_5 = df_out = df_out.head(5)
    
    print(f"\nRace Simulation for {gp_name} grand prix - {year}:\n")
    
    print(tabulate(top_5, headers='keys', tablefmt='fancy_grid', showindex=False))

if __name__=="__main__":
    main()


Race Simulation for Abu Dhabi grand prix - 2024:

╒═════════════════╤═══════════╤══════════════╤═══════════╤══════════════╤═══════════╤══════════════╤══════════════╤═══════════════════╕
│ Driver          │ Stint 1   │ Compound 1   │ Stint 2   │ Compound 2   │ Stint 3   │ Compound 3   │ Gap          │ Total Race Time   │
╞═════════════════╪═══════════╪══════════════╪═══════════╪══════════════╪═══════════╪══════════════╪══════════════╪═══════════════════╡
│ Max Verstappen  │ 1–21      │ HARD         │ 22–42     │ SOFT         │ 43–58     │ MEDIUM       │ Leader       │ 1:21:52.484       │
├─────────────────┼───────────┼──────────────┼───────────┼──────────────┼───────────┼──────────────┼──────────────┼───────────────────┤
│ Lewis Hamilton  │ 1–50      │ HARD         │ 51–51     │ MEDIUM       │ 52–58     │ SOFT         │ +0:00:04.529 │ 1:21:57.013       │
├─────────────────┼───────────┼──────────────┼───────────┼──────────────┼───────────┼──────────────┼──────────────┼──────────────────