# F1 Elo Prototyping

Notebook for exploring ideas before binning or porting to scripts.

## Environment setup

In [1]:
import yaml

import pandas as pd
import plotly.express as px

In [2]:
# Move up one directory (presumably from the notebook folder to the project
# root) so that relative paths used later, such as "params.yaml", resolve there.
# NOTE: re-running this cell moves up one more level each time; run it once per kernel.
%cd ..

/Users/mitchell/projects/personal/f1-rating-system


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


## Data importing

In [3]:
# Load the project configuration; every data path below is looked up in it.
with open("params.yaml") as conf_file:
    CONFIG = yaml.safe_load(conf_file)

# Each source CSV is read exactly once so a Restart & Run All leaves a single,
# unambiguous definition of every frame.
mod_df = pd.read_csv(CONFIG["data"]["modelled_path"])

constructors_df = (
    pd.read_csv(CONFIG["data"]["constructors_csv"])[["constructorId", "name"]]
    .rename(columns={"name": "constructorName"})
)

# Keep driverRef alongside the name parts: the driver-score line chart further
# down merges drivers_df and colours by "driverRef", so dropping that column
# here would break it on a fresh kernel.
drivers_df = pd.read_csv(CONFIG["data"]["drivers_csv"])[
    ["driverId", "driverRef", "forename", "surname"]
]
# Vectorised "forename surname" (replaces a row-wise apply/join).
drivers_df = drivers_df.assign(
    driverName=drivers_df["forename"] + " " + drivers_df["surname"]
)

# replace driver ID with driver name for visualisation
vis_df = mod_df.merge(drivers_df[["driverId", "driverName"]], on="driverId", how="left")

# con_df is kept as a separate name (same content as constructors_df) because
# later cells rebind con_df to other frames.
con_df = constructors_df.copy()
vis_df = vis_df.merge(con_df, on="constructorId", how="left")

df24 = pd.read_csv(CONFIG["data"]["2024_path"])

In [111]:
# Capturing constructor periods to begin tracking buyouts and rebrands to integrate into app
import numpy as np

# Raw inputs: race results, race calendar, and constructor id -> name lookup.
results_df = pd.read_csv(CONFIG["data"]["results_csv"])
races_df = pd.read_csv(CONFIG["data"]["races_csv"])
con_df = (
    pd.read_csv(CONFIG["data"]["constructors_csv"])
    .loc[:, ["constructorId", "name"]]
    .rename(columns={"name": "constructorName"})
)

# One row per (race, constructor), annotated with the race year and constructor name.
period_df = (
    results_df.loc[:, ["raceId", "constructorId"]]
    .drop_duplicates()
    .reset_index(drop=True)
    .merge(races_df[["raceId", "year"]], on="raceId", how="left")
    .merge(con_df[["constructorId", "constructorName"]], on="constructorId", how="left")
)

# One row per (constructor, year); n_years counts the rows each constructorId has here.
con_yr_df = period_df.loc[:, ["constructorId", "constructorName", "year"]].drop_duplicates()
con_yr_df = (
    con_yr_df
    .assign(n_years=con_yr_df.groupby("constructorId")["year"].transform("count"))
    .sort_values(["constructorId", "n_years", "year"])
    .reset_index(drop=True)
)

# Written relative to the current working directory.
con_yr_df.to_csv("constructor_year.csv", index=False)

In [17]:
# Ferrari rows of df24, ordered by round then driverId, showing the
# constructor-score columns only.
(
    df24.loc[df24["constructorName"] == "Ferrari"]
    .sort_values(["round", "driverId"])
    .loc[:, ["round", "driverName", "startConScore", "constructorScore", "status", "conScoreChange"]]
)

Unnamed: 0,round,driverName,startConScore,constructorScore,status,conScoreChange
2,1,Carlos Sainz,2442.497103,2479.971229,finished,37.474126
3,1,Charles Leclerc,2479.971229,2479.971229,finished,0.0
22,2,Charles Leclerc,2479.971229,2507.323572,finished,27.352343
26,2,Oliver Bearman,2507.323572,2507.323572,finished,0.0
40,3,Carlos Sainz,2507.323572,2562.080425,finished,54.756853
41,3,Charles Leclerc,2562.080425,2562.080425,finished,0.0
61,4,Carlos Sainz,2562.080425,2591.320449,finished,29.240023
62,4,Charles Leclerc,2591.320449,2591.320449,finished,0.0
83,5,Carlos Sainz,2607.622979,2607.622979,finished,0.0
82,5,Charles Leclerc,2591.320449,2607.622979,finished,16.30253


In [8]:
# Total conScoreChange per constructor in df24.
# Identical (constructorId, constructorName, conScoreChange) rows are collapsed
# before summing.
# NOTE(review): the de-duplication ignores the round, so equal changes from
# different rounds would also be collapsed — confirm this is intended.
(
    df24.loc[:, ["constructorId", "constructorName", "conScoreChange"]]
    .drop_duplicates()
    .groupby(["constructorId", "constructorName"], as_index=False)["conScoreChange"]
    .sum()
)

Unnamed: 0,constructorId,constructorName,conScoreChange
0,1,McLaren,378.834375
1,3,Williams,-62.961433
2,6,Ferrari,1169.57411
3,9,Red Bull,-122.2045
4,15,Sauber,473.670866
5,117,Aston Martin,-108.717601
6,131,Mercedes,-13.642658
7,210,Haas F1 Team,347.348579
8,214,Alpine F1 Team,-224.715036
9,215,RB F1 Team,125.309234


In [40]:
# Per-row gap between the "actual" and "expected" columns (adds a column to vis_df in place).
vis_df["outperformance"] = vis_df["actual"].sub(vis_df["expected"])

# Per-driver summaries, each keyed on driverId.
out_df = vis_df.groupby("driverId", as_index=False)["outperformance"].mean()
count_df = vis_df.groupby("driverId").size().reset_index(name="races")
max_df = (
    vis_df.groupby("driverId")["driverScore"]
    .max()
    .rename("maxScore")
    .reset_index()
)

# Mean driver score per (driverId, driverName), joined with the summaries above
# and put into a fixed column order.
agg_df = (
    vis_df.groupby(["driverId", "driverName"])["driverScore"]
    .mean()
    .rename("meanScore")
    .reset_index()
    .merge(out_df, on="driverId", how="left")
    .merge(count_df, on="driverId", how="left")
    .merge(max_df, on="driverId", how="left")
    .loc[:, ["driverId", "driverName", "races", "meanScore", "outperformance", "maxScore"]]
)

# Top 20 drivers by mean score, and every vis_df row belonging to those drivers.
goat_df = (
    agg_df.sort_values("meanScore", ascending=False)
    .iloc[:20]
    .reset_index(drop=True)
)
goat_hist_df = vis_df.loc[vis_df["driverId"].isin(goat_df["driverId"])]

goat_hist_df.head()

Unnamed: 0,year,round,date,constructorId,driverId,mapPosition,mapPoints,status,constructorScore,driverScore,expected,actual,driverName,constructorName,outperformance
12,1950,1,1950-05-13,51,579,12,0.0,constructor retirement,1523.560845,1500.0,0.0,0,Juan Fangio,Alfa Romeo,0.0
23,1950,2,1950-05-21,51,579,1,25.0,finished,1552.59561,1529.487883,9.021285,17,Juan Fangio,Alfa Romeo,7.978715
90,1950,4,1950-06-04,51,579,12,0.0,constructor retirement,1562.455044,1529.487883,0.0,0,Juan Fangio,Alfa Romeo,0.0
97,1950,5,1950-06-18,51,579,1,25.0,finished,1582.869833,1554.526005,5.413386,9,Juan Fangio,Alfa Romeo,3.586614
111,1950,6,1950-07-02,51,579,1,25.0,finished,1595.08494,1575.674296,5.30719,8,Juan Fangio,Alfa Romeo,2.69281


In [32]:

# Per-row gap between "actual" and "expected" (adds/overwrites a vis_df column in place).
vis_df["outperformance"] = vis_df["actual"].sub(vis_df["expected"])

# 20 drivers with the highest mean outperformance.
(
    vis_df.groupby(["driverId", "driverName"], as_index=False)["outperformance"]
    .mean()
    .sort_values("outperformance", ascending=False)
    .head(20)
    .reset_index(drop=True)
)

Unnamed: 0,driverId,driverName,outperformance
0,591,George Amick,10.775499
1,674,Larry Crockett,9.045969
2,794,Joie Chitwood,8.000764
3,743,Eric Thompson,6.669871
4,701,Bill Holland,5.750382
5,766,Lee Wallard,5.709228
6,457,Ben Pon,5.683761
7,673,Mike Nazaruk,5.668481
8,770,Cecil Green,4.750382
9,656,Art Cross,4.733178


In [4]:
# Reads the CONFIG["data"]["2024_path"] CSV — the same file the data-importing
# cell loads as df24.
# NOTE(review): dri_df is rebound to a different frame in the "Data visualisation"
# section, so this load looks redundant — confirm before removing.
dri_df = pd.read_csv(CONFIG["data"]["2024_path"])

In [26]:
# constructor colours
import json

# Lookup table: constructorId -> hex colour string.
con_df = pd.DataFrame(
    {
        "constructorId": [9, 1, 6, 131, 117, 214, 210, 215, 3, 15],
        "colour": ['#3671C6', '#FF8100', '#E80020', '#25F4D2', '#219971', '#0293CC', '#B6BABD', '#6692FF', '#63C4FF', '#51E251'],
    }
)

# Same mapping as a plain dict, in table order; tolist() yields built-in
# Python ints/strs so the dict serialises to JSON.
con_dict = dict(zip(con_df["constructorId"].tolist(), con_df["colour"].tolist()))

# JSON text of the mapping (integer keys are emitted as strings).
json.dumps(con_dict)

'{"9": "#3671C6", "1": "#FF8100", "6": "#E80020", "131": "#25F4D2", "117": "#219971", "214": "#0293CC", "210": "#B6BABD", "215": "#6692FF", "3": "#63C4FF", "15": "#51E251"}'

## Data visualisation

In [21]:
# Max constructorScore per (year, date, constructorId), with the constructor name attached.
con_df = (
    mod_df.groupby(["year", "date", "constructorId"], as_index=False)["constructorScore"]
    .max()
    .merge(constructors_df, on="constructorId", how="left")
)

# Constructors that have rows in 2024, restricted to years after 2010.
cur_con = set(con_df.loc[con_df["year"].eq(2024), "constructorId"])
cur_df = con_df.loc[(con_df["year"] > 2010) & con_df["constructorId"].isin(cur_con)]

px.line(cur_df, x="date", y="constructorScore", color="constructorName")

In [22]:
# Constructor scores on the most recent date in cur_df, highest first.
(
    cur_df.loc[cur_df["date"].eq(cur_df["date"].max()), ["constructorName", "constructorScore"]]
    .sort_values("constructorScore", ascending=False)
    .drop_duplicates()
)

Unnamed: 0,constructorName,constructorScore
12871,Red Bull,1796.971819
12870,Ferrari,1726.404103
12874,Mercedes,1701.023825
12868,McLaren,1685.843528
12873,Aston Martin,1572.046843
12872,Sauber,1548.079025
12877,RB F1 Team,1518.2449
12876,Alpine F1 Team,1490.258123
12875,Haas F1 Team,1485.018449
12869,Williams,1458.211512


In [24]:
# Max driverScore per (year, date, driverId), joined with the driver lookup.
# The chart below colours by "driverRef", so drivers_df must carry that column.
dri_df = (
    mod_df.groupby(["year", "date", "driverId"], as_index=False)["driverScore"]
    .max()
    .merge(drivers_df, on="driverId", how="left")
)

# Drivers that have rows in 2024, restricted to years after 2010.
cur_dri = set(dri_df.loc[dri_df["year"].eq(2024), "driverId"])
cur_dri_df = dri_df.loc[(dri_df["year"] > 2010) & dri_df["driverId"].isin(cur_dri)]

px.line(cur_dri_df, x="date", y="driverScore", color="driverRef")

In [25]:
# Driver scores on the most recent date in cur_dri_df, highest first.
(
    cur_dri_df.loc[cur_dri_df["date"].eq(cur_dri_df["date"].max()), ["driverRef", "driverScore"]]
    .sort_values("driverScore", ascending=False)
    .drop_duplicates()
)

Unnamed: 0,driverRef,driverScore
26355,max_verstappen,1961.950539
26361,norris,1815.50935
26349,alonso,1751.968031
26360,leclerc,1739.437965
26348,hamilton,1688.052404
26356,sainz,1662.909169
26366,piastri,1641.439192
26357,ocon,1630.923083
26362,russell,1616.181912
26359,gasly,1607.87785


# Modelling optimisation

In [None]:
# runtime improvement per model run: 32.4s -> 2.89s