# F1 Elo Prototyping

Notebook for exploring ideas before either discarding them or porting them to scripts.

## Environment setup

In [3]:
import yaml

import pandas as pd
import plotly.express as px

In [2]:
# Move the working directory up one level to the project root folder, so that
# relative paths used by later cells (e.g. "params.yaml") resolve.
# NOTE(review): `%cd ..` is relative to the current directory, so re-running this
# cell without restarting the kernel moves up one more level each time.
%cd ..

/Users/mwtmurphy/projects/personal/f1-elo


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


## Data importing

In [6]:
# Read the project configuration; its "data" section holds the CSV locations used below.
with open("params.yaml") as params_handle:
    CONFIG = yaml.safe_load(params_handle)

data_paths = CONFIG["data"]

# Modelled results (later cells rely on its year, round, driverId and elo_score columns).
mod_df = pd.read_csv(data_paths["modelled_path"])

# Driver reference table, trimmed to the identifier and the two name parts.
driver_columns = ["driverId", "forename", "surname"]
drivers_df = pd.read_csv(data_paths["drivers_csv"])[driver_columns]

## Data cleaning

In [7]:
# Build a single display name per driver.
# Plain Series concatenation is vectorised, so no Python lambda is called per row,
# and a missing forename/surname yields NaN instead of raising TypeError in str.join.
# The column is added to drivers_df itself because a later cell merges on it again.
drivers_df["driverName"] = drivers_df["forename"] + " " + drivers_df["surname"]

# Left join: every modelled row is kept, even if its driverId has no entry in drivers_df.
vis_df = mod_df.merge(drivers_df[["driverId", "driverName"]], on="driverId", how="left")
vis_df.head()

Unnamed: 0,year,round,date,constructorId,driverId,mapPosition,mapPoints,elo_score,driverName
0,1950,1,1950-05-13,51,642,1,25.0,1505.0,Nino Farina
1,1950,1,1950-05-13,51,786,2,18.0,1501.666667,Luigi Fagioli
2,1950,1,1950-05-13,51,686,3,15.0,1498.333333,Reg Parnell
3,1950,1,1950-05-13,154,704,4,12.0,1505.0,Yves Cabantous
4,1950,1,1950-05-13,154,627,5,10.0,1502.5,Louis Rosier


## Data exploration

In [8]:
# Threshold: twice the number of distinct rounds in the shortest season in the data.
rounds_per_year = vis_df.groupby("year")["round"].nunique()
min_races = 2 * rounds_per_year.min()  # 2x shortest season

# One row per (year, round): the row with the highest elo_score at that race.
# Ties on elo_score are resolved by whichever row sorts first, so exactly one
# driver is counted per race.
top_ranked_df = (
    vis_df
    .sort_values(["year", "round", "elo_score"], ascending=[True, True, False])
    .drop_duplicates(["year", "round"])
)

# Number of races at which each driver was the top-ranked driver (reused by a later cell).
gott_days = top_ranked_df["driverId"].value_counts()

# Keep only drivers that were top ranked at strictly more than `min_races` races,
# then pull their complete rating history in chronological order.
gott_drivers = set(gott_days[gott_days > min_races].index)
gott_df = vis_df[vis_df["driverId"].isin(gott_drivers)].sort_values(["year", "round"])

px.line(
    gott_df,
    x="date",
    y="elo_score",
    color="driverName",
    title="Elo history of the drivers most often top ranked",
    labels={"date": "Race date", "elo_score": "Elo score", "driverName": "Driver"},
)

Unnamed: 0,year,round,date,constructorId,driverId,mapPosition,mapPoints,elo_score,driverName
12,1950,1,1950-05-13,51,579,12,0.0,1495.0,Juan Fangio
23,1950,2,1950-05-21,51,579,1,25.0,1500.246281,Juan Fangio
24,1950,2,1950-05-21,6,647,2,18.0,1505.0,Alberto Ascari
90,1950,4,1950-06-04,51,579,12,0.0,1495.259996,Juan Fangio
92,1950,4,1950-06-04,6,647,14,0.0,1504.852155,Alberto Ascari


In [12]:
# Top-10 table of drivers by number of races spent as the top-ranked driver.
#
# The index and value column are named explicitly: a bare `reset_index()` only yields
# "driverId"/"count" columns on pandas >= 2.0 (older versions name them "index" and
# "driverId"), which would break the merge and the "count" selection below.
count_df = (
    gott_days
    .rename_axis("driverId")
    .reset_index(name="count")
    # value_counts() is already sorted descending; a left merge preserves that order.
    .merge(drivers_df[["driverId", "driverName"]], on="driverId", how="left")
    .loc[:, ["driverName", "count"]]
    .rename(columns={
        "driverName": "Driver name",
        "count": "Races as top ranked",
    })
    .head(10)
)

# Show ranks 1..10 instead of the default 0-based index.
count_df.index += 1

count_df

Unnamed: 0,Driver name,Races as top ranked
1,Fernando Alonso,203
2,Michael Schumacher,174
3,Ayrton Senna,96
4,Max Verstappen,60
5,Nelson Piquet,59
6,Ronnie Peterson,53
7,Bruce McLaren,50
8,Lewis Hamilton,41
9,Alain Prost,39
10,Juan Fangio,28


In [6]:

# Identify every driver with at least one row in the 2023 season ...
in_2023 = vis_df["year"].eq(2023)
drivers_23 = set(vis_df.loc[in_2023, "driverId"])

# ... then keep the full rating history of those drivers, ordered by season and round.
raced_in_23 = vis_df["driverId"].isin(drivers_23)
df_23 = vis_df.loc[raced_in_23].sort_values(["year", "round"])

px.line(df_23, x="date", y="elo_score", color="driverName")

In [7]:
# Narrow df_23 down to its most recent season ...
latest_year = max(df_23["year"])
latest_season_df = df_23[df_23["year"] == latest_year]

# ... and then to the last round of that season, best Elo score first.
latest_round = max(latest_season_df["round"])
now_df = latest_season_df[latest_season_df["round"] == latest_round].sort_values(
    "elo_score", ascending=False
)

now_df[["date", "driverName", "elo_score"]]

Unnamed: 0,date,driverName,elo_score
26220,2023-11-05,Max Verstappen,1640.071679
26227,2023-11-05,Lewis Hamilton,1601.205336
26221,2023-11-05,Lando Norris,1582.604205
26222,2023-11-05,Fernando Alonso,1566.355462
26237,2023-11-05,Alexander Albon,1562.633778
26235,2023-11-05,George Russell,1552.340766
26234,2023-11-05,Charles Leclerc,1542.681518
26226,2023-11-05,Pierre Gasly,1533.860854
26225,2023-11-05,Carlos Sainz,1530.525805
26229,2023-11-05,Esteban Ocon,1525.929999
