# F1 Elo Prototyping

Notebook for exploring ideas before binning or porting to scripts.

## Environment setup

In [7]:
import yaml

import pandas as pd
import plotly.express as px

In [8]:
# Move project root folder
%cd ..

/Users/mwtmurphy/projects/f1-elo


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


## Data importing

In [43]:
with open("params.yaml") as conf_file:
    CONFIG = yaml.safe_load(conf_file)

mod_df = pd.read_csv(CONFIG["data"]["modelled_path"])
constructors_df = pd.read_csv(CONFIG["data"]["constructors_csv"])[["constructorId", "name"]].rename(columns={"name": "constructorName"})


## Data visualisation

In [48]:
con_df = mod_df.groupby(["year", "date", "constructorId"])["constructorScore"].max().reset_index()
con_df = con_df.merge(constructors_df, on="constructorId", how="left")
cur_con = set(con_df.loc[con_df["year"] == 2024, "constructorId"])
cur_df = con_df[con_df["constructorId"].isin(cur_con) & (con_df["year"] > 2010)]

px.line(cur_df, x="date", y="constructorScore", color="constructorName")

In [49]:
cur_df.loc[(cur_df["date"] == "2024-03-09"), ["constructorName", "constructorScore"]].sort_values("constructorScore", ascending=False).drop_duplicates()

Unnamed: 0,constructorName,constructorScore
12801,Red Bull,2046.750446
12800,Ferrari,1872.285401
12804,Mercedes,1807.454813
12798,McLaren,1719.29795
12803,Aston Martin,1598.323264
12806,Alpine F1 Team,1571.279525
12802,Sauber,1558.416867
12807,RB F1 Team,1497.453206
12799,Williams,1458.293233
12805,Haas F1 Team,1424.471856


# Modelling optimisation

In [74]:
import itertools
import typing
import yaml

import numpy as np
import pandas as pd

import line_profiler

# Create a profile object
profiler = line_profiler.LineProfiler()


In [82]:
with open("params.yaml") as conf_file:
    CONFIG = yaml.safe_load(conf_file)

MOD_DF = pd.read_csv(CONFIG["data"]["features_path"])
MOD_DF[["constructorScore", "driverScore", "expected", "actual"]] = None
YR_RNDS = MOD_DF[["year", "round"]].drop_duplicates().values
MOD_MAT = MOD_DF.values

DRI_RTG = {dri: CONFIG["model"]["start_score"] for dri in set(MOD_DF["driverId"])}
CON_RTG = {con: CONFIG["model"]["start_score"] for con in set(MOD_DF["constructorId"])}

# dummy subset to test
MOD_MAT = MOD_MAT[MOD_MAT[:, 0] < 1955]

@profiler
def model_data(k: float, c: float, w: float, export: bool = False) -> typing.Union[float, None]:
    '''If export == False, returns negative RMSEE based on params. 
    If export == True, exports modelled data to 'interim' data folder 
    for data reporting.'''

    dri_scores = DRI_RTG.copy()
    con_scores = CON_RTG.copy()
    exp, out = [], []

    for yr, rnd in YR_RNDS:

        valid_ix = (MOD_MAT[:, 0] == yr) & (MOD_MAT[:, 1] == rnd)
        sub_ix = valid_ix & np.isin(MOD_MAT[:, 7], ["finished", "driver retirement"])
        
        rnd_dri_scores = {dri: {"diff": 0, "n": 0, "exp": 0, "act": 0} for dri in MOD_MAT[sub_ix, 4]}
        rnd_con_scores = {con: {"diff": 0, "n": 0, "exp": 0, "act": 0} for con in MOD_MAT[sub_ix, 3]}

        yr_mat = MOD_MAT[sub_ix, 3:6] # year-round matrix: con_id, dri_id, map_position
        for ix_1, ix_2 in itertools.combinations(range(yr_mat.shape[0]), 2):

            dri_a = yr_mat[ix_1, 1]
            con_a = yr_mat[ix_1, 0]
            pos_a = yr_mat[ix_1, 2]
            elo_a = dri_scores[dri_a] + (w * con_scores[con_a])

            dri_b = yr_mat[ix_2, 1]
            con_b = yr_mat[ix_2, 0]
            pos_b = yr_mat[ix_2, 2]
            elo_b = dri_scores[dri_b] + (w * con_scores[con_b])
            
            # continue if drivers in same car
            if pos_a == pos_b:
                continue

            # calculate position influence
            q_a = 10 ** (elo_a / c)
            q_b = 10 ** (elo_b / c)
    
            e_a = q_a / (q_a + q_b)        
            e_b = q_b / (q_a + q_b)

            # score outcome
            if pos_a < pos_b:
                o_a = 1
                o_b = 0
            else:
                o_a = 0
                o_b = 1
                
            # calculate score change and update round scores
            diff_a = k * (o_a - e_a)
            diff_b = k * (o_b - e_b)

            # log driver results and changes
            rnd_dri_scores[dri_a]["exp"] += e_a
            rnd_dri_scores[dri_a]["act"] += o_a
            rnd_dri_scores[dri_a]["diff"] += diff_a
            rnd_dri_scores[dri_a]["n"] += 1
            
            rnd_dri_scores[dri_b]["exp"] += e_a
            rnd_dri_scores[dri_b]["act"] += o_a
            rnd_dri_scores[dri_b]["diff"] += diff_b
            rnd_dri_scores[dri_b]["n"] += 1
            
            # log constructor changes if diff constructors
            if con_a != con_b:
                rnd_con_scores[con_a]["diff"] += diff_a
                rnd_con_scores[con_a]["n"] += 1
                rnd_con_scores[con_b]["diff"] += diff_b
                rnd_con_scores[con_b]["n"] += 1
                
            # store expected and final values for error analysis
            exp += [e_a, e_b]
            out += [o_a, o_b]
        
        # update driver values for finishing drivers and driver-caused retirements
        for dri in rnd_dri_scores.keys():
            if rnd_dri_scores[dri]["n"] != 0: # more than 1 car on grid
                dri_scores[dri] += (rnd_dri_scores[dri]["diff"] / rnd_dri_scores[dri]["n"])
                
        MOD_MAT[valid_ix, 9] = map(lambda el: dri_scores[el], MOD_MAT[valid_ix, 4]) # driver score
        MOD_MAT[valid_ix, 10] = map(lambda el: rnd_dri_scores[el]["exp"], MOD_MAT[valid_ix, 4]) # expected outcome
        MOD_MAT[valid_ix, 11] = map(lambda el: rnd_dri_scores[el]["act"], MOD_MAT[valid_ix, 4]) # actual outcome

        # update constructor values for finishing drivers
        for con in rnd_con_scores.keys():
            if rnd_con_scores[con]["n"] != 0: # more than 1 car on grid
                con_scores[con] += (rnd_con_scores[con]["diff"] / rnd_con_scores[con]["n"])
        
        MOD_MAT[valid_ix, 8] = map(lambda el: con_scores[el], MOD_MAT[valid_ix, 3])

        
    
    if export == False:
        err_df = pd.DataFrame({"pred": exp, "true": out})
        err_df["squared_error"] = (err_df["true"] - err_df["pred"]) ** 2
        neg_rmse = -(pow(err_df["squared_error"].sum() / err_df.shape[0], 0.5))
        
        return neg_rmse
    
    else:
        MOD_DF.to_csv(CONFIG["data"]["modelled_path"], index=False)


In [83]:
MOD_DF.head()

Unnamed: 0,year,round,date,constructorId,driverId,mapPosition,mapPoints,status,constructorScore,driverScore,expected,actual
0,1950,1,1950-05-13,51,642,1,25.0,finished,,,,
1,1950,1,1950-05-13,51,786,2,18.0,finished,,,,
2,1950,1,1950-05-13,51,686,3,15.0,finished,,,,
3,1950,1,1950-05-13,154,704,4,12.0,finished,,,,
4,1950,1,1950-05-13,154,627,5,10.0,finished,,,,


In [84]:
with open(CONFIG["data"]["params_path"], "r") as infile:
        params_log = yaml.safe_load(infile)

model_data(k=params_log["k"], c=params_log["c"], w=params_log["w"], export=True)

profiler.print_stats()


Timer unit: 1e-09 s

Total time: 0.141183 s
File: /var/folders/6b/0lfq4yq96r5b5yk1tb6vsfcw0000gn/T/ipykernel_77464/1639403726.py
Function: model_data at line 15

Line #      Hits         Time  Per Hit   % Time  Line Contents
    15                                           @profiler
    16                                           def model_data(k: float, c: float, w: float, export: bool = False) -> typing.Union[float, None]:
    17                                               '''If export == False, returns negative RMSEE based on params. 
    18                                               If export == True, exports modelled data to 'interim' data folder 
    19                                               for data reporting.'''
    20                                           
    21         1      16000.0  16000.0      0.0      dri_scores = DRI_RTG.copy()
    22         1       5000.0   5000.0      0.0      con_scores = CON_RTG.copy()
    23         1       1000.0   1000.0      0

In [None]:
# run 1: 32.4s
# run 2: 32.2s (easy items moved out of function)
# run 3: s (switch from pandas to numpy matrix)