# F1 Elo Prototyping

Notebook for exploring ideas before binning or porting to scripts.

## Environment setup

In [1]:
import yaml

import pandas as pd
import plotly.express as px

In [2]:
# Move project root folder
%cd ..

/Users/mwtmurphy/projects/f1-elo


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


## Data importing

In [3]:
with open("params.yaml") as conf_file:
    CONFIG = yaml.safe_load(conf_file)

mod_df = pd.read_csv(CONFIG["data"]["modelled_path"])
constructors_df = pd.read_csv(CONFIG["data"]["constructors_csv"])[["constructorId", "name"]].rename(columns={"name": "constructorName"})


## Data visualisation

In [4]:
con_df = mod_df.groupby(["year", "date", "constructorId"])["constructorScore"].max().reset_index()
con_df = con_df.merge(constructors_df, on="constructorId", how="left")
cur_con = set(con_df.loc[con_df["year"] == 2024, "constructorId"])
cur_df = con_df[con_df["constructorId"].isin(cur_con) & (con_df["year"] > 2010)]

px.line(cur_df, x="date", y="constructorScore", color="constructorName")

In [5]:
cur_df.loc[(cur_df["date"] == "2024-03-09"), ["constructorName", "constructorScore"]].sort_values("constructorScore", ascending=False).drop_duplicates()

Unnamed: 0,constructorName,constructorScore
12798,McLaren,
12799,Williams,
12800,Ferrari,
12801,Red Bull,
12802,Sauber,
12803,Aston Martin,
12804,Mercedes,
12805,Haas F1 Team,
12806,Alpine F1 Team,
12807,RB F1 Team,


# Modelling optimisation

In [6]:
import itertools
import typing
import yaml

import numpy as np
import pandas as pd

import line_profiler

# Create a profile object
profiler = line_profiler.LineProfiler()


In [40]:
with open("params.yaml") as conf_file:
    CONFIG = yaml.safe_load(conf_file)

MOD_DF = pd.read_csv(CONFIG["data"]["features_path"])
MOD_DF[["constructorScore", "driverScore", "expected", "actual"]] = None
IX_CHUNKS = MOD_DF.reset_index().groupby(["year", "round"])["index"].agg(["min", "max"]).values
MOD_MAT = MOD_DF.values


DRI_RTG = {dri: CONFIG["model"]["start_score"] for dri in set(MOD_DF["driverId"])}
CON_RTG = {con: CONFIG["model"]["start_score"] for con in set(MOD_DF["constructorId"])}

# dummy subset to test
# MOD_MAT = MOD_MAT[MOD_MAT[:, 0] < 1955]

@profiler
def model_data(k: float, c: float, w: float, export: bool = False) -> typing.Union[float, None]:
    '''If export == False, returns negative RMSEE based on params. 
    If export == True, exports modelled data to 'interim' data folder 
    for data reporting.'''

    dri_scores = DRI_RTG.copy()
    con_scores = CON_RTG.copy()
    exp, out = [], []

    for start_ix, end_ix in IX_CHUNKS:
        yr_mat = MOD_MAT[start_ix:end_ix]
        rnd_dri_scores = {dri: {"diff": 0, "n": 0, "exp": 0, "act": 0} for dri in yr_mat[:, 4]}
        rnd_con_scores = {con: {"diff": 0, "n": 0, "exp": 0, "act": 0} for con in yr_mat[:, 3]}

        for ix_1, ix_2 in itertools.combinations(range(yr_mat.shape[0]), 2):
            con_a, dri_a, pos_a, st_a = yr_mat[ix_1, [3, 4, 5, 7]]
            con_b, dri_b, pos_b, st_b = yr_mat[ix_2, [3, 4, 5, 7]]
    
            # continue if drivers in same car or a driver does not finish for misc reason
            if pos_a == pos_b or "misc retirement" in [st_a, st_b]:
                continue

            # get current rating
            elo_a = dri_scores[dri_a] + (w * con_scores[con_a])
            elo_b = dri_scores[dri_b] + (w * con_scores[con_b])
            
            # calculate position influence
            q_a = 10 ** (elo_a / c)
            q_b = 10 ** (elo_b / c)
    
            e_a = q_a / (q_a + q_b)        
            e_b = q_b / (q_a + q_b)

            # score outcome
            if pos_a < pos_b:
                o_a = 1
                o_b = 0
            else:
                o_a = 0
                o_b = 1
                
            # calculate score change and update round scores
            diff_a = k * (o_a - e_a)
            diff_b = k * (o_b - e_b)

            # log driver results and changes if neither retire due to car failure (not attributable to drivers)
            if "constructor retirement" not in [st_a, st_b]:
                rnd_dri_scores[dri_a]["exp"] += e_a
                rnd_dri_scores[dri_a]["act"] += o_a
                rnd_dri_scores[dri_a]["diff"] += diff_a
                rnd_dri_scores[dri_a]["n"] += 1
            
                rnd_dri_scores[dri_b]["exp"] += e_a
                rnd_dri_scores[dri_b]["act"] += o_a
                rnd_dri_scores[dri_b]["diff"] += diff_b
                rnd_dri_scores[dri_b]["n"] += 1
            
            # log constructor changes if diff constructors and neither driver retires due to driver error (not attributable to constructors)
            if con_a != con_b and "driver_retirement" not in [st_a, st_b]:
                rnd_con_scores[con_a]["diff"] += diff_a
                rnd_con_scores[con_a]["n"] += 1
                rnd_con_scores[con_b]["diff"] += diff_b
                rnd_con_scores[con_b]["n"] += 1
                
            # store expected and final values for error analysis
            exp += [e_a, e_b]
            out += [o_a, o_b]
        
        # update driver values for finishing drivers and driver-caused retirements
        for dri in rnd_dri_scores.keys():
            if rnd_dri_scores[dri]["n"] != 0: # more than 1 car on grid
                dri_scores[dri] += (rnd_dri_scores[dri]["diff"] / rnd_dri_scores[dri]["n"])
                
        yr_mat[:, 9] = list(map(lambda el: dri_scores[el], yr_mat[:, 4])) # driver score
        yr_mat[:, 10] = list(map(lambda el: rnd_dri_scores[el]["exp"], yr_mat[:, 4])) # expected outcome
        yr_mat[:, 11] = list(map(lambda el: rnd_dri_scores[el]["act"], yr_mat[:, 4])) # actual outcome

        # update constructor values for finishing drivers
        for con in rnd_con_scores.keys():
            if rnd_con_scores[con]["n"] != 0: # more than 1 car on grid
                con_scores[con] += (rnd_con_scores[con]["diff"] / rnd_con_scores[con]["n"])
        
        yr_mat[:, 8] = list(map(lambda el: con_scores[el], yr_mat[:, 3]))

    if export == False:
        err_df = pd.DataFrame({"pred": exp, "true": out})
        err_df["squared_error"] = (err_df["true"] - err_df["pred"]) ** 2
        neg_rmse = -(pow(err_df["squared_error"].sum() / err_df.shape[0], 0.5))
        
        return neg_rmse
    
    else:
        RES_DF = pd.DataFrame(MOD_MAT, columns=MOD_DF.columns)
        RES_DF.to_csv(CONFIG["data"]["modelled_path"], index=False)


In [33]:
MOD_DF.head()

Unnamed: 0,year,round,date,constructorId,driverId,mapPosition,mapPoints,status,constructorScore,driverScore,expected,actual
0,1950,1,1950-05-13,51,642,1,25.0,finished,,,,
1,1950,1,1950-05-13,51,786,2,18.0,finished,,,,
2,1950,1,1950-05-13,51,686,3,15.0,finished,,,,
3,1950,1,1950-05-13,154,704,4,12.0,finished,,,,
4,1950,1,1950-05-13,154,627,5,10.0,finished,,,,


In [37]:
with open(CONFIG["data"]["params_path"], "r") as infile:
        params_log = yaml.safe_load(infile)

%time model_data(k=params_log["k"], c=params_log["c"], w=params_log["w"], export=True)

#profiler.print_stats()

CPU times: user 2.89 s, sys: 4.45 ms, total: 2.89 s
Wall time: 2.92 s


In [35]:
# run 1: 32.4s
# run 2: 32.2s (easy items moved out of function)
# run 3: > 1m (switch from pandas to numpy matrix) - stopped and lineprofiler added
# run 4: 2.89s (moved from advanced to simple indexing which stopped matrix being copied)

MOD_MAT[:5, :]

array([[1950, 1, '1950-05-13', 51, 642, 1, 25.0, 'finished',
        1573.9956882315437, 1598.6609176420584, 5.5, 11],
       [1950, 1, '1950-05-13', 51, 786, 2, 18.0, 'finished',
        1573.9956882315437, 1580.7225689798659, 5.5, 11],
       [1950, 1, '1950-05-13', 51, 686, 3, 15.0, 'finished',
        1573.9956882315437, 1562.7842203176735, 5.5, 11],
       [1950, 1, '1950-05-13', 154, 704, 4, 12.0, 'finished',
        1531.3393503098303, 1544.845871655481, 5.5, 11],
       [1950, 1, '1950-05-13', 154, 627, 5, 10.0, 'finished',
        1531.3393503098303, 1526.9075229932887, 5.5, 11]], dtype=object)

In [27]:
MOD_DF.reset_index().groupby(["year", "round"])["index"].agg(["min", "max"]).values

array([[  0,  22],
       [ 23,  43],
       [ 44,  78],
       [ 79,  96],
       [ 97, 110]])

In [38]:
RES_DF = pd.DataFrame(MOD_MAT, columns=MOD_DF.columns)

In [39]:
RES_DF.head()

Unnamed: 0,year,round,date,constructorId,driverId,mapPosition,mapPoints,status,constructorScore,driverScore,expected,actual
0,1950,1,1950-05-13,51,642,1,25.0,finished,1573.995688,1598.660918,5.5,11
1,1950,1,1950-05-13,51,786,2,18.0,finished,1573.995688,1580.722569,5.5,11
2,1950,1,1950-05-13,51,686,3,15.0,finished,1573.995688,1562.78422,5.5,11
3,1950,1,1950-05-13,154,704,4,12.0,finished,1531.33935,1544.845872,5.5,11
4,1950,1,1950-05-13,154,627,5,10.0,finished,1531.33935,1526.907523,5.5,11
