Iterations 7, ELO

In [237]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from scipy.stats import linregress
pd.set_option('precision', 2)

In [238]:
race_list = []
suffix_list = []
for year in ['2018','2019','2020','2021','2022']:
    for race_name in ['BUCS_XC','National_XC','IC','MK','Liverpool']:
        try:
            fp = '../data/Men/'+race_name+'_'+year+'.csv'
            race_list.append(pd.read_csv(fp))
            suffix_list.append(str.lower(race_name[:2])+year[-2:])
        except FileNotFoundError:
            #print(fp)
            pass

In [239]:
race_list = [df.drop(columns='raw_time') for df in race_list]

for i,df in enumerate(race_list):
    df.rename(columns = {'position':'position_'+suffix_list[i]}, inplace = True)

In [240]:
complete = pd.merge(race_list[0],race_list[1],how='outer',on='raw_name')
for x in race_list[2:]:
    complete = pd.merge(complete,x,how='outer',on='raw_name')
    
raw_name = complete.pop('raw_name')
complete.insert(0,"raw_name",raw_name)
complete.insert(1,"rating",1500) #init rating value

In [241]:
complete.head()

Unnamed: 0,raw_name,rating,position_na18,position_na19,position_mk19,position_li19,position_bu20,position_na20,position_ic20,position_mk21,position_li21
0,Adam Hickey,1500,1.0,5.0,8.0,4.0,,4.0,2.0,,
1,Andy Coley-Maud,1500,2.0,,,,,,,,
2,John Gilbert,1500,3.0,,,,,15.0,,,
3,Peter Huck,1500,4.0,,,,,,,,
4,Christopher Olley,1500,5.0,,,,,,,,


In [245]:
def performance_ratings_from_numpy_ordered_list(numpy_array,length):
    """ It's important to exclude new entrants"""
    performance_rating = np.full(length,np.nan)
    for i in range(len(numpy_array)-1):
        rating_in = numpy_array[i]
        wins = numpy_array[i:]
        losses = numpy_array[:i]
        wins = wins[wins > rating_in - 400]
        losses = losses[losses < rating_in + 400]
        average = np.mean(np.hstack((wins,losses)))
        wcount = wins.shape[0]
        lcount = losses.shape[0]
        count = wcount + lcount
        perfor = wcount - lcount
        performance_rating[i] = 400 * perfor/count + average
    return performance_rating

In [246]:
def get_rating(df,input_data_col,new_result_col,sfx=' '):
    df_overlap = df[~df[new_result_col].isnull()]
    numpy_array = df_overlap.sort_values(new_result_col,ascending=True).rating.to_numpy()
    return performance_ratings_from_numpy_ordered_list(numpy_array,len(df))
    
def update_with_suffix(complete,sfx):
    new_result_col = 'position_'+sfx
    complete.sort_values(new_result_col,ascending=True,inplace=True)
    complete['rating_'+sfx] = get_rating(complete,'rating',new_result_col,sfx)
    #weighted mean as final iteration
    complete = complete.assign(rating=complete.loc[:, ['rating','rating_'+sfx]].mean(axis=1))
    complete = complete.drop(columns='position_'+sfx)
    complete.sort_values("rating",ascending=False,inplace=True)
    return complete

In [247]:
for suffix in suffix_list:
    complete = update_with_suffix(complete,suffix)

In [248]:
complete.head(30)

Unnamed: 0,raw_name,rating,rating_na18,rating_na19,rating_mk19,rating_li19,rating_bu20,rating_na20,rating_ic20,rating_mk21,rating_li21
0,Adam Hickey,1982.81,1900.0,1897.18,1869.51,1925.01,,1960.63,2045.55,,
10,Calum Johnson,1910.66,1868.0,,,,,1931.6,1943.11,,1945.86
485,Tom Mortimer,1910.15,,,,1855.88,1876.34,,1928.85,,1967.3
250,Mahamed Mahamed,1901.55,,1909.98,1923.85,1934.62,1932.73,,2047.6,,1827.49
407,Joe Wilkinson,1882.32,,,1849.53,1711.88,1871.0,1894.46,1926.34,,
404,Tom Evans,1880.56,,,1905.53,1909.21,,,1955.14,,
255,Joe Steward,1879.03,,1884.38,,,1919.0,1952.47,,,
7,Carl Avery,1876.9,1877.6,1903.58,,1824.85,,1943.28,,,
28,Jayme Rossiter,1866.91,1810.4,,1793.53,,1913.67,1914.79,,,
567,Daniel Jarvis,1864.11,,,,,1908.34,1916.33,,1917.97,


In [249]:
complete_two = complete.dropna(thresh=4) #raw name and rating

In [250]:
complete_two[['raw_name','rating']].sort_values("rating",ascending=False).head(30)

Unnamed: 0,raw_name,rating
0,Adam Hickey,1982.81
10,Calum Johnson,1910.66
485,Tom Mortimer,1910.15
250,Mahamed Mahamed,1901.55
407,Joe Wilkinson,1882.32
404,Tom Evans,1880.56
255,Joe Steward,1879.03
7,Carl Avery,1876.9
28,Jayme Rossiter,1866.91
567,Daniel Jarvis,1864.11


In [251]:
athlete_list = ['Phillip Crout','Jamie Edgerton','Joseph Massingham','Jeremy Dempsey','Jose Gray']
complete_two[complete_two.raw_name.isin(athlete_list)]

Unnamed: 0,raw_name,rating,rating_na18,rating_na19,rating_mk19,rating_li19,rating_bu20,rating_na20,rating_ic20,rating_mk21,rating_li21
504,Joseph Massingham,1734.16,,,,1674.55,1721.67,1813.85,,,
53,Phillip Crout,1680.23,1730.4,1759.58,,,1700.34,1607.39,,,1709.83
594,Jeremy Dempsey,1638.07,,,,,1631.0,,,1823.14,1581.83
359,Jose Gray,1479.88,,1305.18,,,,1557.18,,,


In [252]:
for suffix in suffix_list:
    print(complete["rating_"+suffix].mean())

1503.2000000000003
1513.1776
1529.8258905436442
1541.216115142666
1529.715689709889
1532.2277140197302
1549.7496305122797
1517.6130540515503
1576.2998921964167
