In [1]:
import pandas as pd
import numpy as np
from math import sqrt, ceil
from timeit import timeit, Timer

In [2]:
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import rc
import seaborn as sns
from matplotlib.animation import FuncAnimation
from matplotlib.patches import ConnectionPatch
import matplotlib.animation as animation

rc('text', usetex=False)
plt.style.use('seaborn')
%matplotlib inline

legend_opts = {
    'fontsize':12,
    'frameon':True, 
    'framealpha':1,
    'facecolor':'white',
    'edgecolor':'black',
    'labelspacing':0.1}

In [30]:
# Enable IPython's autoreload extension so edits to the local `utils` module are
# picked up without restarting the kernel.
%load_ext autoreload
# Register `utils` as a module to be auto-reloaded.
%aimport utils
# Mode 1: reload only the modules registered with %aimport before each execution.
%autoreload 1
# NOTE(review): the wildcard import hides which names come from `utils`
# (e.g. get_yearly_scores, AverageScore, train_multi are used below without a
# visible definition); consider importing the required names explicitly.
from utils import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load data

In [4]:
# Read the two score tables from CSV: male first, then female.
male_scores, female_scores = map(
    pd.read_csv,
    ('scores/trimmed_male.csv', 'scores/trimmed_female.csv'),
)

In [8]:
# Hold-out design: of the 14 seasons 2005-2018, ten randomly drawn seasons form the
# training set and the other four form the test set. 2018 is never eligible for the
# draw, so it always lands in the test set (it is needed there for visualisation).
all_years = range(2005, 2019)
years_before_last = range(2005, 2018)
year_seed = np.random.RandomState(seed=42)  # fixed seed keeps the split reproducible
train_years = sorted(year_seed.choice(years_before_last, size=10, replace=False))
# Whatever was not drawn from 2005-2017, plus the reserved final season.
test_years = sorted(set(years_before_last).difference(train_years) | {2018})
train_years, test_years

([2005, 2006, 2007, 2009, 2010, 2012, 2013, 2014, 2016, 2017],
 [2008, 2011, 2015, 2018])

In [9]:
# Partition the male scores by event code: rows with event 'WR' become
# all_world_scores, every other row goes to all_season_scores.
all_season_scores = male_scores[male_scores['event'].ne('WR')]
all_world_scores = male_scores[male_scores['event'].eq('WR')]

In [10]:
# Divide the 10 training years into two halves (train / validation):
# years1 holds five randomly sampled years, years2 keeps the remaining five
# in their original train_years order.
random_state = np.random.RandomState(seed=42)
years1 = [*random_state.choice(train_years, size=5, replace=False)]
years2 = list(filter(lambda candidate: candidate not in years1, train_years))
years1, years2

([2016, 2006, 2012, 2005, 2014], [2007, 2009, 2010, 2013, 2017])

# Predict rankings in the training set using different methods

## Male

In [51]:
# Load the two saved coefficient arrays from the data/ folder.
# beta1 is multiplied with the 4-factor skater scores in the scoring loop below.
beta1, beta2 = (np.load(path) for path in ('data/beta1.npy', 'data/beta2.npy'))

Average

In [118]:
# For every training year, fit each scoring method on that year's season scores and
# collect the resulting per-skater scores, restricted to the entries of the
# world_scores index, into one table with a column per method.
all_skater_scores = {}
for year in train_years:
    season_scores, world_scores = get_yearly_scores(year, all_season_scores, all_world_scores)

    # The three models exposing .fit() / .skater_scores, fitted in column order.
    avg = AverageScore()
    avg.fit(season_scores)

    add = Linear()
    add.fit(season_scores)

    mul = LogLinear()
    mul.fit(season_scores)

    # Align each model's scores to the world_scores index; skaters without a
    # score come back as NaN from reindex and are dropped.
    yearly_skater_scores = [
        fitted.skater_scores.reindex(world_scores.index).dropna()
        for fitted in (avg, add, mul)
    ]

    # Single-factor model (column 'hyb'): squeeze the one-factor result before aligning.
    _, _, skater_scores = train_multi(season_scores, n_factors=1, alpha=0.0005, n_iter=47)
    skater_scores = skater_scores.squeeze().reindex(world_scores.index).dropna()
    yearly_skater_scores.append(skater_scores)

    # Four-factor sequential model (column 'multi'): combine the factors with beta1.
    _, _, skater_scores = train_sequential_multi(season_scores, n_factors=4, alpha=0.0005, n_iter=35)
    skater_scores = (skater_scores @ beta1).reindex(world_scores.index).dropna()
    yearly_skater_scores.append(skater_scores)

    # Side-by-side table; column labels follow the order in which scores were collected.
    yearly_skater_scores = pd.concat(yearly_skater_scores, axis=1)
    yearly_skater_scores.columns = ['avg', 'add', 'mul', 'hyb', 'multi']
    all_skater_scores[year] = yearly_skater_scores

Alpha: 0.0005, Iter: 46, Last RMSE: 8.63, Delta RMSE: -0.0010916085
Factor: 0, Alpha: 0.0005, Iter: 35, Last RMSE: 8.662, Delta RMSE: -0.0080908948
Factor: 1, Alpha: 0.0005, Iter: 35, Last RMSE: 8.605, Delta RMSE: -0.0024092949
Factor: 2, Alpha: 0.0005, Iter: 35, Last RMSE: 8.519, Delta RMSE: -0.0036599525
Factor: 3, Alpha: 0.0005, Iter: 35, Last RMSE: 8.518, Delta RMSE: -0.000746563
Alpha: 0.0005, Iter: 46, Last RMSE: 7.96, Delta RMSE: -0.0009913676
Factor: 0, Alpha: 0.0005, Iter: 35, Last RMSE: 7.999, Delta RMSE: -0.007749829
Factor: 1, Alpha: 0.0005, Iter: 35, Last RMSE: 7.986, Delta RMSE: -0.0006821397
Factor: 2, Alpha: 0.0005, Iter: 35, Last RMSE: 7.957, Delta RMSE: -0.0011246966
Factor: 3, Alpha: 0.0005, Iter: 35, Last RMSE: 7.931, Delta RMSE: -0.0010131301
Alpha: 0.0005, Iter: 46, Last RMSE: 7.62, Delta RMSE: -0.0015000168
Factor: 0, Alpha: 0.0005, Iter: 35, Last RMSE: 7.679, Delta RMSE: -0.0133247831
Factor: 1, Alpha: 0.0005, Iter: 35, Last RMSE: 7.67, Delta RMSE: -0.0005301796

In [119]:
# Inspect the 2005 table: one row per skater, one column per scoring method
# ('avg', 'add', 'mul', 'hyb', 'multi').
all_skater_scores[2005]

Unnamed: 0_level_0,avg,add,mul,hyb,multi
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Stephane, LAMBIEL",196.47,109.731448,2.298033,7.396454,13.495678
"Jeffrey, BUTTLE",206.566667,130.1489,2.57901,8.881744,16.285082
"Evan, LYSACEK",177.116667,100.2209,2.185489,7.016066,12.987351
"Johnny, WEIR",212.113333,134.286965,2.613022,9.181933,16.779091
"Chengjiang, LI",196.0875,113.58645,2.359024,7.782148,14.319629
"Brian, JOUBERT",202.6425,127.310793,2.534848,8.737669,16.140595
"Emanuel, SANDHU",189.623333,117.296638,2.417828,8.096663,15.053905
"Kevin, VAN DER PERREN",188.965,110.906654,2.331865,7.533844,13.861783
"Frederic, DAMBIER",176.326667,95.684756,2.128268,6.659239,12.141046
"Timothy, GOEBEL",208.28,122.470961,2.455402,8.29008,14.874947
