This notebook, written by E. Karlé, contains the code necessary to reproduce Figure 5 and Tables 3 and 4 from the article "Dynamic Ranking with the BTL Model: A Nearest Neighbor based Rank Centrality Method" (https://arxiv.org/abs/2109.13743).

This notebook is dedicated to the analysis of the NFL data set, available in the nflWAR package (Yurko et al., 2019), via the estimation of the top 10 ranks at the end of each season and via the computation of correlations with respect to the ELO ratings, which are considered in this analysis as the true ratings.

To use this notebook, the user needs to download the NFL data from the nflWAR package for seasons 2011 to 2015, and set the variable data_dir to the path of the directory containing that data.

In [4]:
# Standard-library and third-party imports used by the notebook.
import numpy as np
import pickle
import random
import matplotlib.pyplot as plt
import os
import pandas as pd
import math

# Append the TeX binary directory to PATH for this kernel session.
# NOTE(review): presumably needed so matplotlib can find a LaTeX executable
# (this is a macOS-specific location) — confirm; harmless if the path is absent.
os.environ["PATH"] += os.pathsep + '/Library/TeX/texbin'

import sys
# Make the local 'modules' directory importable; the path is relative to the
# current working directory, so the notebook must be run from the folder that
# contains 'modules'.
sys.path.append('modules')
import scipy.stats as ss
from tabulate import tabulate

# Project-local modules, resolved through the sys.path entry added above.
import graph_module as graph
import nfl_module as nfl

In [5]:
# Configuration and input data.
# data_dir is the folder holding the NFL files; all_rnds and all_seasons are
# the rounds (1..16) and seasons (2011..2015) covered by the analysis.
data_dir = 'nfl_data'
all_rnds = np.arange(1,17)
all_seasons = np.arange(2011,2016)

# Read the team-id table and the ELO table from data_dir, in that order.
team_id, elo_all = (
    pd.read_csv(os.path.join(data_dir, csv_name))
    for csv_name in ("nfl_id.csv", "nfl_elo.csv")
)

# Analysis of the NFL data set

In [6]:
# Per-season accumulators (one entry per season, in the order of all_seasons).
# They are re-created on every run of this cell, so re-executing the cell does
# not append to results from a previous run.
corr_drc = []
corr_mle = []
rank_corr_drc = []
rank_corr_mle = []
rank_corr_borda = []

# Create the output folder up front: the CSV exports at the end of each
# iteration would otherwise fail on a fresh checkout, after the expensive
# estimation for the season has already been done.
results_dir = 'Results_nfl'
os.makedirs(results_dir, exist_ok=True)

for season in all_seasons:
    print(season)
    # Reset both random generators at the start of every season so that each
    # season's run starts from the same seed, independently of the others.
    random.seed(0)
    np.random.seed(0)
    result = nfl.get_final_rank_season(data_dir, season, team_id, all_rnds,elo_all,t = 1,loocv= True,num_loocv = 40,borda = True,delta_borda = False,elo = True, mle = True,loocv_mle = 20)

    # Unpack the ELO reference (ranks and scores) and the per-method outputs.
    df_rank_elo,df_score_elo,l_drc,l_borda,l_mle = result
    # The third element of l_drc is not used here; it is bound to a named
    # throwaway rather than `_`, which IPython uses for the last cell output.
    pi_drc,df_drc,_drc_unused = l_drc
    beta_mle,df_mle = l_mle
    df_borda = l_borda[1]

    # Get pi_mle: exponentiate the MLE parameters and normalize them to sum to 1
    pi_mle = np.exp(beta_mle)
    pi_mle = pi_mle/sum(pi_mle)

    # Normalize the ELO scores to sum to 1, as pi_mle is normalized above
    # (pi_drc is presumably returned already normalized — TODO confirm)
    df_score_elo = df_score_elo/sum(df_score_elo)

    # Pearson correlation between ELO scores and estimated strengths for DRC and MLE
    corr_drc.append(np.corrcoef(df_score_elo,pi_drc)[0,1])
    corr_mle.append(np.corrcoef(df_score_elo,pi_mle)[0,1])

    # Kendall tau correlation between ELO ranks and estimated ranks for DRC, MLE and Borda
    rank_corr_drc.append(ss.kendalltau(df_rank_elo,df_drc)[0])
    rank_corr_mle.append(ss.kendalltau(df_rank_elo,df_mle)[0])
    rank_corr_borda.append(ss.kendalltau(df_rank_elo,df_borda)[0])

    # Save the ranks of every method, one CSV file per method and per season
    df_drc.to_csv(os.path.join(results_dir, 'ranks_drc_'+str(season)+'.csv'))
    df_borda.to_csv(os.path.join(results_dir, 'ranks_borda_'+str(season)+'.csv'))
    df_rank_elo.to_csv(os.path.join(results_dir, 'ranks_elo_'+str(season)+'.csv'))
    df_mle.to_csv(os.path.join(results_dir, 'ranks_mle_'+str(season)+'.csv'))

2011


  pis[i,:] = sim.RC_dyn(t,Y,A, delta, tol=1e-12)
  prob = pi[j]/(pi[i]+pi[j])
  prob = pi[j]/(pi[i]+pi[j])


1-th cv done
2-th cv done
3-th cv done
4-th cv done
5-th cv done
6-th cv done
7-th cv done
8-th cv done
9-th cv done
10-th cv done
11-th cv done
12-th cv done
13-th cv done
14-th cv done
15-th cv done
16-th cv done
17-th cv done
18-th cv done
19-th cv done
20-th cv done
season 2011 finished. 



NameError: name 'pi_rc' is not defined

# Creation of the Tables 3,4 : correlations between ELO and our estimators

In [None]:
# Table 3 : correlation of the ranks

# Build each labelled row as a NEW list. list.insert() works in place and
# returns None, so assigning its result would give empty rows, and it would
# also add another label to the accumulator lists on every re-run of this cell.
row_drc = ['DRC'] + list(rank_corr_drc)
row_mle = ['MLE'] + list(rank_corr_mle)
row_borda = ['Borda'] + list(rank_corr_borda)

# Headers are passed as a plain list rather than a NumPy array; there is one
# header per season, so the leading label column has no header of its own.
print(tabulate([row_drc,row_mle,row_borda],headers=list(all_seasons)))

In [None]:
# Table 4 : correlation of the strengths

# Build each labelled row as a NEW list. list.insert() works in place and
# returns None, so assigning its result would give empty rows, and it would
# also add another label to the accumulator lists on every re-run of this cell.
row_drc = ['DRC'] + list(corr_drc)
row_mle = ['MLE'] + list(corr_mle)

# Headers are passed as a plain list rather than a NumPy array; there is one
# header per season, so the leading label column has no header of its own.
print(tabulate([row_drc,row_mle],headers=list(all_seasons)))