In [23]:
# Scratch check: remainder of 11 divided by 2. The result is not assigned to anything.
11%2

1

# Import libraries and setup

In [1]:
import sys, os, csv, importlib

In [2]:
import numpy as np
import scipy as sc
import scipy.linalg as spl
import scipy.stats as ss
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from nfl_utils import *

In [3]:
# Directory holding the NFL input files, relative to the working directory.
# Alternative location when running from one directory level deeper:
# NFL_DATA_DIR = "../data/nfl"
NFL_DATA_DIR = "data/nfl"
# Seasons 2011 through 2015 inclusive (range end is exclusive).
SEASONS = range(2011,2016)
# Rounds 1 through 16 inclusive (range end is exclusive).
ALL_RNDS = range(1, 17)

#### Read in NFL team data

In [4]:
# Load the team id table from nfl_id.csv and show its (rows, columns) shape.
team_id = pd.read_csv(os.path.join(NFL_DATA_DIR, "nfl_id.csv"))
team_id.shape

(32, 2)

In [5]:
import pickle

# Collect one BT result per season, in the same order as SEASONS.
bt_out_seasons = []
for season in SEASONS:
    # NOTE(review): pickle.load can execute arbitrary code; only load
    # season_<year>.pickle files that come from a trusted source.
    with open(f"season_{season}.pickle", "rb") as pickle_file:
        # Only the first element of each pickled object is kept.
        bt_out_seasons.append(pickle.load(pickle_file)[0])

In [6]:
# Inspect the BT result stored at index 4, i.e. the fifth season in SEASONS (2015).
bt_out_seasons[4]

Unnamed: 0,rank
ARI,5
ATL,14
BAL,28
BUF,16
CAR,1
CHI,13
CIN,4
CLE,31
DAL,29
DEN,2


### ELO Estimation

In [7]:
# Get the ELO predictions data from fivethirtyeight
# Source: https://github.com/fivethirtyeight/data/blob/master/nfl-elo/README.md
# na_values='' adds the empty string to the values parsed as missing.
elo_ft_data = pd.read_csv(os.path.join(NFL_DATA_DIR, "nfl_elo.csv"), na_values='')

# Get all ELO data for the specified seasons: one get_elo_rank_season result per
# entry of SEASONS, in the same order.
elo_out_seasons = [get_elo_rank_season(elo_all = elo_ft_data, season = season) for season in SEASONS]

## ELO vs. BT: top-10 summary table

### Generate table summary

In [8]:
# Number of top-ranked teams to compare between the two models in each season.
TOP_N = 10

In [11]:
num_seasons = len(elo_out_seasons)

# One get_team_season result per season index, for each ranking model.
get_elo_seasons = [
    get_team_season(season_idx=k, model_season_list=elo_out_seasons)
    for k in range(num_seasons)
]
get_bt_seasons = [
    get_team_season(season_idx=k, model_season_list=bt_out_seasons)
    for k in range(num_seasons)
]

# Join the ELO and BT tables season by season; TOP_N is handed to the helper as top_n.
elo_bt_join = []
for idx, season in enumerate(SEASONS):
    elo_bt_join.append(
        get_join_elo_bt_season(
            season_num=season,
            elo_team_season=get_elo_seasons[idx],
            bt_team_season=get_bt_seasons[idx],
            top_n=TOP_N,
        )
    )

# Standalone single-column frame named 'rank' holding 1..TOP_N
rank_col = pd.DataFrame({'rank': list(range(1, TOP_N + 1))})

In [12]:
# Place the last five per-season ELO/BT joins side by side (axis=1) ...
out_elo_bt = pd.concat(elo_bt_join[-5:], sort=False, axis=1)
# ... then prepend the shared 'rank' column as the left-most column.
out_elo_bt = pd.concat([rank_col, out_elo_bt], sort=False, axis=1)
out_elo_bt

Unnamed: 0,rank,ELO 2011,BT 2011,ELO 2012,BT 2012,ELO 2013,BT 2013,ELO 2014,BT 2014,ELO 2015,BT 2015
0,1,GB,GB,NE,HOU,SEA,SEA,SEA,DEN,SEA,CAR
1,2,NE,SF,DEN,ATL,SF,DEN,NE,ARI,CAR,DEN
2,3,NO,NO,GB,SF,NE,NO,DEN,NE,ARI,NE
3,4,PIT,NE,SF,CHI,DEN,KC,GB,SEA,KC,CIN
4,5,BAL,DET,ATL,GB,CAR,SF,DAL,DAL,DEN,ARI
5,6,SF,BAL,SEA,NE,CIN,NE,PIT,GB,NE,GB
6,7,ATL,PIT,NYG,DEN,NO,IND,BAL,PHI,PIT,MIN
7,8,PHI,HOU,CIN,SEA,ARI,CAR,IND,SD,CIN,KC
8,9,SD,CHI,BAL,BAL,IND,ARI,ARI,DET,GB,PIT
9,10,HOU,ATL,HOU,IND,SD,CIN,CIN,KC,MIN,SEA


In [13]:
# Emit the summary table as LaTeX. print() is used so the raw LaTeX source is shown
# with real line breaks instead of an escaped string repr.
# The column spec is derived from the table width (one centred column followed by a
# vertical rule per DataFrame column) so it always matches the number of columns;
# the previous hard-coded spec declared 14 columns for an 11-column table.
print(out_elo_bt.to_latex(index_names=False, escape=False, index=False,
                          column_format='c|' * out_elo_bt.shape[1],
                          header=True, bold_rows=True))

\begin{tabular}{c|c|c|c|c|c|c|c|c|c|c|c|c|c|}
\toprule
 rank & ELO 2011 & BT 2011 & ELO 2012 & BT 2012 & ELO 2013 & BT 2013 & ELO 2014 & BT 2014 & ELO 2015 & BT 2015 \\
\midrule
    1 &       GB &      GB &       NE &     HOU &      SEA &     SEA &      SEA &     DEN &      SEA &     CAR \\
    2 &       NE &      SF &      DEN &     ATL &       SF &     DEN &       NE &     ARI &      CAR &     DEN \\
    3 &       NO &      NO &       GB &      SF &       NE &      NO &      DEN &      NE &      ARI &      NE \\
    4 &      PIT &      NE &       SF &     CHI &      DEN &      KC &       GB &     SEA &       KC &     CIN \\
    5 &      BAL &     DET &      ATL &      GB &      CAR &      SF &      DAL &     DAL &      DEN &     ARI \\
    6 &       SF &     BAL &      SEA &      NE &      CIN &      NE &      PIT &      GB &       NE &      GB \\
    7 &      ATL &     PIT &      NYG &     DEN &       NO &     IND &      BAL &     PHI &      PIT &     MIN \\
    8 &      PHI &     H

## Calculate the average absolute rank difference between ELO and BT for each season

In [16]:
# ELO 'rank' column for the season at index 4 (the fifth entry of SEASONS, 2015),
# ordered by its index labels.
elo_out_seasons[4]['rank'].sort_index()

ARI     3
ATL    17
BAL    20
BUF    13
CAR     2
CHI    25
CIN     8
CLE    30
DAL    24
DEN     5
DET    15
GB      9
HOU    11
IND    18
JAC    31
KC      4
MIA    26
MIN    10
NE      6
NO     22
NYG    21
NYJ    12
OAK    23
PHI    16
PIT     7
SD     27
SEA     1
SF     28
STL    19
TB     29
TEN    32
WAS    14
Name: rank, dtype: int64

In [68]:
# BT 'rank' column for the season at index 4 (the fifth entry of SEASONS, 2015),
# ordered by its index labels. bt_out_seasons holds one entry per season in
# SEASONS (five in total), so index 4 is the last valid position; the previous
# index 6 raises IndexError on a fresh kernel.
bt_out_seasons[4]['rank'].sort_index()

ARI     5
ATL    14
BAL    28
BUF    16
CAR     1
CHI    13
CIN     4
CLE    31
DAL    29
DEN     2
DET    19
GB      6
HOU    18
IND    17
JAC    30
KC      8
MIA    26
MIN     7
NE      3
NO     24
NYG    22
NYJ    11
OAK    15
PHI    23
PIT     9
SD     27
SEA    10
SF     21
STL    12
TB     25
TEN    32
WAS    20
Name: rank, dtype: int64

In [17]:
# Compare the two models for the last season in SEASONS.
i = len(SEASONS) - 1
a = elo_out_seasons[i]['rank'].sort_index()  # ELO ranks, ordered by index label
b = bt_out_seasons[i]['rank'].sort_index()   # BT ranks, ordered by index label
# Sorted integer positions of the entries ranked within the top TOP_N by either model.
# NOTE(review): positions are shared between a and b, which assumes both Series
# carry the same labels after sort_index() - confirm for every season.
index = np.union1d(np.flatnonzero((a <= TOP_N).to_numpy()),
                   np.flatnonzero((b <= TOP_N).to_numpy()))

In [18]:
# BT ranks at the selected positions. `index` holds integer positions, so use .iloc;
# plain b[index] on a label-indexed Series relies on deprecated positional fallback.
b.iloc[index]

ARI     5
CAR     1
CIN     4
DEN     2
GB      6
KC      8
MIN     7
NE      3
PIT     9
SEA    10
Name: rank, dtype: int64

In [19]:
# ELO ranks at the selected positions. `index` holds integer positions, so use .iloc;
# plain a[index] on a label-indexed Series relies on deprecated positional fallback.
a.iloc[index]

ARI     3
CAR     2
CIN     8
DEN     5
GB      9
KC      4
MIN    10
NE      6
PIT     7
SEA     1
Name: rank, dtype: int64

In [20]:
# For every season, average the absolute ELO-vs-BT rank difference over the entries
# that at least one of the two models ranks within its top TOP_N.
# TOP_N is the constant defined earlier in the notebook; it is not redefined here.
av_dif = []
for i in range(len(SEASONS)):
    a = elo_out_seasons[i]['rank'].sort_index()  # ELO ranks, ordered by index label
    b = bt_out_seasons[i]['rank'].sort_index()   # BT ranks, ordered by index label
    # Sorted integer positions of entries in the top TOP_N of either model.
    # NOTE(review): assumes a and b carry the same labels after sort_index().
    index = np.union1d(np.flatnonzero((a <= TOP_N).to_numpy()),
                       np.flatnonzero((b <= TOP_N).to_numpy()))
    # Size of the union: TOP_N when both models pick the same entries, larger otherwise.
    print(len(index))
    # .iloc selects by position; the subtraction then aligns the two selections by label.
    av_dif.append((a.iloc[index] - b.iloc[index]).abs().mean())

12
12
11
14
10


In [21]:
# Mean absolute ELO-vs-BT rank difference, one value per season in SEASONS order.
av_dif

[4.166666666666667, 5.0, 3.5454545454545454, 4.285714285714286, 3.4]