In [1]:
import sys, os, importlib

In [2]:
import numpy as np
import scipy as sc
import csv
import pandas as pd
import scipy.linalg as spl
import matplotlib.pyplot as plt
import scipy.stats as ss
%matplotlib inline

In [3]:
sys.path.insert(1, '..')
import grad_utils as model
import opt_utils
import cv_utils

In [4]:
import miccs.optimize as core

In [5]:
importlib.reload(opt_utils)
importlib.reload(cv_utils)

<module 'cv_utils' from '../cv_utils.py'>

In [6]:
team_id = pd.read_csv("../../data/nfl/nfl_id.csv")

In [7]:
team_id

Unnamed: 0,name,id
0,ARI,1
1,ATL,2
2,BAL,3
3,BUF,4
4,CAR,5
5,CHI,6
6,CIN,7
7,CLE,8
8,DAL,9
9,DEN,10


In [8]:
def max_change(beta):
    '''
    get the maximal change in rank in neighboring timepoint based on beta
    '''
    T,N = beta.shape
    arg = np.array([ss.rankdata(-beta[ii]) for ii in range(T)])
    return np.max(abs(arg[1:] - arg[:-1]))

def newton_l2_sq(data,l_penalty):
    '''
    Newton
    '''
    # intiialize optimization
    max_iter = 1000
    ths = 1e-12

    # vanilla PGD parameters
    step_size = 0.03

    # backtracking parameters
    step_init = 0.1
    max_back = 100
    a = 0.2
    b = 0.5
    
    T, N = data.shape[0:2]
    beta = np.zeros(data.shape[:2]).reshape((N * T,1))
    step_size = 1

    # initialize record
    objective_nt = [objective_l2_sq(beta, data, l_penalty)]
#     print("initial objective value: %f"%objective_nt[-1])

    # iteration
    for i in range(max_iter):
        # compute gradient
        gradient = grad_l2_sq(beta, data, l_penalty)[1:]
        hessian = hess_l2_sq(beta, data, l_penalty)[1:,1:]
        # newton update
        beta[1:] = beta[1:] - step_size * sc.linalg.solve(hessian, gradient)

        # objective value
        objective_nt.append(objective_l2_sq(beta, data, l_penalty))

#         print("%d-th Newton, objective value: %f"%(i+1, objective_nt[-1]))
        if objective_nt[-2] - objective_nt[-1] < ths:
#             print("Converged!")
            break

    if i >= max_iter:
        print("Not converged.")
        
    return beta.reshape((T,N))

def plot_nfl_round(beta,team_id,season):
    T, N = beta.shape
    year = range(1,17)
    f = plt.figure(1, figsize = (6,4))

    for i in range(N):
        plt.plot(year,beta[:,i], label=team_id['name'][i], color = np.random.rand(3,))
    plt.xlabel("round")
    plt.ylabel("latent parameter")
    plt.legend(loc='upper left', bbox_to_anchor=(1, 1, 1, 0),prop={'size': 5})
    plt.ticklabel_format(style='plain',axis='x',useOffset=False)

    f.savefig("nfl_round_"+str(season)+".pdf", bbox_inches='tight')
        

def get_single_round_pwise(rnd_num, nfl_data_dir, season):
    """
    Gets the pairwise numpy array of score diffences across teams for a single
       round in a season
    """
    fname = "round" + "_" + str(rnd_num).zfill(2) + ".csv"
    fpath = os.path.join(nfl_data_dir, str(season), fname)
    rnd_df = pd.read_csv(fpath)
    pwise_diff = rnd_df.pivot(index='team', columns='team_other',values='diff').values
    pwise_diff[pwise_diff >= 0] = 1
    pwise_diff[pwise_diff < 0] = 0
    pwise_diff[np.isnan(pwise_diff)] = 0
    return pwise_diff

def get_final_rank_season(data_dir, season, threshold, plot = True):
    ALL_RNDS = range(1, 17)
    game_matrix_list = np.array([get_single_round_pwise(rnd_num=rnd, nfl_data_dir=data_dir, season=season) 
                                  for rnd in ALL_RNDS])
    
    lambdas_smooth = np.linspace(0, 5, 21)[1:]
    lambda_cv, nll_cv, beta = cv_utils.loocv(game_matrix_list, lambdas_smooth, opt_utils.newton_l2_sq)
    
#     ## heuristics are deprecated now
#     lam_list = np.arange(1,80) * 0.5
#     val_list = []

#     data = game_matrix_list
#     for i in range(len(lam_list)):
#         val_list.append(max_change(beta = newton_l2_sq(data,lam_list[i])))

#     # plt.plot(lam_list,val_list)
    
#     while val_list[-1] > threshold:
#         threshold += 1
        
#     ix = next(idx for idx, value in enumerate(val_list) if value <= threshold)
#     lambda_star = lam_list[ix]

#     beta = newton_l2_sq(data,lambda_star)

    if plot:
        plot_nfl_round(beta = beta,team_id = team_id,season = SEASON)

    arg = np.argsort(-beta,axis=1)
    rank_list = pd.DataFrame(data={(i):team_id['name'][arg[i-1,]].values for i in range(1,17)})
    # rank_list.to_csv('rank_list_10_round.csv')
    rank_last = rank_list[16]
    rank_last = pd.DataFrame({'rank':range(len(rank_last))},index = rank_last.values)
    
    return rank_last.sort_index() + 1

def get_elo_rank_season(elo_all,season):
    elo_season = elo_all.iloc[np.where(ELO_all['season'] == season)]
    elo_season = elo_season[pd.isnull(elo_season['playoff'])]
    a = elo_season[['team1','elo1_post']]
    a.columns = ['team','elo']
    a = a.reset_index()
    b = elo_season[['team2','elo2_post']]
    b.columns = ['team','elo']
    b = b.reset_index()

    c = pd.concat([a,b])
    c = c.sort_values(by = ['index'])    
    d = c.groupby(by = ['team']).last()
    
    x = d.index.values
    x[np.where(x == 'LAR')] = 'STL'
    x[np.where(x == 'LAC')] = 'SD'
    x[np.where(x == 'JAX')] = 'JAC'
    x[np.where(x == 'WSH')] = 'WAS'
    
    elo_rank = pd.DataFrame({'rank': ss.rankdata(-d['elo'])},index = x).sort_index()
    
    return elo_rank

# Time unit: round

### B-T model estimation

In [9]:
nfl_data_dir = "../../data/nfl"
season_list = range(2009,2016)
threshold = 3
bt_list = []

for season in season_list:
    print(str(season))
    bt_list.append(get_final_rank_season(data_dir = nfl_data_dir, season = season, threshold = threshold, plot = False))

2009




1-th cv done
2-th cv done
3-th cv done
4-th cv done
5-th cv done
6-th cv done
7-th cv done
8-th cv done
9-th cv done
10-th cv done
11-th cv done
12-th cv done
13-th cv done
14-th cv done
15-th cv done
16-th cv done
17-th cv done
18-th cv done
19-th cv done
20-th cv done
21-th cv done
22-th cv done
23-th cv done
24-th cv done
25-th cv done
26-th cv done
27-th cv done
28-th cv done
29-th cv done
30-th cv done
31-th cv done
32-th cv done
33-th cv done
34-th cv done
35-th cv done
36-th cv done
37-th cv done
38-th cv done
39-th cv done
40-th cv done
41-th cv done
42-th cv done
43-th cv done
44-th cv done
45-th cv done
46-th cv done
47-th cv done
48-th cv done
49-th cv done
50-th cv done
51-th cv done
52-th cv done
53-th cv done
54-th cv done
55-th cv done
56-th cv done
57-th cv done
58-th cv done
59-th cv done
60-th cv done
61-th cv done
62-th cv done
63-th cv done
64-th cv done
65-th cv done
66-th cv done
67-th cv done
68-th cv done
69-th cv done
70-th cv done
71-th cv done
72-th cv done
7



1-th cv done
2-th cv done
3-th cv done
4-th cv done
5-th cv done
6-th cv done
7-th cv done
8-th cv done
9-th cv done
10-th cv done
11-th cv done
12-th cv done
13-th cv done
14-th cv done
15-th cv done
16-th cv done
17-th cv done
18-th cv done
19-th cv done
20-th cv done
21-th cv done
22-th cv done
23-th cv done
24-th cv done
25-th cv done
26-th cv done
27-th cv done
28-th cv done
29-th cv done
30-th cv done
31-th cv done
32-th cv done
33-th cv done
34-th cv done
35-th cv done
36-th cv done
37-th cv done
38-th cv done
39-th cv done
40-th cv done
41-th cv done
42-th cv done
43-th cv done
44-th cv done
45-th cv done
46-th cv done
47-th cv done
48-th cv done
49-th cv done
50-th cv done
51-th cv done
52-th cv done
53-th cv done
54-th cv done
55-th cv done
56-th cv done
57-th cv done
58-th cv done
59-th cv done
60-th cv done
61-th cv done
62-th cv done
63-th cv done
64-th cv done
65-th cv done
66-th cv done
67-th cv done
68-th cv done
69-th cv done
70-th cv done
71-th cv done
72-th cv done
7



1-th cv done
2-th cv done
3-th cv done
4-th cv done
5-th cv done
6-th cv done
7-th cv done
8-th cv done
9-th cv done
10-th cv done
11-th cv done
12-th cv done
13-th cv done
14-th cv done
15-th cv done
16-th cv done
17-th cv done
18-th cv done
19-th cv done
20-th cv done
21-th cv done
22-th cv done
23-th cv done
24-th cv done
25-th cv done
26-th cv done
27-th cv done
28-th cv done
29-th cv done
30-th cv done
31-th cv done
32-th cv done
33-th cv done
34-th cv done
35-th cv done
36-th cv done
37-th cv done
38-th cv done
39-th cv done
40-th cv done
41-th cv done
42-th cv done
43-th cv done
44-th cv done
45-th cv done
46-th cv done
47-th cv done
48-th cv done
49-th cv done
50-th cv done
51-th cv done
52-th cv done
53-th cv done
54-th cv done
55-th cv done
56-th cv done
57-th cv done
58-th cv done
59-th cv done
60-th cv done
61-th cv done
62-th cv done
63-th cv done
64-th cv done
65-th cv done
66-th cv done
67-th cv done
68-th cv done
69-th cv done
70-th cv done
71-th cv done
72-th cv done
7



1-th cv done
2-th cv done
3-th cv done
4-th cv done
5-th cv done
6-th cv done
7-th cv done
8-th cv done
9-th cv done
10-th cv done
11-th cv done
12-th cv done
13-th cv done
14-th cv done
15-th cv done
16-th cv done
17-th cv done
18-th cv done
19-th cv done
20-th cv done
21-th cv done
22-th cv done
23-th cv done
24-th cv done
25-th cv done
26-th cv done
27-th cv done
28-th cv done
29-th cv done
30-th cv done
31-th cv done
32-th cv done
33-th cv done
34-th cv done
35-th cv done
36-th cv done
37-th cv done
38-th cv done
39-th cv done
40-th cv done
41-th cv done
42-th cv done
43-th cv done
44-th cv done
45-th cv done
46-th cv done
47-th cv done
48-th cv done
49-th cv done
50-th cv done
51-th cv done
52-th cv done
53-th cv done
54-th cv done
55-th cv done
56-th cv done
57-th cv done
58-th cv done
59-th cv done
60-th cv done
61-th cv done
62-th cv done
63-th cv done
64-th cv done
65-th cv done
66-th cv done
67-th cv done
68-th cv done
69-th cv done
70-th cv done
71-th cv done
72-th cv done
7



1-th cv done
2-th cv done
3-th cv done
4-th cv done
5-th cv done
6-th cv done
7-th cv done
8-th cv done
9-th cv done
10-th cv done
11-th cv done
12-th cv done
13-th cv done
14-th cv done
15-th cv done
16-th cv done
17-th cv done
18-th cv done
19-th cv done
20-th cv done
21-th cv done
22-th cv done
23-th cv done
24-th cv done
25-th cv done
26-th cv done
27-th cv done
28-th cv done
29-th cv done
30-th cv done
31-th cv done
32-th cv done
33-th cv done
34-th cv done
35-th cv done
36-th cv done
37-th cv done
38-th cv done
39-th cv done
40-th cv done
41-th cv done
42-th cv done
43-th cv done
44-th cv done
45-th cv done
46-th cv done
47-th cv done
48-th cv done
49-th cv done
50-th cv done
51-th cv done
52-th cv done
53-th cv done
54-th cv done
55-th cv done
56-th cv done
57-th cv done
58-th cv done
59-th cv done
60-th cv done
61-th cv done
62-th cv done
63-th cv done
64-th cv done
65-th cv done
66-th cv done
67-th cv done
68-th cv done
69-th cv done
70-th cv done
71-th cv done
72-th cv done
7



1-th cv done
2-th cv done
3-th cv done
4-th cv done
5-th cv done
6-th cv done
7-th cv done
8-th cv done
9-th cv done
10-th cv done
11-th cv done
12-th cv done
13-th cv done
14-th cv done
15-th cv done
16-th cv done
17-th cv done
18-th cv done
19-th cv done
20-th cv done
21-th cv done
22-th cv done
23-th cv done
24-th cv done
25-th cv done
26-th cv done
27-th cv done
28-th cv done
29-th cv done
30-th cv done
31-th cv done
32-th cv done
33-th cv done
34-th cv done
35-th cv done
36-th cv done
37-th cv done
38-th cv done
39-th cv done
40-th cv done
41-th cv done
42-th cv done
43-th cv done
44-th cv done
45-th cv done
46-th cv done
47-th cv done
48-th cv done
49-th cv done
50-th cv done
51-th cv done
52-th cv done
53-th cv done
54-th cv done
55-th cv done
56-th cv done
57-th cv done
58-th cv done
59-th cv done
60-th cv done
61-th cv done
62-th cv done
63-th cv done
64-th cv done
65-th cv done
66-th cv done
67-th cv done
68-th cv done
69-th cv done
70-th cv done
71-th cv done
72-th cv done
7



1-th cv done
2-th cv done
3-th cv done
4-th cv done
5-th cv done
6-th cv done
7-th cv done
8-th cv done
9-th cv done
10-th cv done
11-th cv done
12-th cv done
13-th cv done
14-th cv done
15-th cv done
16-th cv done
17-th cv done
18-th cv done
19-th cv done
20-th cv done
21-th cv done
22-th cv done
23-th cv done
24-th cv done
25-th cv done
26-th cv done
27-th cv done
28-th cv done
29-th cv done
30-th cv done
31-th cv done
32-th cv done
33-th cv done
34-th cv done
35-th cv done
36-th cv done
37-th cv done
38-th cv done
39-th cv done
40-th cv done
41-th cv done
42-th cv done
43-th cv done
44-th cv done
45-th cv done
46-th cv done
47-th cv done
48-th cv done
49-th cv done
50-th cv done
51-th cv done
52-th cv done
53-th cv done
54-th cv done
55-th cv done
56-th cv done
57-th cv done
58-th cv done
59-th cv done
60-th cv done
61-th cv done
62-th cv done
63-th cv done
64-th cv done
65-th cv done
66-th cv done
67-th cv done
68-th cv done
69-th cv done
70-th cv done
71-th cv done
72-th cv done
7

### ELO reference

In [11]:
ELO_all = pd.read_csv("../../data/nfl/nfl_elo.csv",na_values='')
season_list = range(2009,2016)
elo_list = []

for season in season_list:
    print(str(season))
    elo_list.append(get_elo_rank_season(elo_all = ELO_all, season = season))

2009
2010
2011
2012
2013
2014
2015


In [12]:
a = elo_list[6]
b = bt_list[6].sort_index()
c = pd.concat([a,b],axis = 1)
c.columns = ['rank_elo','rank_bt']
c.sort_values(by = ['rank_elo'])

Unnamed: 0,rank_elo,rank_bt
SEA,1.0,4
CAR,2.0,1
ARI,3.0,2
KC,4.0,3
DEN,5.0,6
NE,6.0,14
PIT,7.0,8
CIN,8.0,7
GB,9.0,9
MIN,10.0,5


In [13]:
av_dif = []
for i in range(7):
    a = elo_list[i]
    b = bt_list[i].sort_index()
    av_dif.append(np.mean(abs(a-b)))

In [14]:
av_dif

[rank    3.125
 dtype: float64, rank    3.0625
 dtype: float64, rank    2.625
 dtype: float64, rank    3.1875
 dtype: float64, rank    2.625
 dtype: float64, rank    1.9375
 dtype: float64, rank    2.8125
 dtype: float64]