In [21]:
import numpy as np
import pandas as pd
from itertools import combinations
import matplotlib.pyplot as plt
%matplotlib inline
import scipy as sc
import time
import sys

In [22]:
def score_bagging(Pij, stats1, namesCols, alpha, N, Nsims = 2500, method = 0):
    """Compute damped power-iteration scores with every N-subset of teams removed.

    For each combination of ``N`` teams, the matching rows and columns are
    deleted from ``Pij``, the recurrence ``x <- alpha * P x + (1 - alpha) * v``
    is applied ``Nsims`` times, and the resulting scores of the remaining
    teams are written into one column of the output.

    Parameters
    ----------
    Pij : array-like, shape (n_teams, n_teams)
        Square transition matrix. The number of teams is taken from its
        first dimension. It is expected to be normalised by the caller;
        nothing is re-normalised here after rows/columns are removed.
    stats1 : mapping or DataFrame
        Only read when ``method != 0``; ``stats1[namesCols]`` must yield one
        weight per team, in the same order as the rows of ``Pij``.
    namesCols : hashable
        Key/column used to index ``stats1`` (ignored when ``method == 0``).
    alpha : float
        Weight of the ``P x`` term; ``1 - alpha`` weights the teleport vector.
    N : int
        Number of teams removed per run. ``N == 0`` performs a single run on
        the full matrix.
    Nsims : int, optional
        Number of iterations of the recurrence.
    method : int, optional
        ``0`` uses a uniform teleport vector; any other value uses the
        weights from ``stats1[namesCols]`` normalised to sum to one.

    Returns
    -------
    numpy.ndarray
        * ``N == 0``: column vector of shape ``(n_teams, 1)``.
        * otherwise: array of shape ``(n_teams, C(n_teams, N))``; column ``k``
          holds the scores for the k-th combination (in
          ``itertools.combinations`` order) and is zero at the removed teams.

    Notes
    -----
    The iteration starts from the uniform vector ``1 / (n_teams - N)``.
    Earlier revisions started from ``1 / n_teams - N`` because of a missing
    pair of parentheses; results only differ when ``Nsims`` is too small for
    the iteration to have converged.
    """
    Pij = np.asarray(Pij)
    # Derive the team count from the matrix instead of assuming 32 teams.
    n_teams = Pij.shape[0]
    teamnum = np.arange(n_teams)

    if N == 0:
        # Nothing is removed: a single run on the unmodified matrix.
        # (Zero columns are intentionally not patched on this path.)
        v = _teleport_vector(stats1, namesCols, (), n_teams, method)
        return _power_iterate(Pij, v, alpha, Nsims)

    n_kept = n_teams - N
    combs = list(combinations(teamnum, N))
    # One column of scores per combination; removed teams stay at zero.
    scores = np.zeros((n_teams, len(combs)))

    for counter, group in enumerate(combs):
        removed = np.asarray(group, dtype=np.intp)

        # Drop the removed teams' rows, then their columns (np.delete copies,
        # so the caller's matrix is never modified).
        Pij_group = np.delete(np.delete(Pij, removed, axis=0), removed, axis=1)

        # A column that sums to zero gets a 1 on the diagonal so that the
        # corresponding team keeps its own mass instead of losing it.
        empty_cols = np.flatnonzero(Pij_group.sum(axis=0) == 0)
        Pij_group[empty_cols, empty_cols] = 1

        v = _teleport_vector(stats1, namesCols, removed, n_kept, method)
        x = _power_iterate(Pij_group, v, alpha, Nsims)

        # Scatter the reduced score vector back to the original team indices.
        kept_vect = np.delete(teamnum, removed)
        scores[kept_vect, counter] = x[:, 0]

    return scores


def _teleport_vector(stats1, namesCols, removed, n_kept, method):
    """Return the teleport distribution for the kept teams as a column vector."""
    if method == 0:
        # Uniform over the teams that remain.
        v = np.ones(n_kept) / n_kept
    else:
        # Proportional to the supplied per-team statistic, with the removed
        # teams dropped before normalising.
        v = np.array(stats1[namesCols])
        v = np.delete(v, np.asarray(removed, dtype=np.intp))
        v = v / np.sum(v)
    return v[:, np.newaxis]


def _power_iterate(P, v, alpha, Nsims):
    """Apply x <- alpha * P x + (1 - alpha) * v Nsims times from a uniform start."""
    m = v.shape[0]
    x = (np.ones(m) / m)[:, np.newaxis]
    for _ in range(Nsims):
        x = alpha * np.dot(P, x) + (1 - alpha) * v
    return x

In [23]:
def final_score(S, x_bar, beta):
    """Blend a baseline score vector with the per-team worst-case score.

    For every row of ``S`` the smallest non-zero entry is taken (0 when the
    row has no non-zero entry, e.g. a winless team). That vector is
    normalised to sum to one, mixed with ``x_bar`` and normalised again.

    Parameters
    ----------
    S : array-like, shape (n_teams, n_runs)
        Score matrix with one row per team; zeros are treated as "team not
        present in this run" and are ignored when taking the minimum.
    x_bar : array-like
        Baseline scores. Assumed to be one-dimensional with one entry per
        row of ``S`` (a column vector would broadcast to a 2-D result).
    beta : float
        Weight of the minimum vector; ``1 - beta`` weights ``x_bar``.

    Returns
    -------
    numpy.ndarray
        ``(1 - beta) * x_bar + beta * min_vect`` rescaled to sum to one, with
        singleton dimensions squeezed out.

    Notes
    -----
    If no row of ``S`` contains a non-zero entry, the first normalisation
    divides by zero and the result contains NaN.
    """
    S = np.asarray(S)
    # One slot per row of S rather than a fixed 32 teams.
    min_vect = np.zeros(S.shape[0])
    for iCnt, row in enumerate(S):
        nonzero_vals = row[np.nonzero(row)]
        # Rows without any non-zero score keep the default of 0.
        if nonzero_vals.size:
            min_vect[iCnt] = nonzero_vals.min()

    # Turn the minima into a distribution before mixing.
    min_vect = min_vect / np.sum(min_vect)

    x = (1 - beta) * x_bar + beta * min_vect
    x = x / np.sum(x)
    return np.squeeze(x)

In [24]:
# Load the pairwise results table; the first CSV column becomes the index.
data = pd.read_csv("Pij_2017.csv", index_col=0)

# Run parameters. stats1 is a placeholder: score_bagging is called with its
# default method below, which does not read it.
stats1 = 1
beta = 1
alpha = 0.4

# Work on a plain numpy copy and binarise it: every non-zero entry becomes 1.
data2 = np.array(data)
data2[data2 != 0] = 1

# Pij: each column divided by its column sum.
Pij = data2 / data2.sum(axis=0)

# Uniform teleport probabilities as a (32, 1) column.
v = np.ones((32, 1)) / 32
# Row vector of ones (kept for parity with earlier versions of this cell).
eT = np.ones((1, 32))

# Uniform initial guess, also a (32, 1) column.
x0 = np.ones((32, 1)) / 32

# Baseline scores on the full matrix: 5000 steps of the damped recurrence.
for i in range(5000):
    x0 = alpha * Pij.dot(x0) + (1 - alpha) * v

# Flatten the column to a 1-D vector.
x0 = x0[:, 0]

# ---------------------- NEW PART (ADAPTING x0 to xf) ---------------

# Scores with every pair of teams removed, then blended with the baseline.
scores_mat = score_bagging(Pij, stats1, "2018_win_perc", alpha, N=2, Nsims=5000)

xf = final_score(scores_mat, x0, beta)

In [25]:
# Show the final score vector returned by final_score (values sum to one
# because final_score normalises its result).
print(xf)

[ 0.04158907  0.03036147  0.02980084  0.03163646  0.02778981  0.02864413
  0.02382135  0.02878312  0.025713    0.03660149  0.03054859  0.02910292
  0.0304025   0.03920452  0.02514368  0.03186547  0.02530375  0.03835376
  0.03841194  0.03469799  0.03415085  0.02590674  0.03706601  0.0275033
  0.03219632  0.03008609  0.03462563  0.02496513  0.04041797  0.0283015
  0.02948851  0.0275161 ]


In [26]:
# Team lookup table; the first CSV column becomes the index.
teams = pd.read_csv('teams_2017.csv', index_col=0)
# Pull the name column out as a numpy array.
# NOTE(review): presumably in the same order as the rows of Pij -- confirm.
team_names = np.array(teams.loc[:, "team_name"])
print(team_names)

['NE' 'BUF' 'WAS' 'TEN' 'HOU' 'DET' 'CLE' 'CHI' 'CIN' 'LA' 'SF' 'GB' 'DAL'
 'MIN' 'DEN' 'BAL' 'TB' 'PIT' 'NO' 'KC' 'JAX' 'IND' 'CAR' 'OAK' 'LAC' 'SEA'
 'ATL' 'NYG' 'PHI' 'NYJ' 'ARI' 'MIA']


In [27]:
# Assemble the ranking table. 'wins' rescales the scores by 32*16/2 = 256
# (presumably 32 teams x 16 games / 2, the total wins in a season -- confirm).
teamrank = {
    'Team': team_names,
    'score': xf,
    'wins': 32*16/2*xf,
}
# Build the frame and order it from highest to lowest score in one step.
teamrank_df = pd.DataFrame(data=teamrank).sort_values(by=['score'], ascending=False)
print(teamrank_df)

   Team     score       wins
0    NE  0.041589  10.646803
28  PHI  0.040418  10.347001
13  MIN  0.039205  10.036358
18   NO  0.038412   9.833457
17  PIT  0.038354   9.818563
22  CAR  0.037066   9.488899
9    LA  0.036601   9.369980
19   KC  0.034698   8.882685
26  ATL  0.034626   8.864162
20  JAX  0.034151   8.742617
24  LAC  0.032196   8.242257
15  BAL  0.031865   8.157559
3   TEN  0.031636   8.098933
10   SF  0.030549   7.820438
12  DAL  0.030402   7.783039
1   BUF  0.030361   7.772537
25  SEA  0.030086   7.702039
2   WAS  0.029801   7.629016
30  ARI  0.029489   7.549058
11   GB  0.029103   7.450348
7   CHI  0.028783   7.368479
5   DET  0.028644   7.332898
29  NYJ  0.028302   7.245185
4   HOU  0.027790   7.114191
31  MIA  0.027516   7.044122
23  OAK  0.027503   7.040844
21  IND  0.025907   6.632126
8   CIN  0.025713   6.582528
16   TB  0.025304   6.477759
14  DEN  0.025144   6.436781
27  NYG  0.024965   6.391073
6   CLE  0.023821   6.098266
