# Stochastic Framework Through Year Calendar

In [9]:
import pandas as pd
import numpy as np

from scipy.stats import spearmanr, kendalltau

In [11]:
schedules = pd.read_csv("data/Schedules_1979-2023.csv")
schedules.head()
    

Unnamed: 0,Date,Year,HomeTm,AwayTm,Home,Away
0,1978-10-13,1979,DEN,SAS,Denver Nuggets,San Antonio Spurs
1,1978-10-13,1979,DET,NJN,Detroit Pistons,New Jersey Nets
2,1978-10-13,1979,IND,ATL,Indiana Pacers,Atlanta Hawks
3,1978-10-13,1979,NYK,HOU,New York Knicks,Houston Rockets
4,1978-10-13,1979,PHI,LAL,Philadelphia 76ers,Los Angeles Lakers


In [105]:
len(schedules[schedules.Year == 2021])

1080

In [12]:
def run_through_calendar(year, gamma, model_pred):

     schedules_year = schedules.copy()[schedules.Year==year].reset_index(drop=True)
     model_pred_year = model_pred.copy()[model_pred.Year == year].reset_index(drop=True)
     # Take list of teams as home and away team and initiate the keys of pred_dic
     Tm = pd.unique(schedules_year[['HomeTm', 'AwayTm']].values.ravel('K'))
     pred_dic = {value: 0 for value in Tm}

     for k in range(len(schedules_year)):
          home_pred = model_pred_year[model_pred_year.Tm == schedules_year.loc[k, "HomeTm"]]['Pred'].item()
          away_pred = model_pred_year[model_pred_year.Tm == schedules_year.loc[k, "AwayTm"]]['Pred'].item()
          
          if home_pred + gamma > away_pred:
               pred_dic[schedules_year.loc[k, "HomeTm"]] +=1
          
          elif home_pred + gamma < away_pred:
               pred_dic[schedules_year.loc[k, "AwayTm"]] +=1
          
          else: # Rare case of equality: then just random
               outcome = np.random.randint(2)
               pred_dic[schedules_year.loc[k, "HomeTm"]] += outcome
               pred_dic[schedules_year.loc[k, "AwayTm"]] += 1-outcome
     
     return pred_dic


In [25]:
pred_LR = pd.read_csv('data/pred_test_LR.csv')
pred_LR.head()

Unnamed: 0,Year,Tm,Real,Pred
0,2014,BOS,25.0,33.722703
1,2014,NYK,37.0,48.635391
2,2014,PHI,19.0,28.445007
3,2014,ATL,38.0,47.303396
4,2014,CLE,33.0,31.40012


In [123]:
list = []

In [155]:
year = 2023
pred_dict = run_through_calendar(year, 15, pred_LR)

In [156]:
pred_LR_year = pred_LR[pred_LR.Year == year]
pred_LR_year.drop('Pred', axis=1, inplace=True)
pred_LR_year['Pred'] = pred_LR_year['Tm'].map(pred_dict)
pred_LR_year

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pred_LR_year.drop('Pred', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pred_LR_year['Pred'] = pred_LR_year['Tm'].map(pred_dict)


Unnamed: 0,Year,Tm,Real,Pred
262,2023,BOS,57.0,52
263,2023,NYK,47.0,42
264,2023,PHI,54.0,53
265,2023,ATL,41.0,48
266,2023,CLE,51.0,51
267,2023,DET,17.0,14
268,2023,HOU,22.0,5
269,2023,SAS,22.0,29
270,2023,CHI,40.0,44
271,2023,DEN,53.0,55


In [157]:
spmr, _ = spearmanr(pred_LR_year['Real'], pred_LR_year['Pred'])
ktau, _ = kendalltau(pred_LR_year['Real'], pred_LR_year['Pred'])
spmr, ktau

(0.7920492147474439, 0.621610991828358)

In [158]:
list.append(pred_LR_year)

In [159]:
list

[     Year   Tm  Real  Pred
 112  2018  BOS  55.0    48
 113  2018  NYK  29.0    31
 114  2018  PHI  52.0    31
 115  2018  ATL  24.0    29
 116  2018  CLE  50.0    58
 117  2018  DET  39.0    37
 118  2018  HOU  65.0    59
 119  2018  SAS  47.0    58
 120  2018  CHI  27.0    35
 121  2018  DEN  46.0    40
 122  2018  IND  48.0    35
 123  2018  MIL  44.0    38
 124  2018  GSW  58.0    73
 125  2018  LAL  35.0    26
 126  2018  PHO  21.0    27
 127  2018  POR  49.0    40
 128  2018  UTA  48.0    40
 129  2018  DAL  24.0    35
 130  2018  LAC  42.0    52
 131  2018  SAC  27.0    26
 132  2018  MIA  44.0    40
 133  2018  ORL  25.0    28
 134  2018  MIN  47.0    50
 135  2018  TOR  59.0    53
 136  2018  WAS  43.0    42
 137  2018  MEM  22.0    43
 138  2018  OKC  48.0    55
 139  2018  BRK  28.0    22
 140  2018  NOP  48.0    40
 141  2018  CHO  36.0    39,
      Year   Tm  Real  Pred
 142  2019  BOS  49.0    51
 143  2019  NYK  17.0    25
 144  2019  PHI  51.0    52
 145  2019  ATL  29

In [160]:
big_df = pd.concat(list, ignore_index=True)
big_df

Unnamed: 0,Year,Tm,Real,Pred
0,2018,BOS,55.0,48
1,2018,NYK,29.0,31
2,2018,PHI,52.0,31
3,2018,ATL,24.0,29
4,2018,CLE,50.0,58
...,...,...,...,...
175,2023,MEM,51.0,51
176,2023,OKC,40.0,20
177,2023,BRK,45.0,46
178,2023,NOP,42.0,44


In [162]:
big_df.to_csv("data/pred_test_stoch.csv", index=None)

In [43]:
def tau_rau(year):
    pred_year = 

SyntaxError: invalid syntax (3107900305.py, line 2)

In [47]:
spmr, _ = spearmanr(res_yr['Real'], res_yr['Pred'])
ktau, _ = kendalltau(res_yr['Real'], res_yr['Pred'])

NameError: name 'res_yr' is not defined

In [8]:
target = pd.read_csv("data/target_1979-2023.csv")

In [173]:
target

Unnamed: 0,Tm,Year,W/L%
0,WSB,1979,0.659
1,SEA,1979,0.634
2,PHO,1979,0.610
3,SAS,1979,0.585
4,KCK,1979,0.585
...,...,...,...
1241,POR,2023,0.402
1242,CHO,2023,0.329
1243,HOU,2023,0.268
1244,SAS,2023,0.268
