#Setup

##Google Drive

In [0]:
!pip install sportsreference
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import os

# 1. Authenticate and create the PyDrive client.
# NOTE(review): authenticate_user() presumably triggers Colab's interactive
# sign-in and must run before the default credentials are requested — confirm.
auth.authenticate_user()
gauth = GoogleAuth()
# Hand PyDrive the application-default credentials rather than running its own auth flow.
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client the later download and upload cells rely on.
drive = GoogleDrive(gauth)  

Collecting sportsreference
[?25l  Downloading https://files.pythonhosted.org/packages/4a/16/64f2181463018c00df5612cd3319a7cbf4403bd7b5c56ba8db1b9bf21a8d/sportsreference-0.4.7-py2.py3-none-any.whl (373kB)
[K     |▉                               | 10kB 14.0MB/s eta 0:00:01[K     |█▊                              | 20kB 7.3MB/s eta 0:00:01[K     |██▋                             | 30kB 10.0MB/s eta 0:00:01[K     |███▌                            | 40kB 6.1MB/s eta 0:00:01[K     |████▍                           | 51kB 7.4MB/s eta 0:00:01[K     |█████▎                          | 61kB 8.7MB/s eta 0:00:01[K     |██████▏                         | 71kB 9.8MB/s eta 0:00:01[K     |███████                         | 81kB 10.9MB/s eta 0:00:01[K     |███████▉                        | 92kB 11.8MB/s eta 0:00:01[K     |████████▊                       | 102kB 9.6MB/s eta 0:00:01[K     |█████████▋                      | 112kB 9.6MB/s eta 0:00:01[K     |██████████▌                    

##Import Packages

In [0]:
import pickle
import pandas as pd
import numpy as np
from sportsreference.nba.teams import Teams
from sportsreference.nba.roster import Roster
from sportsreference.nba.roster import Player
from sportsreference.nba.boxscore import Boxscore
from sklearn.utils import shuffle
from statsmodels.formula.api import logit
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
import seaborn as sns
from matplotlib import pyplot as plt
import warnings
# NOTE(review): this silences *every* warning for the rest of the notebook
# (including any pandas/statsmodels warnings); consider a narrower filter.
warnings.filterwarnings("ignore")
# Seed NumPy's global random state so NumPy-based randomness is repeatable across runs.
np.random.seed(1)

##Download files

In [0]:
# Choose a local (Colab) directory to store the data.
local_download_path = os.path.expanduser('~/data/pickle/')
# exist_ok=True makes re-running the cell safe without hiding genuine failures
# (e.g. permission errors), which the previous bare `except: pass` swallowed.
os.makedirs(local_download_path, exist_ok=True)

# 2. Auto-iterate using the query syntax
#    https://developers.google.com/drive/v2/web/search-parameters
# The query lists every file whose parent is the shared project folder.
file_list = drive.ListFile(
    {'q': "'1e8GS0L0xUXQDgiRorx__FQViQjHNza7c' in parents"}).GetList()

for f in file_list:
  # 3. Create & download by id.
  print('title: %s, id: %s' % (f['title'], f['id']))
  fname = os.path.join(local_download_path, f['title'])
  print('downloading to {}'.format(fname))
  # A fresh file handle bound to the id is used to fetch the content to disk.
  f_ = drive.CreateFile({'id': f['id']})
  f_.GetContentFile(fname)

title: Simulation_2019_20.xlsx, id: 1wUhiDOCQ1sYVab6GkbXzkBvXyBLbaCz4
downloading to /root/data/pickle/Simulation_2019_20.xlsx
title: logistic_model.pkl, id: 1r9655Pr_rfpAYjUj8Ev84OiNbhzc4TeB
downloading to /root/data/pickle/logistic_model.pkl
title: match_df_processed.pkl, id: 1ntpAAnbKO65Q_T7u-kec809CyURKXavp
downloading to /root/data/pickle/match_df_processed.pkl
title: all_players_data_all_season_processed.pkl, id: 1wuFPczmUxGikAqT-mKlu6uWhZXN9folv
downloading to /root/data/pickle/all_players_data_all_season_processed.pkl
title: all_players_data_all_season.pkl, id: 1Ngu5JnhvdPLE5VPo5LutFfZ2Q5uT9Lgg
downloading to /root/data/pickle/all_players_data_all_season.pkl
title: players_list.pkl, id: 17dLZoeKIEeAcS3ZffJf5kt_xEqUab6Gi
downloading to /root/data/pickle/players_list.pkl
title: games_df.p, id: 18opuKiNf9IM7lbh3xa9f5k7rGH056RTt
downloading to /root/data/pickle/games_df.p


#Load data

In [0]:
# Resolve the data directory the same way the download cell does, instead of
# hard-coding /root, so both cells agree wherever the home directory is.
pickle_dir = os.path.expanduser('~/data/pickle/')

# SECURITY: pickle.load can execute arbitrary code. Only load files that come
# from the project's own trusted Drive folder.

# retrieve players' stats data from pickle files
all_players_data_all_season_file = os.path.join(
    pickle_dir, 'all_players_data_all_season_processed.pkl')
with open(all_players_data_all_season_file, 'rb') as f:
  all_players_data = pickle.load(f)

# retrieve model data from pickle files
model_file = os.path.join(pickle_dir, 'logistic_model.pkl')
with open(model_file, 'rb') as f:
  model = pickle.load(f)

#Create probability dataframe

##Get team names

In [0]:
# Walk the Teams collection once, capturing (name, abbreviation) for each team,
# then split the pairs into the two parallel lists used by later cells.
teams = Teams()
team_pairs = [(entry.name, entry.abbreviation) for entry in teams]
team_names = [full_name for full_name, _abbr in team_pairs]
team_ids = [abbr for _full_name, abbr in team_pairs]

##Read excel for starting fives

In [0]:
# Build the workbook path from the home directory (matching the download cell)
# rather than hard-coding /root.
simulation_file = os.path.join(
    os.path.expanduser('~/data/pickle/'), 'Simulation_2019_20.xlsx')

# Passing the list of sheet names opens the workbook a single time and returns
# a {sheet_name: DataFrame} dict, instead of re-reading the file once per team.
team_sheets = pd.read_excel(simulation_file, sheet_name=list(team_ids))

# Tag each sheet with its team abbreviation, then concatenate once; growing a
# DataFrame with concat inside the loop is quadratic in the number of sheets.
team_frames = [team_sheets[team].assign(Team=team) for team in team_ids]
if team_frames:
  team_raw_data = pd.concat(team_frames, ignore_index=True)
else:
  # No teams: keep the original behaviour of ending up with an empty frame.
  team_raw_data = pd.DataFrame()

##Impute None

In [0]:
## Imputing missing ratings with 15
# Value substituted for players without a rating.
DEFAULT_PER = 15

per_values = team_raw_data['player_efficiency_rating']
# Treat both the literal string 'None' and genuinely empty cells (NaN) as
# missing; previously a NaN would make the int conversion below raise.
missing_per = per_values.isna() | (per_values == 'None')
team_raw_data.loc[missing_per, 'player_efficiency_rating'] = DEFAULT_PER
## Convert column to int
team_raw_data['player_efficiency_rating'] = team_raw_data['player_efficiency_rating'].astype(int)

##Resetting abnormally high ratings and flooring negative ratings

In [0]:
# Ratings of 35 or more are replaced with 15; negative ratings are raised to 0.
per_column = 'player_efficiency_rating'
adjusted_per = team_raw_data[per_column]
adjusted_per = adjusted_per.mask(adjusted_per >= 35, 15)
adjusted_per = adjusted_per.mask(adjusted_per < 0, 0)
team_raw_data[per_column] = adjusted_per

In [0]:
# long to wide format
# Step 1: average the rating per (team, position) pair.
mean_per_by_slot = (
    team_raw_data
    .groupby(['Team', 'final_position'])['player_efficiency_rating']
    .mean()
)
team_raw_data = mean_per_by_slot.reset_index()

# Step 2: one row per team, one column per position; the column-axis label is
# blanked so it does not show up as a header name.
wide_ratings = team_raw_data.pivot_table(
    index='Team',
    columns='final_position',
    values='player_efficiency_rating',
)
Team_data = wide_ratings.reset_index().rename_axis("", axis="columns")

##Cartesian product of team match-ups

In [0]:
# Build every (home, away) pairing of teams, including a team against itself,
# with one rating column per position and side.
positions = ['PG', 'SG', 'SF', 'PF', 'C', 'Bench']
team_ratings = Team_data[['Team'] + positions]


def _label_side(ratings, side):
  """Return `ratings` with columns renamed for `side` ('home' or 'away').

  'Team' becomes '<Side>_team' and each position becomes
  '<position>_<side>_rating' (the bench column is lower-cased to 'bench').
  A constant `dummy_col` is added as the join key for the cross join.
  `assign` returns a new frame, so nothing is written onto a column slice.
  """
  new_names = {'Team': '{}_team'.format(side.capitalize())}
  for position in positions:
    prefix = 'bench' if position == 'Bench' else position
    new_names[position] = '{}_{}_rating'.format(prefix, side)
  return ratings.rename(columns=new_names).assign(dummy_col=1)


home_ratings = _label_side(team_ratings, 'home')
away_ratings = _label_side(team_ratings, 'away')

# Joining on the constant key yields the Cartesian product. Columns are named
# explicitly before the merge, so the result no longer depends on a positional
# relabel of 14 columns staying in sync with the merge output order.
Team_data = (
    home_ratings
    .merge(away_ratings, on='dummy_col', how='inner')
    .drop(columns=['dummy_col'])
    .rename_axis(None, axis='columns')
    .set_index(['Home_team', 'Away_team'])
)

##Predictions

In [0]:
# Score every (Home_team, Away_team) row with the unpickled model.
# NOTE(review): presumably a fitted statsmodels logit result whose predict()
# returns a Series indexed like Team_data — confirm against the training notebook.
predictions = model.predict(Team_data)
# Pivot the innermost index level (Away_team) into columns, giving a
# home-team x away-team table.
prob_df = predictions.unstack()

##Save predictions to pickle

In [0]:
# Serialize the match-up probability table locally before uploading it.
with open('prob_df.pkl', 'wb') as f:
    pickle.dump(prob_df, f)

# save to drive
link = 'https://drive.google.com/open?id=1e8GS0L0xUXQDgiRorx__FQViQjHNza7c'
# Take the folder id that follows "id=". It is stored as `folder_id` so the
# builtin id() is not shadowed, and `_` (IPython's last-output variable) is left alone.
folder_id = link.split("=", 1)[1]

# get the folder id where you want to save your file
# NOTE(review): no existing file id is supplied, so each run presumably creates
# a new prob_df.pkl in the folder rather than replacing the old one — confirm.
upload_file = drive.CreateFile({'parents': [{'id': folder_id}]})
upload_file.SetContentFile('prob_df.pkl')
upload_file.Upload()