# Setup

## Google Drive

In [0]:
!pip install sportsreference
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import os

# Authenticate the Colab user and build the PyDrive client (`drive`) that the
# save cells later use to upload files.
auth.authenticate_user()
gauth = GoogleAuth()
# Hand the application-default Google credentials to PyDrive.
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)  



## Import Packages

In [0]:
import pickle
import pandas as pd
import numpy as np
from datetime import datetime
from sportsreference.nba.teams import Teams
from sportsreference.nba.roster import Roster
from sportsreference.nba.roster import Player
from sportsreference.nba.boxscore import Boxscore
from sportsreference.nba.boxscore import Boxscores
from sportsreference.nba.schedule import Schedule
import warnings
# Suppress every warning for the rest of the session: no category, module or
# message filter is given, so this also hides deprecation notices.
warnings.filterwarnings("ignore")

# Match data

## Boxscore

In [0]:
# Pull all games between and including October 26, 2010 and November 27, 2019.
# For a quicker run, narrow the window (e.g. start at datetime(2018, 11, 27)).
games = Boxscores(datetime(2010, 10, 26), datetime(2019, 11, 27))

# games.games maps each date key to that day's game records. Build one frame
# per day, tag it with its date key, and concatenate once at the end instead
# of re-copying the growing frame on every iteration.
daily_frames = []
for match_date, day_games in games.games.items():
    day_df = pd.DataFrame(day_games)
    day_df['Date_of_Match'] = match_date
    daily_frames.append(day_df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# requested window returned no dates at all.
games_df = (
    pd.concat(daily_frames, ignore_index=True)
    if daily_frames
    else pd.DataFrame()
)

In [0]:
# Quick sanity check of the assembled games table: (rows, columns).
games_df.shape

(11844, 12)

## Players who played in each match

In [0]:
# For every game, download the full boxscore and record which players
# appeared for the home side and for the away side.
home_players, away_players = [], []

for boxscore_id in games_df['boxscore'].values:
    game_data = Boxscore(boxscore_id)
    # The first index label of each player's dataframe serves as the player's
    # identifier (presumably the sports-reference player id — confirm).
    home_players.append(
        [player.dataframe.index[0] for player in game_data.home_players]
    )
    away_players.append(
        [player.dataframe.index[0] for player in game_data.away_players]
    )

# One list of player identifiers per game, aligned with the rows of games_df.
games_df['home_players'] = home_players
games_df['away_players'] = away_players

# Unique players list

In [0]:
# Gather every player identifier seen in any game (all home line-ups first,
# then all away line-ups) and collapse duplicates through a set.
game_count = len(games_df)
players_list = [
    player_id
    for side in ('home_players', 'away_players')
    for row in range(0, game_count)
    for player_id in games_df[side][row]
]

players_list = list(set(players_list))

# All players' stats

In [0]:
# Download the statistics frame of every unique player and stack them into a
# single table indexed by player name.
player_frames = []
failed_player_ids = []  # (player_id, error) pairs kept for later inspection

for position, player_id in enumerate(players_list):
    print(position)  # progress indicator
    try:
        player = Player(player_id)
        player_name = player.name
        player_data = player.dataframe
        player_data['player_name'] = player_name
    except Exception as exc:
        # Best-effort scrape: skip players that cannot be loaded, but remember
        # them instead of discarding the failure silently. Catching Exception
        # (not a bare except) lets KeyboardInterrupt stop the long-running loop.
        failed_player_ids.append((player_id, repr(exc)))
        continue
    player_frames.append(player_data)

if failed_player_ids:
    print('Skipped %d players that could not be loaded' % len(failed_player_ids))

if not player_frames:
    raise RuntimeError('No player data could be loaded; see failed_player_ids')

# Concatenate once (avoids re-copying the growing frame per player), expose
# the former index level as a 'season' column, and index by player name.
all_players_data = (
    pd.concat(player_frames)
    .reset_index()
    .rename(columns={'level_0': 'season'})
    .set_index('player_name')
)

# NBA Schedule 2019-20

In [0]:
# NBA Team names and abbreviations
# Lookup table: NBA team abbreviation -> full team name.
teams = {team.abbreviation: team.name for team in Teams()}

In [0]:
# Fetching schedule from API
# Fetch every team's schedule and merge them into one league-wide table.
# Without an explicit year the library falls back to the current season, so
# re-running this cell later would silently return a different season. Pin
# the 2019-20 season; sports-reference labels a season by its ending year
# (NOTE(review): confirm '2020' against the sportsreference Schedule docs).
SCHEDULE_SEASON = '2020'

team_schedules = []
for team_abbr, team_name in teams.items():
    team_schedule = Schedule(team_abbr, year=SCHEDULE_SEASON).dataframe
    team_schedules.append(
        team_schedule.assign(team_abbr=team_abbr, team_name=team_name)
    )

# Sort chronologically, then keep one row per boxscore_index (the same game
# presumably appears in both participants' schedules).
# NOTE(review): if unplayed games carry a missing boxscore_index they would
# all collapse into a single row here — verify before relying on future games.
nba19_schedule = (
    pd.concat(team_schedules, ignore_index=True)
    .sort_values(by=["datetime"])
    .drop_duplicates(subset='boxscore_index')
)

# Save data to pickle

## Save games data to pickle

In [0]:
# Persist the games table locally, then upload the pickle to Google Drive.
games_df.to_pickle('games_df.p')

# Destination Drive folder: its id is the value after "id=" in the share link.
# Named folder_id/drive_file so the builtin `id` is not shadowed for the rest
# of the notebook session.
folder_link = 'https://drive.google.com/open?id=1e8GS0L0xUXQDgiRorx__FQViQjHNza7c'
folder_id = folder_link.split("=", 1)[1]

# Create the file inside that folder and upload the local pickle's content.
drive_file = drive.CreateFile({'parents': [{'id': folder_id}]})
drive_file.SetContentFile('games_df.p')
drive_file.Upload()

## Save unique players list to pickle

In [0]:
# Persist the unique player ids locally, then upload the pickle to Google Drive.
with open('players_list.pkl', 'wb') as f:
    pickle.dump(players_list, f)

# Destination Drive folder: its id is the value after "id=" in the share link.
# Named folder_id/drive_file so the builtin `id` is not shadowed for the rest
# of the notebook session.
folder_link = 'https://drive.google.com/open?id=1e8GS0L0xUXQDgiRorx__FQViQjHNza7c'
folder_id = folder_link.split("=", 1)[1]

# Create the file inside that folder and upload the local pickle's content.
drive_file = drive.CreateFile({'parents': [{'id': folder_id}]})
drive_file.SetContentFile('players_list.pkl')
drive_file.Upload()

## Save players data to pickle

In [0]:
# Persist the per-player statistics table locally, then upload the pickle to
# Google Drive.
with open('all_players_data_all_season.pkl', 'wb') as f:
    pickle.dump(all_players_data, f)

# Destination Drive folder: its id is the value after "id=" in the share link.
# Named folder_id/drive_file so the builtin `id` is not shadowed for the rest
# of the notebook session.
folder_link = 'https://drive.google.com/open?id=1e8GS0L0xUXQDgiRorx__FQViQjHNza7c'
folder_id = folder_link.split("=", 1)[1]

# Create the file inside that folder and upload the local pickle's content.
drive_file = drive.CreateFile({'parents': [{'id': folder_id}]})
drive_file.SetContentFile('all_players_data_all_season.pkl')
drive_file.Upload()

## Save schedule to pickle

In [0]:
# Persist the league schedule table locally, then upload the pickle to Google
# Drive.
with open('nba19_schedule.pkl', 'wb') as f:
    pickle.dump(nba19_schedule, f)

# Destination Drive folder: its id is the value after "id=" in the share link.
# Named folder_id/drive_file so the builtin `id` is not shadowed for the rest
# of the notebook session.
folder_link = 'https://drive.google.com/open?id=1e8GS0L0xUXQDgiRorx__FQViQjHNza7c'
folder_id = folder_link.split("=", 1)[1]

# Create the file inside that folder and upload the local pickle's content.
drive_file = drive.CreateFile({'parents': [{'id': folder_id}]})
drive_file.SetContentFile('nba19_schedule.pkl')
drive_file.Upload()