In [None]:
import numpy as np
import pandas as pd
from datetime import datetime
import time
from nba_api.stats.static import players
from nba_api.stats.static import teams
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.endpoints import teamgamelog

Reading in roster table of the past 5 NBA All Star games

In [116]:
# Load the wide All Star table: each season contributes its own
# "<year> <year+1> All Stars" name column next to team / starter / age columns.
all_star_by_year = pd.read_csv(
    'All Star List.csv',
)
all_star_by_year.head()

Unnamed: 0,2015 2016 All Stars,Team,Starter?,Age,2016 2017 All Stars,Team.1,Starter?.1,Age.1,2017 2018 All Stars,Team.2,Starter?.2,Age.2,2018 2019 All Stars,Team.3,Starter?.4,Age.3,2019 2020 All Stars,Team.4,Starter?.3,Age.4
0,Stephen Curry,GSW,Y,27.0,Anthony Davis,NOP,Y,23.0,LeBron James,CLE,Y,33.0,LeBron James,LAL,Y,34,Kawhi Leonard,LAC,Y,28.0
1,Kobe Bryant,LAL,Y,37.0,James Harden,HOU,Y,27.0,Kevin Durant,GSW,Y,29.0,James Harden,HOU,Y,29,Anthony Davis,LAL,Y,26.0
2,Kawhi Leonard,SAS,Y,24.0,Stephen Curry,GSW,Y,28.0,Russell Westbrook,OKC,Y,29.0,Kevin Durant,GSW,Y,30,LeBron James,LAL,Y,35.0
3,Kevin Durant,OKC,Y,27.0,Kevin Durant,GSW,Y,28.0,Kyrie Irving,BOS,Y,25.0,Kyrie Irving,BOS,Y,26,James Harden,HOU,Y,30.0
4,Russell Westbrook,OKC,Y,27.0,Kawhi Leonard,SAS,Y,25.0,Anthony Davis,NOP,Y,24.0,Kawhi Leonard,TOR,Y,27,Luka Doncic,DAL,Y,20.0


Compiling a list of the unique players to make an NBA All Star team

In [117]:
# Gather every name found in the five "<year> <year+1> All Stars" columns,
# collapse repeats, and drop missing (NaN) entries.
# NOTE: de-duplicating through set() leaves the order of `all_stars` arbitrary,
# so nothing downstream should rely on a particular ordering of this list.
roster_columns = ['{} {} All Stars'.format(first_year, first_year + 1) for first_year in range(2015, 2020)]

all_stars = []
for roster_column in roster_columns:
    all_stars.extend(all_star_by_year[roster_column].values)

all_stars = [name for name in set(all_stars) if str(name) != 'nan']

Creating a list to serve as the index for our data tables

In [118]:
# Row labels for the data tables: 82 games for each of the five seasons that
# start in 2015 through 2019. Every label is wrapped in its own one-element list.
games = [
    ['Game {} of the {} - {} Season'.format(game, season, season + 1)]
    for season in range(2015, 2020)
    for game in range(1, 83)
]

Getting a list of each player's identifier to query the NBA's Data API

In [124]:
# Resolve every All Star name to the player id used to query the NBA stats API.
player_dict = players.get_players()

# Index the static player list by full name once instead of rescanning it for
# every All Star. setdefault keeps the first entry when a name repeats, which
# matches taking element [0] of a filtered list.
player_id_by_full_name = {}
for player in player_dict:
    player_id_by_full_name.setdefault(player['full_name'], player['id'])

unmatched_all_stars = [name for name in all_stars if name not in player_id_by_full_name]
if unmatched_all_stars:
    # A missing name used to surface as a bare "list index out of range"; keep the
    # same exception type but say which names could not be matched (typically a
    # spelling / accent difference between the CSV and the API's player list).
    raise IndexError('No player entry found for: ' + ', '.join(str(name) for name in unmatched_all_stars))

all_star_ids = [player_id_by_full_name[name] for name in all_stars]
player_name_id_dict = dict(zip(all_stars, all_star_ids))

Getting a list of each team's identifier to query the NBA's Data API

In [193]:
# Abbreviations of the 30 teams; this list also fixes the column order of the
# per-team tables built from it.
team_abbreviations = [
    "ATL", "BOS", "BKN", "CHA", "CHI", "CLE", "DAL", "DEN", "DET", "GSW",
    "HOU", "IND", "LAC", "LAL", "MEM", "MIA", "MIL", "MIN", "NOP", "NYK",
    "OKC", "ORL", "PHI", "PHX", "POR", "SAC", "SAS", "TOR", "UTA", "WAS",
]

# Look up the API id of each abbreviation (first matching entry of the static team list).
team_dict = teams.get_teams()
team_ids = [
    [team for team in team_dict if team['abbreviation'] == team_abbreviation][0]['id']
    for team_abbreviation in team_abbreviations
]
team_abbreviation_id_dict = dict(zip(team_abbreviations, team_ids))

Reading in a table with the team each All Star held a roster spot on for each game of the past 5 regular seasons

In [196]:
# Rows are game labels (first CSV column becomes the index); columns are All Stars;
# each cell holds the team abbreviation for that game, or is empty.
player_team = pd.read_csv(
    'player_team_gantt_chart.csv',
    index_col = [0],
)
player_team.head()

Unnamed: 0,John Wall,Khris Middleton,Nikola Vucevic,Devin Booker,Domantas Sabonis,Andre Drummond,LaMarcus Aldridge,Ben Simmons,Jimmy Butler,LeBron James,...,D'Angelo Russell,Pascal Siakam,DeMarcus Cousins,Kobe Bryant,Stephen Curry,Kyle Lowry,DeMar DeRozan,Trae Young,Anthony Davis,Klay Thompson
Game 1 of the 2015 - 2016 Season,WAS,MIL,ORL,PHX,,DET,SAS,PHI,CHI,CLE,...,LAL,,SAC,LAL,GSW,TOR,TOR,,NOP,GSW
Game 2 of the 2015 - 2016 Season,WAS,MIL,ORL,PHX,,DET,SAS,PHI,CHI,CLE,...,LAL,,SAC,LAL,GSW,TOR,TOR,,NOP,GSW
Game 3 of the 2015 - 2016 Season,WAS,MIL,ORL,PHX,,DET,SAS,PHI,CHI,CLE,...,LAL,,SAC,LAL,GSW,TOR,TOR,,NOP,GSW
Game 4 of the 2015 - 2016 Season,WAS,MIL,ORL,PHX,,DET,SAS,PHI,CHI,CLE,...,LAL,,SAC,LAL,GSW,TOR,TOR,,NOP,GSW
Game 5 of the 2015 - 2016 Season,WAS,MIL,ORL,PHX,,DET,SAS,PHI,CHI,CLE,...,LAL,,SAC,LAL,GSW,TOR,TOR,,NOP,GSW


Reading in a table of the daily injury status of each All Star dating back five seasons

In [571]:
# Rows are calendar dates (first CSV column becomes the index); columns are All Stars;
# each cell is that player's injury status on that date.
player_injury_status_df = pd.read_csv(
    'Daily_Injury_Report.csv',
    index_col = [0],
)
player_injury_status_df.head()

Unnamed: 0,John Wall,Khris Middleton,Nikola Vucevic,Devin Booker,Domantas Sabonis,Andre Drummond,LaMarcus Aldridge,Ben Simmons,Jimmy Butler,LeBron James,...,D'Angelo Russell,Pascal Siakam,DeMarcus Cousins,Kobe Bryant,Stephen Curry,Kyle Lowry,DeMar DeRozan,Trae Young,Anthony Davis,Klay Thompson
"OCT 27, 2015",FULL,FULL,FULL,FULL,,FULL,FULL,,FULL,FULL,...,FULL,FULL,FULL,FULL,FULL,FULL,FULL,,FULL,FULL
"OCT 28, 2015",FULL,FULL,FULL,FULL,,FULL,FULL,,FULL,FULL,...,FULL,FULL,FULL,FULL,FULL,FULL,FULL,,FULL,FULL
"OCT 29, 2015",FULL,FULL,FULL,FULL,,FULL,FULL,,FULL,FULL,...,FULL,FULL,FULL,FULL,FULL,FULL,FULL,,FULL,FULL
"OCT 30, 2015",FULL,FULL,FULL,FULL,,FULL,FULL,,FULL,FULL,...,FULL,FULL,FULL,FULL,FULL,FULL,FULL,,FULL,FULL
"OCT 31, 2015",FULL,FULL,FULL,FULL,,FULL,FULL,,FULL,FULL,...,FULL,FULL,FULL,FULL,FULL,FULL,FULL,,FULL,FULL


Reading in a table of each team's playoff probabilities, updated weekly but expressed daily, for the past five seasons

In [573]:
# Rows are calendar dates (first CSV column becomes the index); columns are team
# abbreviations; each cell is that team's playoff probability on that date.
teams_playoff_probabilities_df = pd.read_csv(
    'Weekly NBA Playoff Probabilities.csv',
    index_col = [0],
)
teams_playoff_probabilities_df.head()

Unnamed: 0,PHI,MIL,CHI,CLE,BOS,LAC,MEM,ATL,MIA,CHA,...,DET,TOR,HOU,SAS,PHX,OKC,MIN,POR,GSW,WAS
"OCT 27, 2015",3.0,34.0,88.0,99.9,90.0,95.0,81.0,85.0,48.0,60.0,...,45.0,78.0,88.0,97.0,30.0,97.0,2.0,23.0,98.0,66.0
"OCT 28, 2015",3.0,34.0,88.0,99.9,90.0,95.0,81.0,85.0,48.0,60.0,...,45.0,78.0,88.0,97.0,30.0,97.0,2.0,23.0,98.0,66.0
"OCT 29, 2015",3.0,34.0,88.0,99.9,90.0,95.0,81.0,85.0,48.0,60.0,...,45.0,78.0,88.0,97.0,30.0,97.0,2.0,23.0,98.0,66.0
"OCT 30, 2015",3.0,34.0,88.0,99.9,90.0,95.0,81.0,85.0,48.0,60.0,...,45.0,78.0,88.0,97.0,30.0,97.0,2.0,23.0,98.0,66.0
"OCT 31, 2015",3.0,34.0,88.0,99.9,90.0,95.0,81.0,85.0,48.0,60.0,...,45.0,78.0,88.0,97.0,30.0,97.0,2.0,23.0,98.0,66.0


Reading in the date of birth of each All Star in our analysis and casting the data into a dictionary

In [401]:
# Map each All Star's name to their date of birth, taken from the first data row of the CSV.
all_star_birthdays = pd.read_csv('player_date_of_birth.csv')
first_row_by_header = dict(zip(all_star_birthdays.columns, all_star_birthdays.values[0]))

if all(name in first_row_by_header for name in all_stars):
    # Pair every player with the column carrying their own name. `all_stars` comes out
    # of a set(), so its order is arbitrary and must not decide whose birthday is whose.
    player_name_dob_dict = {name: first_row_by_header[name] for name in all_stars}
else:
    # The header does not name every player, so fall back to the original positional
    # pairing. This is only correct if the CSV columns are in the same order as `all_stars`.
    print('WARNING: player_date_of_birth.csv header does not list every All Star; '
          'pairing birthdays with players by position instead.')
    player_name_dob_dict = dict(zip(all_stars, all_star_birthdays.values[0]))

Requesting the NBA Data API for the game log of each team over the past five seasons

In [294]:
# Request one TeamGameLog per (season, team) and keep the endpoint objects in a
# season x team table; data frames are pulled out of them by later cells.

# Number of seasons spanned by the game rows, as a ceiling division by 82: a partial
# final season still gets its own row, while an exact multiple of 82 no longer
# produces a spurious extra season (which `// 82 + 1` did).
season_count = -(-len(player_team.index) // 82)

# `date_of_game` ends up bound to the same array through the chained assignment; the
# name is kept only so that any existing reference to it keeps working.
team_seasons_table = date_of_game = np.full((season_count, len(team_abbreviations)), None)

total_requests = season_count * len(team_abbreviations)
index = 0
for season in range(season_count):
    for team in range(len(team_abbreviations)):
        # NOTE(review): the season is passed as a bare start year; confirm against the
        # nba_api documentation that this is accepted in place of a 'YYYY-YY' string.
        team_seasons_table[season][team] = teamgamelog.TeamGameLog(season = (2015 + season), team_id = team_ids[team])
        index += 1
        if ((index % 15) == 0):
            # Percentage derived from the real request count instead of a hard-coded divisor.
            print(str(round(100 * index / total_requests, 1)) + "% complete")
        # Pause between consecutive requests to the API.
        time.sleep(4)

team_seasons_table_df = pd.DataFrame(data = team_seasons_table, index = range(2015, 2015 + season_count), columns = team_abbreviations)
team_seasons_table_df.head()

In [295]:
# Preview the stored endpoint objects: one row per season, one column per team.
team_seasons_table_df.head(n = 5)

Unnamed: 0,ATL,BOS,BKN,CHA,CHI,CLE,DAL,DEN,DET,GSW,...,OKC,ORL,PHI,PHX,POR,SAC,SAS,TOR,UTA,WAS
2015,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...
2016,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...
2017,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...
2018,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...
2019,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...,<nba_api.stats.endpoints.teamgamelog.TeamGameL...


Requesting the NBA Data API for the game log of each All Star over the past five seasons

In [345]:
# Request one PlayerGameLog per (season, All Star) and keep the endpoint objects in a
# season x player table; data frames are pulled out of them by later cells.

# Number of seasons spanned by the game rows, as a ceiling division by 82: a partial
# final season still gets its own row, while an exact multiple of 82 no longer
# produces a spurious extra season (which `// 82 + 1` did).
season_count = -(-len(player_team.index) // 82)

# `date_of_game` ends up bound to the same array through the chained assignment; the
# name is kept only so that any existing reference to it keeps working.
player_seasons_table = date_of_game = np.full((season_count, len(all_stars)), None)

total_requests = season_count * len(all_stars)
index = 0
for season in range(season_count):
    for all_star in range(len(all_stars)):
        # NOTE(review): the season is passed as a bare start year; confirm against the
        # nba_api documentation that this is accepted in place of a 'YYYY-YY' string.
        player_seasons_table[season][all_star] = playergamelog.PlayerGameLog(season = (2015 + season), player_id = all_star_ids[all_star])
        index += 1
        if ((index % 27) == 0):
            # Percentage derived from the real request count; rounding avoids output
            # such as "29.999999999999996% complete".
            print(str(round(100 * index / total_requests, 1)) + "% complete")
        # Pause between consecutive requests to the API.
        time.sleep(4)

player_seasons_table_df = pd.DataFrame(data = player_seasons_table, index = range(2015, 2015 + season_count), columns = all_stars)
player_seasons_table_df.head()

10.0% complete
20.0% complete
29.999999999999996% complete
40.0% complete
50.0% complete
59.99999999999999% complete
70.0% complete
80.0% complete
90.0% complete
100.0% complete


Unnamed: 0,John Wall,Khris Middleton,Nikola Vucevic,Devin Booker,Domantas Sabonis,Andre Drummond,LaMarcus Aldridge,Ben Simmons,Jimmy Butler,LeBron James,...,D'Angelo Russell,Pascal Siakam,DeMarcus Cousins,Kobe Bryant,Stephen Curry,Kyle Lowry,DeMar DeRozan,Trae Young,Anthony Davis,Klay Thompson
2015,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...
2016,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...
2017,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...
2018,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...
2019,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...,<nba_api.stats.endpoints.playergamelog.PlayerG...


Reading in a distance matrix covering every team's home city

In [491]:
# The CSV is read without a header row, so every line is data; both axes are then
# labelled with the team abbreviations, in `team_abbreviations` order.
distances = pd.read_csv('NBA distance matrix.csv', header = None).values
distances_table_df = pd.DataFrame(distances, team_abbreviations, team_abbreviations)
distances_table_df.head()

Unnamed: 0,ATL,BOS,BKN,CHA,CHI,CLE,DAL,DEN,DET,GSW,...,OKC,ORL,PHI,PHX,POR,SAC,SAS,TOR,UTA,WAS
ATL,0,937,748,226,589,556,720,1211,598,2130,...,756,402,666,1587,2171,2085,882,736,1582,543
BOS,937,0,189,721,850,550,1551,1767,612,2688,...,1494,1117,271,2298,2536,2628,1767,430,2096,394
BKN,748,189,0,394,715,408,1375,1633,485,2564,...,1329,939,82,2145,2446,2506,1585,346,1974,205
CHA,226,721,394,0,588,436,929,1357,506,2291,...,940,464,451,1781,2287,2242,1105,588,1726,330
CHI,589,850,715,588,0,308,806,919,237,1849,...,693,987,664,1453,1756,1791,1054,436,1259,594


Iterating through each All Star and the games they have played over the past 5 seasons to create our label and feature layers, indexed by athlete and game

In [None]:
# Build the label (participation) and feature layers. Every layer is a matrix shaped
# like `player_team` (rows = games, columns = All Stars). Cells for games in which the
# player was on no known team are left missing (NaN in numeric layers, None otherwise).

GAMES_PER_FULL_SEASON = 82
FIRST_SEASON = 2015
# The season whose game count is whatever rows remain after the full 82-game seasons.
PARTIAL_SEASON = 2019
GAME_DATE_FORMAT = '%b %d, %Y'
# Rest days assigned to a team's first game of a season (there is no previous game to measure from).
SEASON_OPENER_REST_DAYS = 10


def _journey_length(distance_table, team, oposing_team, home_game, previous_matchup):
    """Return the distance travelled to reach this game from the team's previous game.

    distance_table   -- square table of distances between teams, labelled by abbreviation on both axes
    team             -- abbreviation of the player's team
    oposing_team     -- abbreviation of the opponent in this game
    home_game        -- True when this game's matchup string contains no '@'
    previous_matchup -- matchup string of the team's previous game, or None for a season opener

    A previous game is treated as "away" when its matchup string contains '@'; the team is
    then assumed to start from that opponent's city, otherwise from its own. Home games
    that follow a home game (or open the season) cost 0.
    """
    previous_game_was_away = (previous_matchup is not None) and ('@' in previous_matchup)
    if home_game:
        if previous_game_was_away:
            return distance_table[previous_matchup[-3:]][team]
        return 0
    origin = previous_matchup[-3:] if previous_game_was_away else team
    return distance_table[origin][oposing_team]


layer_shape = player_team.shape
# NOTE(review): games_id_layer is a float array, so Game_ID values are coerced to
# numbers when assigned.
games_id_layer = np.zeros(layer_shape)
date_of_game_layer = np.full(layer_shape, None)
participated_layer = np.zeros(layer_shape)
team_playoff_probabilities_layer = np.zeros(layer_shape)
opponent_layer = np.full(layer_shape, None)
opponent_playoff_probabilities_layer = np.zeros(layer_shape)
home_game_layer = np.zeros(layer_shape)
age_layer = np.zeros(layer_shape)
time_elapsed_layer = np.zeros(layer_shape)
distance_traveled_layer = np.zeros(layer_shape)
health_layer = np.zeros(layer_shape)
day_to_day_layer = np.zeros(layer_shape)
out_layer = np.zeros(layer_shape)

all_layers = (
    games_id_layer, date_of_game_layer, participated_layer,
    team_playoff_probabilities_layer, opponent_layer,
    opponent_playoff_probabilities_layer, home_game_layer, age_layer,
    time_elapsed_layer, distance_traveled_layer, health_layer,
    day_to_day_layer, out_layer,
)

games = list(player_team.index.values)

# Extracting data frames from the stored endpoint objects is repeated work, so do it
# once per (team, season) and once per (player, season) rather than once per cell.
team_log_cache = {}
player_game_ids_cache = {}

for all_star, all_star_name in enumerate(all_stars):
    for game, game_label in enumerate(games):
        season = FIRST_SEASON + (game // GAMES_PER_FULL_SEASON)
        if (season == PARTIAL_SEASON):
            regular_season_games = len(games) - (PARTIAL_SEASON - FIRST_SEASON) * GAMES_PER_FULL_SEASON
        else:
            regular_season_games = GAMES_PER_FULL_SEASON
        # Position within the season. Rows are laid out in blocks of 82 per season, so the
        # offset is always `game % 82`; taking the modulus by the partial season's own
        # (smaller) game count mis-numbered that season's games.
        game_of_the_season = (game % GAMES_PER_FULL_SEASON) + 1

        team = player_team[all_star_name][game_label]
        if (team not in team_abbreviations):
            # Not on a known team for this game: mark every layer as missing
            # (None becomes NaN in the float layers).
            for layer in all_layers:
                layer[game][all_star] = None
            continue

        team_key = (team, season)
        if team_key not in team_log_cache:
            team_log = team_seasons_table_df[team][season].get_data_frames()[0]
            team_log_cache[team_key] = (team_log, list(team_log['GAME_DATE']))
        team_game_log_df, dates_played = team_log_cache[team_key]

        player_key = (all_star_name, season)
        if player_key not in player_game_ids_cache:
            player_game_log_df = player_seasons_table_df[all_star_name][season].get_data_frames()[0]
            player_game_ids_cache[player_key] = set(player_game_log_df['Game_ID'])
        games_played_ids = player_game_ids_cache[player_key]

        # The lookups below treat row 0 of the team log as the latest game, so game k of
        # the season sits at row (games in season - k).
        # NOTE(review): this assumes the team's log has exactly `regular_season_games`
        # rows; confirm for the partial season, where teams may differ in games played.
        log_row = regular_season_games - game_of_the_season
        game_id = team_game_log_df['Game_ID'][log_row]
        games_id_layer[game][all_star] = game_id
        date = dates_played[log_row]
        date_of_game_layer[game][all_star] = date

        # Label: 1 when this game appears in the player's own game log, else stays 0.
        if (game_id in games_played_ids):
            participated_layer[game][all_star] = 1

        team_playoff_probabilities_layer[game][all_star] = teams_playoff_probabilities_df[team][date]

        # The opponent is read from the last three characters of the matchup string;
        # a matchup containing '@' is treated as an away game.
        matchup = team_game_log_df['MATCHUP'][log_row]
        oposing_team = matchup[-3:]
        opponent_layer[game][all_star] = oposing_team
        opponent_playoff_probabilities_layer[game][all_star] = teams_playoff_probabilities_df[oposing_team][date]
        home_game = ('@' not in matchup)
        if (home_game):
            home_game_layer[game][all_star] = 1

        # Age in years on the day of the game.
        game_day = datetime.strptime(date, GAME_DATE_FORMAT)
        age = game_day - datetime.strptime(player_name_dob_dict[all_star_name], GAME_DATE_FORMAT)
        age_layer[game][all_star] = age.days / 365.25

        # The team's previous game is the next row of its log, if there is one.
        index_of_last_game_played = dates_played.index(date) + 1
        if (index_of_last_game_played < len(dates_played)):
            date_of_last_game_played = dates_played[index_of_last_game_played]
            days_since_last_game_played = (game_day - datetime.strptime(date_of_last_game_played, GAME_DATE_FORMAT)).days
            previous_matchup = team_game_log_df['MATCHUP'][index_of_last_game_played]
        else:
            days_since_last_game_played = SEASON_OPENER_REST_DAYS
            previous_matchup = None
        time_elapsed_layer[game][all_star] = days_since_last_game_played
        distance_traveled_layer[game][all_star] = _journey_length(distances_table_df, team, oposing_team, home_game, previous_matchup)

        # One-hot encode the injury report for the day of the game; any other status
        # leaves all three indicators at 0.
        injury_status = player_injury_status_df[all_star_name][date]
        if (injury_status == 'FULL'):
            health_layer[game][all_star] = 1
        if (injury_status == 'DTD'):
            day_to_day_layer[game][all_star] = 1
        if (injury_status == 'OUT'):
            out_layer[game][all_star] = 1


Exporting the label and feature layers as CSV files

In [540]:
def _export_layer(layer, csv_name):
    """Label a layer with `games` (rows) and `all_stars` (columns), write it to `csv_name`, and return the frame."""
    layer_df = pd.DataFrame(data = layer, index = games, columns = all_stars)
    layer_df.to_csv(csv_name, encoding='utf-8')
    return layer_df


# One CSV per exported layer. The game-id layer and the three injury-status
# indicator layers are not written out by this cell.
date_of_game_layer_df = _export_layer(date_of_game_layer, 'date_of_game_layer.csv')
participated_layer_df = _export_layer(participated_layer, 'participated_layer.csv')
team_playoff_probabilities_layer_df = _export_layer(team_playoff_probabilities_layer, 'team_playoff_probabilities_layer.csv')
opponent_layer_df = _export_layer(opponent_layer, 'opponent_layer.csv')
opponent_playoff_probabilities_layer_df = _export_layer(opponent_playoff_probabilities_layer, 'opponent_playoff_probabilities_layer.csv')
home_game_layer_df = _export_layer(home_game_layer, 'home_game_layer.csv')
age_layer_df = _export_layer(age_layer, 'age_layer.csv')
time_elapsed_layer_df = _export_layer(time_elapsed_layer, 'time_elapsed_layer.csv')
distance_traveled_layer_df = _export_layer(distance_traveled_layer, 'distance_traveled_layer.csv')



Melting the data into a single table for the logistic deep neural network models

In [588]:
# Flatten the layers into one row per (All Star, game) for which the player was on a
# known team. Rows are emitted player by player, games in table order.

# Layers in the same order as the output columns below.
layers_in_column_order = (
    date_of_game_layer,
    team_playoff_probabilities_layer,
    opponent_layer,
    opponent_playoff_probabilities_layer,
    home_game_layer,
    age_layer,
    time_elapsed_layer,
    distance_traveled_layer,
    health_layer,
    day_to_day_layer,
    out_layer,
    participated_layer,
)

melted_data = []
for all_star in range(len(all_stars)):
    rostered_teams = player_team[all_stars[all_star]]
    for game in range(len(games)):
        # Skip games for which the player has no recognised team.
        if rostered_teams[games[game]] not in team_abbreviations:
            continue
        melted_data.append([layer[game][all_star] for layer in layers_in_column_order])

melted_columns = [
    'Date of Game',
    'Team Playoff Probability',
    'Opponent',
    'Opponenet Playoff Probability',
    'Home Game Indicator',
    'Player Age',
    'Days Since Last Game',
    'Distance Traveled',
    'Healthy Indicator',
    'Day to Day Indicator',
    'Out Indicator',
    'Participated',
]
melted_data_df = pd.DataFrame(data = melted_data, columns = melted_columns)
melted_data_df.head()

Unnamed: 0,Date of Game,Team Playoff Probability,Opponent,Opponenet Playoff Probability,Home Game Indicator,Player Age,Days Since Last Game,Distance Traveled,Healthy Indicator,Day to Day Indicator,Out Indicator,Participated
0,"OCT 28, 2015",66.0,ORL,43.0,0.0,25.141684,10.0,759.0,1.0,0.0,0.0,1.0
1,"OCT 30, 2015",66.0,MIL,34.0,0.0,25.147159,2.0,1067.0,1.0,0.0,0.0,1.0
2,"OCT 31, 2015",66.0,NYK,6.0,1.0,25.149897,1.0,635.0,1.0,0.0,0.0,1.0
3,"NOV 04, 2015",68.0,SAS,98.0,1.0,25.160849,4.0,0.0,1.0,0.0,0.0,1.0
4,"NOV 06, 2015",68.0,BOS,84.0,0.0,25.166324,2.0,394.0,1.0,0.0,0.0,1.0
