In [None]:
# FINAL ANALYSIS REST DAYS
# CONVERT FINAL_ANALYSIS_YYYY_YY FILES TO JUST LOAD AND SAVE DATA TO CSV
# THEN RUN ACTUAL ANALYSIS HERE

#nba_team_lat_lon = get_team_lat_lon(nba_team_details)

In [2]:
import nba_api
from nba_api.stats.endpoints import leaguegamefinder, teamdetails, boxscoreadvancedv2, cumestatsteam
from nba_api.stats.static import teams

from geopy import geocoders
from geopy.geocoders import Nominatim

import pandas as pd
import numpy as np

import datetime

import networkx as nx

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

In [3]:
def get_team_ids():
    """Return the `id` field of every entry in `teams.get_teams()`, in the order given."""
    return [team_record['id'] for team_record in teams.get_teams()]

In [4]:
def get_team_details():
    """Build a lookup of team details keyed by team abbreviation.

    For every id returned by `get_team_ids()`, the first data frame of the
    `TeamDetails` endpoint is read and the team's city is geocoded with
    Nominatim.

    Returns:
        dict: ``{ABBREVIATION: {'ID', 'NICKNAME', 'CITY', 'FULL_NAME', 'LAT', 'LON'}}``.
        ``LAT`` / ``LON`` are taken unchanged from the geocoder's raw response
        (NOTE(review): Nominatim appears to return these as strings; convert
        with ``float()`` at the point of use if numbers are needed).

    Raises:
        ValueError: if the endpoint returns no row for a team, or the team's
            city cannot be geocoded.
    """
    nba_team_ids = get_team_ids()

    gn = Nominatim(user_agent='cs-5483-995-nba-travel')

    nba_team_details = {}

    for nba_team_id in nba_team_ids:
        result = teamdetails.TeamDetails(team_id=nba_team_id)
        team_frame = result.get_data_frames()[0]
        if team_frame.empty:
            raise ValueError(f'No team details returned for team id {nba_team_id}')

        # Read scalars positionally from the first row instead of adding
        # columns to a column-subset of the frame; the latter can trigger
        # pandas' SettingWithCopyWarning and is unnecessary for a dict.
        team_row = team_frame.iloc[0]
        abbreviation = team_row['ABBREVIATION']
        nickname = team_row['NICKNAME']
        city = team_row['CITY']

        # geocode() returns None when the query has no match; fail with a
        # clear message rather than an AttributeError on `.raw`.
        city_lat_long = gn.geocode(city)
        if city_lat_long is None:
            raise ValueError(f'Could not geocode city {city!r} for team {abbreviation}')

        nba_team_details[abbreviation] = {
            'ID': nba_team_id,
            'NICKNAME': nickname,
            'CITY': city,
            'FULL_NAME': city + " " + nickname,
            'LAT': city_lat_long.raw['lat'],
            'LON': city_lat_long.raw['lon'],
        }

    return nba_team_details

In [13]:
def load_nba_game_data(file_path='pd_data_files', base_filename='games_2016_17'):
    """Read `<file_path>/<base_filename>.csv` into a DataFrame and return it.

    Only the base games file is loaded; the companion `_adv` file is not read.
    """
    csv_location = f'{file_path}/{base_filename}.csv'
    return pd.read_csv(csv_location)

In [6]:
def get_edges_for_team(team_abbreviation, games):
    """Collect the edges between consecutive game rows for one team.

    The rows of `games` whose TEAM_ABBREVIATION equals `team_abbreviation`
    are taken in their existing order (presumably chronological - confirm
    against the CSV). For each adjacent pair of rows an edge
    ``HOME_TEAM_ABBREVIATION[i] -> HOME_TEAM_ABBREVIATION[i+1]`` is recorded,
    except when both values equal `team_abbreviation`. The stats attached to
    an edge come from row ``i`` (the edge's starting game).

    Args:
        team_abbreviation: abbreviation to filter on.
        games: DataFrame with columns TEAM_ABBREVIATION,
            HOME_TEAM_ABBREVIATION, NUMBER_GAMES_PLAYED, OFF_EFF, DEF_EFF,
            WL and GAME_DATE.

    Returns:
        tuple:
            - list of ``[origin, destination]`` pairs, one per distinct edge,
              in order of first appearance;
            - list of ``[edge_label, '', stats_list, count]`` records in the
              same order, where ``edge_label`` is ``"origin->destination"``,
              ``stats_list`` holds one dict per occurrence (OFF_EFF and
              DEF_EFF multiplied by 100) and ``count`` is the number of
              occurrences.
    """
    games_for_team = games[games['TEAM_ABBREVIATION'] == team_abbreviation]
    games_for_team = games_for_team.reset_index(drop=True)
    only_home_teams = games_for_team['HOME_TEAM_ABBREVIATION'].tolist()

    # edge label -> [label, '', stats_list, count]. A dict keeps first-seen
    # order and gives O(1) lookup instead of rescanning the list per game.
    records_by_edge = {}

    for i in range(len(only_home_teams) - 1):
        origin = only_home_teams[i]
        destination = only_home_teams[i + 1]

        # Skip pairs where both rows carry the team's own abbreviation.
        if origin == team_abbreviation and destination == team_abbreviation:
            continue

        edge_pair = origin + "->" + destination
        game_stats = {
            "NUMBER_GAMES_PLAYED": games_for_team.at[i, "NUMBER_GAMES_PLAYED"],
            "OFF_EFF": games_for_team.at[i, "OFF_EFF"] * 100,
            "DEF_EFF": games_for_team.at[i, "DEF_EFF"] * 100,
            "WL": games_for_team.at[i, "WL"],
            "GAME_DATE": games_for_team.at[i, "GAME_DATE"],
        }

        record = records_by_edge.get(edge_pair)
        if record is None:
            records_by_edge[edge_pair] = [edge_pair, '', [game_stats], 1]
        else:
            record[2].append(game_stats)
            record[3] += 1

    unique_edge_pairs_list = [edge_label.split('->') for edge_label in records_by_edge]
    unique_edge_pairs_with_weights = list(records_by_edge.values())

    return unique_edge_pairs_list, unique_edge_pairs_with_weights

In [76]:
def run_rest_day_analysis(nba_team_details, games_df, num_rest_days, edge_weight, rest_days_threshold):
    """Print the paths found for `num_rest_days` whose label occurs more than `rest_days_threshold` times.

    Edges are built per team with `get_edges_for_team`, handed to
    `get_paths_of_len_multiple_occur`, and every returned path whose label
    count exceeds the threshold is printed (sorted by label) followed by the
    number of printed paths. When no label exceeds the threshold a single
    message is printed instead.

    `edge_weight` is accepted but not read by this function. Nothing is returned.
    """
    weighted_edges_by_team = {}
    for team in nba_team_details:
        _, weighted_edges_by_team[team] = get_edges_for_team(team, games_df)

    path_counts, _, paths_by_team = get_paths_of_len_multiple_occur(weighted_edges_by_team, num_rest_days)

    # Labels that occur strictly more often than the threshold.
    frequent_labels = [label for label, count in path_counts.items() if count > rest_days_threshold]

    if not frequent_labels:
        print(f'There are no paths that have {num_rest_days} rest days that also occur {rest_days_threshold} times')
        return

    selected_paths = [
        path
        for team in nba_team_details
        for path in paths_by_team[team]
        if path[0] in frequent_labels
    ]

    for path in sorted(selected_paths, key=lambda entry: entry[0]):
        print(path)
    print(f'Number of paths that have {num_rest_days} rest days: {len(selected_paths)}')

In [72]:
def get_paths_of_len_multiple_occur(unique_edge_pairs_with_weights, num_rest_days):
    """Find two-game paths separated by an exact day gap and count how often each label recurs.

    For every team the edge occurrences are flattened into one list and
    ordered by NUMBER_GAMES_PLAYED. Each adjacent pair of occurrences becomes
    a path when (a) the absolute difference between their GAME_DATE values is
    exactly `num_rest_days` days and (b) their NUMBER_GAMES_PLAYED values are
    consecutive. The path label is the starting node of each of the two edges
    joined by ``"->"``.

    Args:
        unique_edge_pairs_with_weights: dict mapping a team key to a list of
            ``[edge_label, _, stats_list, _]`` records, where ``edge_label``
            is ``"origin->destination"`` and every stats dict has the keys
            NUMBER_GAMES_PLAYED, OFF_EFF, DEF_EFF, WL and GAME_DATE
            (GAME_DATE formatted as ``%Y-%m-%d``).
        num_rest_days: required day difference between the two games.

    Returns:
        tuple:
            - dict mapping path label -> number of occurrences across all
              teams, keys in sorted label order;
            - number of labels that occur more than once;
            - dict mapping team key -> list of
              ``[label, avg_off_eff, avg_def_eff, w_l_record]`` in
              games-played order.

    Side effects:
        Prints the number of edge occurrences and teams examined.
    """
    games_ahead = 1  # how many following games are joined onto each starting game

    # Flatten every team's edge records into rows ordered by games played.
    rows_by_team = {}
    for key, edge_pairs in unique_edge_pairs_with_weights.items():
        rows = [
            [edge_pair[0], stats['NUMBER_GAMES_PLAYED'], stats['OFF_EFF'],
             stats['DEF_EFF'], stats["WL"], stats["GAME_DATE"]]
            for edge_pair in edge_pairs
            for stats in edge_pair[2]
        ]
        rows.sort(key=lambda row: row[1])
        rows_by_team[key] = rows

    num_paths_analyzed = 0
    num_teams_analyzed = 0
    consecutive_paths_of_games_ahead_len_dict = {}
    all_path_labels = []

    for key, rows in rows_by_team.items():
        num_teams_analyzed += 1
        num_paths_analyzed += len(rows)
        team_paths = []

        # The last `games_ahead` rows have no complete window after them.
        for i in range(len(rows) - games_ahead):
            window = rows[i:i + games_ahead + 1]

            current_game_date = datetime.datetime.strptime(window[0][5], "%Y-%m-%d")
            next_game_date = datetime.datetime.strptime(window[1][5], "%Y-%m-%d")
            if abs((next_game_date - current_game_date).days) != num_rest_days:
                continue

            # Games-played numbers must run without gaps (e.g. 3 -> 4).
            first_game_number = window[0][1]
            if any(row[1] != first_game_number + offset for offset, row in enumerate(window)):
                continue

            # Use the starting node of each edge; splitting on the arrow
            # avoids assuming every abbreviation is exactly 3 characters.
            path_string = "->".join(row[0].split("->")[0] for row in window)
            avg_off_eff = sum(row[2] for row in window) / (games_ahead + 1)
            avg_def_eff = sum(row[3] for row in window) / (games_ahead + 1)
            w_l_record = "".join(row[4] for row in window)

            team_paths.append([path_string, avg_off_eff, avg_def_eff, w_l_record])
            all_path_labels.append(path_string)

        consecutive_paths_of_games_ahead_len_dict[key] = team_paths

    # Count occurrences per label; iterating in sorted order keeps the
    # returned dict's keys alphabetically ordered.
    totals = {}
    for label in sorted(all_path_labels):
        totals[label] = totals.get(label, 0) + 1

    num_distinct_paths = sum(1 for count in totals.values() if count > 1)

    print(f'num_paths_analyzed: {num_paths_analyzed}')
    print(f'num_teams_analyzed: {num_teams_analyzed}')

    return totals, num_distinct_paths, consecutive_paths_of_games_ahead_len_dict

In [11]:
# Build the abbreviation -> team-details lookup. get_team_details() calls the
# TeamDetails endpoint and the Nominatim geocoder once per team.
nba_team_details = get_team_details()

In [41]:
# Load one games table per season from pd_data_files/<base_filename>.csv
# (games_2016_17 is the loader's default base filename).
games_2016_17 = load_nba_game_data()
games_2017_18 = load_nba_game_data(base_filename='games_2017_18')
games_2018_19 = load_nba_game_data(base_filename='games_2018_19')
games_2015_16 = load_nba_game_data(base_filename='games_2015_16')

In [79]:
# 2015-16 table: num_rest_days=3, rest_days_threshold=2 (labels must occur more than 2 times).
# "OFF_EFF" is passed as edge_weight, which run_rest_day_analysis does not read.
run_rest_day_analysis(nba_team_details, games_2015_16, 3, "OFF_EFF", 2)

num_paths_analyzed: 1805
num_teams_analyzed: 30
['LAL->LAC', 78.24561403508773, 93.14814814814815, 'WW']
['LAL->LAC', 94.88681380871535, 94.36538461538461, 'WL']
['LAL->LAC', 95.3978767806914, 103.46527259148618, 'WW']
['MIL->BOS', 93.50553824852891, 94.44837340876944, 'WL']
['MIL->BOS', 106.10779816513761, 85.03847292098254, 'LL']
['MIL->BOS', 97.21489361702129, 85.52228930609544, 'LL']
Number of paths that have 3 rest days: 6


In [78]:
# 2016-17 table: num_rest_days=3, rest_days_threshold=2 (labels must occur more than 2 times).
# "OFF_EFF" is passed as edge_weight, which run_rest_day_analysis does not read.
run_rest_day_analysis(nba_team_details, games_2016_17, 3, "OFF_EFF", 2)

num_paths_analyzed: 1798
num_teams_analyzed: 30
['LAL->SAS', 91.3235294117647, 96.98415942769545, 'WW']
['LAL->SAS', 87.65086206896552, 93.65615692718497, 'WW']
['LAL->SAS', 89.06050420168067, 98.3848690991548, 'WW']
['PHX->LAL', 84.14073451079338, 86.7797619047619, 'WL']
['PHX->LAL', 88.256076879593, 92.06009244992296, 'WL']
['PHX->LAL', 92.6900741534888, 91.6161616161616, 'LW']
Number of paths that have 3 rest days: 6


In [80]:
# 2017-18 table: num_rest_days=3, rest_days_threshold=2 (labels must occur more than 2 times).
# "OFF_EFF" is passed as edge_weight, which run_rest_day_analysis does not read.
run_rest_day_analysis(nba_team_details, games_2017_18, 3, "OFF_EFF", 2)

num_paths_analyzed: 1778
num_teams_analyzed: 30
['PHX->SAC', 97.88379073756433, 93.5323826500297, 'LW']
['PHX->SAC', 96.9575979337064, 92.008547008547, 'LL']
['PHX->SAC', 89.90990990990991, 94.8923139736654, 'WW']
['SAC->LAC', 91.1418918918919, 88.01023268853379, 'LL']
['SAC->LAC', 83.19479857215707, 87.15552651232264, 'WW']
['SAC->LAC', 98.68793195108984, 83.2164541590771, 'LL']
Number of paths that have 3 rest days: 6


In [81]:
# 2018-19 table: num_rest_days=3, rest_days_threshold=2 (labels must occur more than 2 times).
# "OFF_EFF" is passed as edge_weight, which run_rest_day_analysis does not read.
run_rest_day_analysis(nba_team_details, games_2018_19, 3, "OFF_EFF", 2)

num_paths_analyzed: 1785
num_teams_analyzed: 30
['LAC->POR', 86.10714285714286, 85.75529895602656, 'WL']
['LAC->POR', 108.74882958801497, 87.9211168751907, 'LL']
['LAC->POR', 98.83505351681958, 79.21198326771653, 'LL']
['LAL->POR', 90.3995756718529, 87.96328029375763, 'WL']
['LAL->POR', 93.24561403508773, 95.36202735317781, 'WW']
['LAL->POR', 89.6096096096096, 90.70372259051504, 'WL']
['LAL->POR', 93.46701056660288, 91.99646074646074, 'WL']
['MEM->UTA', 99.38202247191009, 90.44038668098818, 'LL']
['MEM->UTA', 93.85081314304679, 103.7881562881563, 'WW']
['MEM->UTA', 85.76125743415463, 95.51052631578948, 'WW']
['MIA->CLE', 84.84992784992787, 89.44208706584945, 'LW']
['MIA->CLE', 88.12697897215232, 97.00736377025035, 'WW']
['MIA->CLE', 89.27350427350427, 96.09849942285496, 'LW']
['PHX->DEN', 81.744119743407, 85.80822347771502, 'WW']
['PHX->DEN', 91.44230769230768, 104.72062084257206, 'WW']
['PHX->DEN', 93.12687312687314, 88.21680672268909, 'WL']
['POR->LAC', 90.70317725752508, 99.71414141

### Laplacian Matrices