In [None]:
# FINAL ANALYSIS TRAVEL SCHEDULE
# CONVERT FINAL_ANALYSIS_YYYY_YY FILES TO JUST LOAD AND SAVE DATA TO CSV
# THEN RUN ACTUAL ANALYSIS HERE

#nba_team_lat_lon = get_team_lat_lon(nba_team_details)

In [4]:
import nba_api
from nba_api.stats.endpoints import leaguegamefinder, teamdetails, boxscoreadvancedv2, cumestatsteam
from nba_api.stats.static import teams

from geopy import geocoders
from geopy.geocoders import Nominatim

import pandas as pd
import numpy as np

import datetime

import networkx as nx

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

In [5]:
def get_team_ids():
    """Collect the ``id`` of every entry returned by ``teams.get_teams()``.

    Returns:
        list: the ids, in the same order the static team listing yields them.
    """
    return [team_record['id'] for team_record in teams.get_teams()]

In [6]:
def get_team_details():
    """Build a lookup of basic details and city coordinates for every team.

    For each id from ``get_team_ids()`` this requests the team's record from the
    ``TeamDetails`` endpoint and geocodes the team's city with Nominatim, i.e.
    one stats request and one geocoding request per team.

    Returns:
        dict: keyed by team abbreviation. Each value is a dict with the keys
        ``ID``, ``NICKNAME``, ``CITY``, ``FULL_NAME``, ``LAT`` and ``LON``.
        ``LAT``/``LON`` are copied verbatim from the geocoder's raw response.

    Raises:
        ValueError: if the geocoder returns no match for a team's city.
    """
    geolocator = Nominatim(user_agent='cs-5483-995-nba-travel')

    nba_team_details = {}

    for nba_team_id in get_team_ids():
        result = teamdetails.TeamDetails(team_id=nba_team_id)

        # Only the first row of the first frame is used. Reading it as a row
        # avoids writing new columns onto a column-subset slice of the frame.
        team_row = result.get_data_frames()[0][['TEAM_ID', 'ABBREVIATION', 'NICKNAME', 'CITY']].iloc[0]

        abbreviation = team_row['ABBREVIATION']
        nickname = team_row['NICKNAME']
        city = team_row['CITY']

        # geocode() yields None when nothing matches; fail with a message that
        # names the team instead of an AttributeError on ``None.raw``.
        city_lat_long = geolocator.geocode(city)
        if city_lat_long is None:
            raise ValueError(
                f"Could not geocode city {city!r} for team {abbreviation!r} (id {nba_team_id})"
            )

        nba_team_details[abbreviation] = {
            'ID': nba_team_id,
            'NICKNAME': nickname,
            'CITY': city,
            'FULL_NAME': city + " " + nickname,
            'LAT': city_lat_long.raw['lat'],
            'LON': city_lat_long.raw['lon'],
        }

    return nba_team_details

In [7]:
def load_nba_game_data(file_path='pd_data_files', base_filename='games_2016_17'):
    """Read one game table from ``<file_path>/<base_filename>.csv``.

    Args:
        file_path: directory that holds the CSV files.
        base_filename: file name without the ``.csv`` extension.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that file.
    """
    csv_location = '/'.join((file_path, base_filename)) + '.csv'
    return pd.read_csv(csv_location)

In [8]:
def get_edges_for_team(team_abbreviation, games):
    """Turn one team's game sequence into location-to-location travel edges.

    The rows of ``games`` whose ``TEAM_ABBREVIATION`` equals ``team_abbreviation``
    are taken in their existing order. Every pair of successive games becomes an
    edge ``"<home team of game i>-><home team of game i+1>"``, except when both
    games are hosted by ``team_abbreviation`` itself (no travel edge is recorded).
    The statistics attached to an edge come from game ``i``, the edge's origin.

    Args:
        team_abbreviation: abbreviation of the team whose schedule is walked.
        games: DataFrame with at least the columns ``TEAM_ABBREVIATION``,
            ``HOME_TEAM_ABBREVIATION``, ``NUMBER_GAMES_PLAYED``, ``OFF_EFF``,
            ``DEF_EFF`` and ``WL``.

    Returns:
        tuple: ``(unique_edge_pairs_list, unique_edge_pairs_with_weights)``.

        * ``unique_edge_pairs_list`` - one ``[origin, destination]`` list per
          distinct edge, in order of first appearance.
        * ``unique_edge_pairs_with_weights`` - one entry per distinct edge, in
          the same order, shaped ``[edge_string, '', stats_list, occurrences]``.
          ``stats_list`` holds one dict per occurrence with the keys
          ``NUMBER_GAMES_PLAYED``, ``OFF_EFF``, ``DEF_EFF`` (both efficiencies
          multiplied by 100) and ``WL``.
    """
    games_for_team = games[games['TEAM_ABBREVIATION'] == team_abbreviation]
    # Re-number rows 0..n-1 so that positions in the home-team list can be used
    # directly as labels for ``.at`` below.
    games_for_team = games_for_team.reset_index(drop=True)
    home_teams = games_for_team['HOME_TEAM_ABBREVIATION'].tolist()

    def stats_for_game(row_index):
        """Return the per-game statistics recorded on an edge leaving this game."""
        return {
            "NUMBER_GAMES_PLAYED": games_for_team.at[row_index, "NUMBER_GAMES_PLAYED"],
            "OFF_EFF": games_for_team.at[row_index, "OFF_EFF"] * 100,
            "DEF_EFF": games_for_team.at[row_index, "DEF_EFF"] * 100,
            "WL": games_for_team.at[row_index, "WL"],
        }

    # Maps an edge string to its entry so repeated edges are found in O(1)
    # rather than by scanning the result list; dicts keep first-seen order.
    entry_by_edge = {}
    unique_edge_pairs_with_weights = []

    for game_index, (origin, destination) in enumerate(zip(home_teams, home_teams[1:])):
        if origin == team_abbreviation and destination == team_abbreviation:
            # Two home games in a row: the team did not travel.
            continue

        edge_pair = origin + "->" + destination
        entry = entry_by_edge.get(edge_pair)
        if entry is None:
            # Slot 1 is an empty-string placeholder kept for shape compatibility.
            entry = [edge_pair, '', [], 0]
            entry_by_edge[edge_pair] = entry
            unique_edge_pairs_with_weights.append(entry)

        entry[2].append(stats_for_game(game_index))
        entry[3] += 1

    unique_edge_pairs_list = [edge_pair.split('->') for edge_pair in entry_by_edge]

    return unique_edge_pairs_list, unique_edge_pairs_with_weights

In [9]:
def get_paths_of_len_multiple_occur(unique_edge_pairs_with_weights, games_ahead):
    """Find gap-free runs of games per team and count how often each path recurs.

    Args:
        unique_edge_pairs_with_weights: dict keyed by team. Each value is a list
            of entries shaped ``[edge_string, '', stats_list, count]`` where every
            item of ``stats_list`` is a dict with the keys ``NUMBER_GAMES_PLAYED``,
            ``OFF_EFF``, ``DEF_EFF`` and ``WL``.
        games_ahead: how many further games must follow a starting game with
            ``NUMBER_GAMES_PLAYED`` increasing by exactly one each time. Every
            reported path therefore covers ``games_ahead + 1`` games.

    Returns:
        tuple: ``(totals, num_distinct_paths, paths_by_team)``.

        * ``totals`` - dict mapping each path string to the number of times it
          was found across all teams.
        * ``num_distinct_paths`` - how many path strings were found more than once.
        * ``paths_by_team`` - dict mapping each team to its list of
          ``[path_string, avg_off_eff, avg_def_eff, w_l_record]`` summaries.
    """
    span = games_ahead + 1

    # Step 1: flatten each team's edge entries into one row per game,
    # ordered by the number of games played.
    game_rows_by_team = {}
    for team, edge_entries in unique_edge_pairs_with_weights.items():
        game_rows = [
            [entry[0], stats['NUMBER_GAMES_PLAYED'], stats['OFF_EFF'], stats['DEF_EFF'], stats["WL"]]
            for entry in edge_entries
            for stats in entry[2]
        ]
        game_rows.sort(key=lambda row: row[1])
        game_rows_by_team[team] = game_rows

    # Step 2: slide over each team's rows and summarise every gap-free run.
    paths_by_team = {}
    every_path = []
    for team, game_rows in game_rows_by_team.items():
        row_count = len(game_rows)
        team_paths = []

        for start in range(row_count):
            first_game_number = game_rows[start][1]
            # A run is rejected when it would fall off the end of the list or
            # when any later game number is not exactly first + offset.
            has_gap = any(
                start + step >= row_count
                or game_rows[start + step][1] != first_game_number + step
                for step in range(1, span)
            )
            if has_gap:
                continue

            window = [game_rows[position] for position in range(start, start + span)]

            # Each edge string contributes everything but its last three
            # characters (e.g. 'OKC->BOS' gives 'OKC->'), so the path lists only
            # the origin of every edge; the trailing arrow is cut afterwards.
            origins = "".join(row[0][:len(row[0]) - 3] for row in window)
            summary = [
                origins[:len(origins) - 2],
                sum(row[2] for row in window) / span,
                sum(row[3] for row in window) / span,
                "".join(row[4] for row in window),
            ]

            # The same list object is shared by both collections.
            team_paths.append(summary)
            every_path.append(summary)

        paths_by_team[team] = team_paths

    # Step 3: tally occurrences of each path string across all teams.
    totals = {}
    for summary in every_path:
        totals[summary[0]] = totals.get(summary[0], 0) + 1

    num_distinct_paths = sum(1 for occurrences in totals.values() if occurrences > 1)

    return totals, num_distinct_paths, paths_by_team

In [14]:
def run_travel_schedule_analysis(nba_team_details, games_df, games_ahead, edge_weight, travel_path_threshold):
    """Report how many travel paths of a given length repeat within a schedule.

    Builds the weighted travel edges of every team in ``nba_team_details`` from
    ``games_df``, searches them for gap-free paths spanning ``games_ahead + 1``
    games, and prints the number of paths found more than once. A second line is
    printed when no path occurs more than ``travel_path_threshold`` times.

    Args:
        nba_team_details: mapping whose keys are the team abbreviations to analyse.
        games_df: game table handed to ``get_edges_for_team``.
        games_ahead: number of follow-up games each path must contain.
        edge_weight: accepted for call compatibility; not read by this function.
        travel_path_threshold: a path must occur more than this many times to
            count as frequent.

    Returns:
        None. Results are reported through ``print`` only.
    """
    weighted_edges_by_team = {}
    for team in nba_team_details:
        # The plain edge list (first element) is not needed here.
        _, weighted_edges_by_team[team] = get_edges_for_team(team, games_df)

    path_counts, num_distinct_paths, paths_by_team = get_paths_of_len_multiple_occur(
        weighted_edges_by_team, games_ahead
    )
    print(f'games_ahead: {games_ahead}, num_distinct_paths: {num_distinct_paths}')

    frequent_paths = [
        path for path, occurrences in path_counts.items()
        if occurrences > travel_path_threshold
    ]

    if not frequent_paths:
        print(f'There are no paths with {games_ahead+1} nodes that exist more than {travel_path_threshold} times in the entire schedule')
        return

    # Tally of per-team paths that match a frequent path; computed for ad-hoc
    # inspection only and not reported.
    frequent_path_hits = sum(
        1
        for team in nba_team_details
        for path in paths_by_team[team]
        if path[0] in frequent_paths
    )

In [11]:
# Fetch details for every team (one TeamDetails request plus one geocoding
# lookup per team), then display the 'OKC' entry as a spot check.
nba_team_details = get_team_details()
nba_team_details['OKC']

{'ID': 1610612760,
 'NICKNAME': 'Thunder',
 'CITY': 'Oklahoma City',
 'FULL_NAME': 'Oklahoma City Thunder',
 'LAT': '35.4729886',
 'LON': '-97.5170536'}

In [12]:
# Load one game table per season from CSV. The first call relies on the
# function's default base_filename ('games_2016_17').
games_2016_17 = load_nba_game_data()
games_2017_18 = load_nba_game_data(base_filename='games_2017_18')
games_2018_19 = load_nba_game_data(base_filename='games_2018_19')
games_2015_16 = load_nba_game_data(base_filename='games_2015_16')

In [15]:
# 2015-16 season: search for repeated travel paths with 1 to 5 games ahead.
for games_ahead in range(1, 6):
    run_travel_schedule_analysis(nba_team_details, games_2015_16, games_ahead, "OFF_EFF", 2)

games_ahead: 1, num_distinct_paths: 350
games_ahead: 2, num_distinct_paths: 56
games_ahead: 3, num_distinct_paths: 0
There are no paths with 4 nodes that exist more than 2 times in the entire schedule
games_ahead: 4, num_distinct_paths: 0
There are no paths with 5 nodes that exist more than 2 times in the entire schedule
games_ahead: 5, num_distinct_paths: 0
There are no paths with 6 nodes that exist more than 2 times in the entire schedule


In [16]:
# 2016-17 season: search for repeated travel paths with 1 to 5 games ahead.
for games_ahead in range(1, 6):
    run_travel_schedule_analysis(nba_team_details, games_2016_17, games_ahead, "OFF_EFF", 2)

games_ahead: 1, num_distinct_paths: 382
games_ahead: 2, num_distinct_paths: 41
There are no paths with 3 nodes that exist more than 2 times in the entire schedule
games_ahead: 3, num_distinct_paths: 2
There are no paths with 4 nodes that exist more than 2 times in the entire schedule
games_ahead: 4, num_distinct_paths: 0
There are no paths with 5 nodes that exist more than 2 times in the entire schedule
games_ahead: 5, num_distinct_paths: 0
There are no paths with 6 nodes that exist more than 2 times in the entire schedule


In [17]:
# 2017-18 season: search for repeated travel paths with 1 to 5 games ahead.
for games_ahead in range(1, 6):
    run_travel_schedule_analysis(nba_team_details, games_2017_18, games_ahead, "OFF_EFF", 2)

games_ahead: 1, num_distinct_paths: 387
games_ahead: 2, num_distinct_paths: 46
games_ahead: 3, num_distinct_paths: 1
There are no paths with 4 nodes that exist more than 2 times in the entire schedule
games_ahead: 4, num_distinct_paths: 0
There are no paths with 5 nodes that exist more than 2 times in the entire schedule
games_ahead: 5, num_distinct_paths: 0
There are no paths with 6 nodes that exist more than 2 times in the entire schedule


In [18]:
# 2018-19 season: search for repeated travel paths with 1 to 5 games ahead.
for games_ahead in range(1, 6):
    run_travel_schedule_analysis(nba_team_details, games_2018_19, games_ahead, "OFF_EFF", 2)

games_ahead: 1, num_distinct_paths: 370
games_ahead: 2, num_distinct_paths: 36
games_ahead: 3, num_distinct_paths: 2
There are no paths with 4 nodes that exist more than 2 times in the entire schedule
games_ahead: 4, num_distinct_paths: 0
There are no paths with 5 nodes that exist more than 2 times in the entire schedule
games_ahead: 5, num_distinct_paths: 0
There are no paths with 6 nodes that exist more than 2 times in the entire schedule


### Laplacian Matrices

In [None]:
# NOTE(review): `network_2016_17_okc_digraph` is not assigned in any cell visible
# in this notebook, so this cell cannot run on a fresh kernel as-is - TODO confirm
# where the graph is meant to be built.
nx.directed_combinatorial_laplacian_matrix(network_2016_17_okc_digraph)

### All Pairs Dijkstra's Shortest Path

In [None]:
# NOTE(review): `paths_len_3_to_5` is created but never filled in this cell, and
# `network_2016_17_okc_digraph` is not assigned in any visible cell - TODO confirm.
paths_len_3_to_5 = []

# Print the per-source path mapping returned by all-pairs Dijkstra.
for n, (dist, path) in nx.all_pairs_dijkstra(network_2016_17_okc_digraph):
    print(path)

In [None]:
# All pairs shortest path, materialised as a dict for display.
# NOTE(review): `network_2016_17_okc_digraph` is not assigned in any visible cell - TODO confirm.
dict(nx.all_pairs_shortest_path(network_2016_17_okc_digraph))

In [None]:
# NOTE(review): `network_2016_17_okc_digraph` is not assigned in any visible cell.
# dag_longest_path is meant for acyclic directed graphs; a travel graph with
# round trips would presumably contain cycles - confirm this call is applicable.
nx.dag_longest_path(network_2016_17_okc_digraph)

In [None]:
# NOTE(review): this cell uses `network_2016_17_digraph`, a different name from the
# `..._okc_digraph` used in the cells above; neither is assigned in any visible
# cell - TODO confirm which graph is intended and that its edges carry "weight".
nx.johnson(network_2016_17_digraph, weight="weight")