In [1]:
import nba_api
from nba_api.stats.endpoints import leaguegamefinder, teamdetails, boxscoreadvancedv2, cumestatsteam
from nba_api.stats.static import teams

from geopy import geocoders
from geopy.geocoders import Nominatim

import pandas as pd
import numpy as np

import datetime

import networkx as nx

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt

pd.set_option('display.max_columns', None)

In [2]:
def get_team_ids():
    """Return the id of every team in nba_api's static team list.

    Returns:
        list: The ``'id'`` value of each entry returned by
        ``teams.get_teams()``, in the order that call yields them.
    """
    return [team_entry['id'] for team_entry in teams.get_teams()]

In [3]:
def get_team_details():
    """Collect name and location details for every NBA team.

    For each id from ``get_team_ids()`` this queries the ``TeamDetails``
    endpoint, builds the team's full name as "<CITY> <NICKNAME>", and
    geocodes the city name with Nominatim.

    NOTE(review): one geocoder request is made per team with no pause between
    them, and only the bare city name is sent as the query -- check both
    against the geocoding service's usage policy and for ambiguous city names.

    Returns:
        dict: Maps team abbreviation to a dict with keys ``'ID'``,
        ``'NICKNAME'``, ``'CITY'``, ``'FULL_NAME'``, ``'LAT'`` and ``'LON'``.
        ``'LAT'`` and ``'LON'`` are stored exactly as they appear in the
        geocoder's raw response (no numeric conversion happens here).

    Raises:
        ValueError: If the geocoder returns no match for a team's city.
    """
    geolocator = Nominatim(user_agent='cs-5483-995-nba-travel')

    nba_team_details = {}

    for nba_team_id in get_team_ids():
        response = teamdetails.TeamDetails(team_id=nba_team_id)

        # Read the scalars straight from the first row. The previous version
        # added columns to a column-sliced frame (SettingWithCopyWarning) only
        # to read the same scalars back out again.
        team_row = response.get_data_frames()[0].iloc[0]
        abbreviation = team_row['ABBREVIATION']
        nickname = team_row['NICKNAME']
        city = team_row['CITY']

        location = geolocator.geocode(city)
        if location is None:
            # geocode() signals "no match" with None; fail with a message that
            # names the team instead of an AttributeError on `.raw`.
            raise ValueError(
                f"Could not geocode city {city!r} for team {abbreviation!r} (id {nba_team_id})"
            )

        nba_team_details[abbreviation] = {
            'ID': nba_team_id,
            'NICKNAME': nickname,
            'CITY': city,
            'FULL_NAME': city + " " + nickname,
            'LAT': location.raw['lat'],
            'LON': location.raw['lon'],
        }

    return nba_team_details

In [4]:
def get_nba_games(season, season_type='Regular Season', team_details=None):
    """Fetch one season of team-game rows and add matchup/efficiency columns.

    The raw table comes from ``LeagueGameFinder`` (league id ``'00'``) and
    holds one row per team per game. Rows are ordered by ``GAME_ID`` and the
    following columns are appended, in this order:

    * ``OPP_ABBREVIATION`` - last three characters of ``MATCHUP``.
    * ``NUMBER_GAMES_PLAYED`` - 1-based running count of the row's
      ``TEAM_ID`` in ``GAME_ID`` order.
    * ``OPP_NAME`` - ``team_details[OPP_ABBREVIATION]['FULL_NAME']``.
    * ``HOME_AWAY`` - ``'AWAY'`` when ``MATCHUP`` contains ``'@'``, else
      ``'HOME'``.
    * ``HOME_TEAM_ABBREVIATION`` / ``AWAY_TEAM_ABBREVIATION``.
    * ``OFF_EFF`` - ``(FGA - OREB + TOV + 0.4*FTA) / PTS``.
    * ``DEF_EFF`` - the same numerator divided by the opponent's ``PTS`` in
      the same game (NaN when the opponent's row is absent).

    NOTE(review): both ratios divide the possession-style estimate by points,
    i.e. the reciprocal of a points-per-possession rating -- confirm this
    orientation is intended before interpreting the numbers.
    NOTE(review): row order (and therefore NUMBER_GAMES_PLAYED) follows
    GAME_ID, presumably as a proxy for chronological order -- verify for
    rescheduled games.

    Args:
        season: Season string passed to the endpoint, e.g. ``'2015-16'``.
        season_type: Season type passed to the endpoint.
        team_details: Mapping of team abbreviation to a dict containing
            ``'FULL_NAME'``. Defaults to the notebook-level
            ``nba_team_details`` so existing calls keep working.

    Returns:
        pandas.DataFrame: The game table with the columns listed above.

    Raises:
        KeyError: If an opponent abbreviation is missing from ``team_details``.
        NameError: If ``team_details`` is omitted and ``nba_team_details`` has
            not been defined yet.
    """
    if team_details is None:
        # Backward-compatible default: the lookup built in a later notebook cell.
        team_details = nba_team_details

    gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable=season, season_type_nullable=season_type, league_id_nullable='00')
    all_games = gamefinder.get_data_frames()[0]

    all_games = all_games.sort_values(by='GAME_ID', ascending=True).reset_index(drop=True)

    all_games['GAME_DATE'] = pd.to_datetime(all_games['GAME_DATE'])
    all_games['OPP_ABBREVIATION'] = all_games['MATCHUP'].str[-3:]
    # Running game number per team, computed in one pass (replaces a dead
    # placeholder assignment plus a nested loop over team ids).
    all_games['NUMBER_GAMES_PLAYED'] = all_games.groupby('TEAM_ID').cumcount() + 1

    is_away = all_games['MATCHUP'].str.contains('@', regex=False)
    possessions = all_games['FGA'] - all_games['OREB'] + all_games['TOV'] + 0.4*all_games['FTA']

    # Opponent points via a single left merge on (game, opponent) instead of
    # filtering the whole frame once per row. Left merge keeps row order;
    # dropping duplicate keys mirrors the old "first match" behaviour.
    points_by_team_game = (
        all_games[['GAME_ID', 'TEAM_ABBREVIATION', 'PTS']]
        .drop_duplicates(subset=['GAME_ID', 'TEAM_ABBREVIATION'])
        .rename(columns={'TEAM_ABBREVIATION': 'OPP_ABBREVIATION', 'PTS': 'OPP_PTS'})
    )
    opponent_points = (
        all_games[['GAME_ID', 'OPP_ABBREVIATION']]
        .merge(points_by_team_game, on=['GAME_ID', 'OPP_ABBREVIATION'], how='left')['OPP_PTS']
        .to_numpy()
    )

    # Plain indexing (not .get) so an unknown abbreviation still raises KeyError.
    all_games['OPP_NAME'] = [team_details[abbreviation]['FULL_NAME']
                             for abbreviation in all_games['OPP_ABBREVIATION']]
    all_games['HOME_AWAY'] = np.where(is_away, 'AWAY', 'HOME')
    all_games['HOME_TEAM_ABBREVIATION'] = all_games['OPP_ABBREVIATION'].where(is_away, all_games['TEAM_ABBREVIATION'])
    all_games['AWAY_TEAM_ABBREVIATION'] = all_games['TEAM_ABBREVIATION'].where(is_away, all_games['OPP_ABBREVIATION'])
    all_games['OFF_EFF'] = possessions / all_games['PTS']
    all_games['DEF_EFF'] = possessions / opponent_points

    return all_games

In [5]:
def add_advanced_stats_to_df(games_df):
    """Return ``games_df`` with estimated team ratings joined on per game.

    For every distinct ``GAME_ID`` in ``games_df`` the ``BoxScoreAdvancedV2``
    endpoint is queried once and, from the second frame it returns, the
    columns ``E_OFF_RATING``, ``E_DEF_RATING`` and ``E_NET_RATING`` are kept.
    They are joined onto ``games_df`` by ``GAME_ID`` and ``TEAM_ID``.

    Fixes relative to the previous version:
      * each iteration now requests its own ``game_id`` (the first game's id
        was being re-requested for every game);
      * the merged frame is actually returned (the merge result was dropped,
        so the input came back unchanged);
      * an input with no games no longer raises ``IndexError``.

    Args:
        games_df: DataFrame with at least ``GAME_ID`` and ``TEAM_ID`` columns.

    Returns:
        pandas.DataFrame: A new frame; ``games_df`` itself is not modified.
        The join is a left join, so every input row is kept and rows with no
        matching stats get NaN ratings.
    """
    key_columns = ['GAME_ID', 'TEAM_ID']
    rating_columns = ['E_OFF_RATING', 'E_DEF_RATING', 'E_NET_RATING']

    # unique() keeps first-appearance order, so request order is deterministic.
    game_ids = games_df['GAME_ID'].unique().tolist()
    if not game_ids:
        return games_df.assign(**{column: float('nan') for column in rating_columns})

    per_game_stats = []
    for i, game_id in enumerate(game_ids, start=1):
        if i % 50 == 0:
            print(f'game_id: {game_id}, i: {i}')
        boxscoreadvancedfinder = boxscoreadvancedv2.BoxScoreAdvancedV2(game_id=game_id)
        advanced_stats_for_game = boxscoreadvancedfinder.get_data_frames()[1]
        per_game_stats.append(advanced_stats_for_game[key_columns + rating_columns])

    # Concatenate once at the end rather than growing a frame inside the loop.
    advanced_stats = pd.concat(per_game_stats, ignore_index=True)

    return games_df.merge(advanced_stats, on=key_columns, how='left')

In [6]:
def get_team_lat_lon(team_details):
    """Map each team key to a ``(longitude, latitude)`` pair of floats.

    Args:
        team_details: Mapping whose values are dicts holding ``'LON'`` and
            ``'LAT'`` entries that ``float()`` can convert.

    Returns:
        dict: Same keys as ``team_details``; each value is the tuple
        ``(float(LON), float(LAT))`` -- longitude first.
    """
    return {
        team_key: (float(info['LON']), float(info['LAT']))
        for team_key, info in team_details.items()
    }

In [7]:
def get_edges_for_team(team_abbreviation, games):
    """Summarise one team's moves between consecutive game venues.

    The team's rows are taken from ``games`` in their existing order and the
    ``HOME_TEAM_ABBREVIATION`` of each row is treated as that game's venue.
    Each pair of consecutive venues forms an edge ``"<from>-><to>"``, except
    when both venues are the team itself (home game followed by home game).

    Args:
        team_abbreviation: Value matched against ``TEAM_ABBREVIATION``.
        games: DataFrame with ``TEAM_ABBREVIATION``,
            ``HOME_TEAM_ABBREVIATION``, ``NUMBER_GAMES_PLAYED``, ``OFF_EFF``
            and ``DEF_EFF`` columns.

    Returns:
        tuple: ``(unique_edge_pairs_list, unique_edge_pairs_with_weights)``.
        The first item is a list of ``[from, to]`` lists, one per distinct
        edge in first-seen order. The second holds, in the same order, one
        ``[label, '', samples, count]`` list per edge, where ``samples`` is a
        list of dicts (``NUMBER_GAMES_PLAYED``, ``OFF_EFF`` * 100,
        ``DEF_EFF`` * 100) read from the *first* game of each occurrence and
        ``count`` is the number of occurrences.
    """
    team_games = games[games['TEAM_ABBREVIATION'] == team_abbreviation].reset_index(drop=True)
    venues = team_games['HOME_TEAM_ABBREVIATION'].tolist()

    # label -> [label, '', samples, count]; dict insertion order doubles as
    # the first-seen ordering of the edges.
    records_by_label = {}

    for position, (origin, destination) in enumerate(zip(venues, venues[1:])):
        if origin == team_abbreviation and destination == team_abbreviation:
            # Consecutive home games: no edge.
            continue

        label = origin + "->" + destination

        # Stats come from the game at `position`, i.e. the edge's origin game.
        # (E_OFF_RATING / E_DEF_RATING are not included.)
        sample = {
            "NUMBER_GAMES_PLAYED": team_games.at[position, "NUMBER_GAMES_PLAYED"],
            "OFF_EFF": team_games.at[position, "OFF_EFF"]*100,
            "DEF_EFF": team_games.at[position, "DEF_EFF"]*100,
        }

        record = records_by_label.get(label)
        if record is None:
            records_by_label[label] = [label, '', [sample], 1]
        else:
            record[2].append(sample)
            record[3] += 1

    unique_edge_pairs_list = [label.split('->') for label in records_by_label]
    unique_edge_pairs_with_weights = list(records_by_label.values())

    return unique_edge_pairs_list, unique_edge_pairs_with_weights

In [8]:
def convert_edge_weights(edge_list, weight_value):
    """Collapse each edge's samples into a single ``weight`` attribute.

    Args:
        edge_list: Iterable of records shaped ``[label, _, samples, ...]``
            where ``label`` is ``"<from>-><to>"`` and ``samples`` is a
            non-empty list of dicts.
        weight_value: Key to read from every sample dict.

    Returns:
        list: One ``[from, to, {"weight": w}]`` list per record. With more
        than one sample ``w`` is their arithmetic mean; with exactly one
        sample ``w`` is that sample's value, returned as-is.
    """
    def _weight_of(samples):
        if len(samples) > 1:
            return sum(sample[weight_value] for sample in samples)/len(samples)
        return samples[0][weight_value]

    return [
        record[0].split('->') + [{"weight": _weight_of(record[2])}]
        for record in edge_list
    ]

In [9]:
# Build the team lookup once. get_team_details() queries the TeamDetails
# endpoint and the Nominatim geocoder for every team, and get_nba_games()
# reads this notebook-level name when resolving opponent names.
nba_team_details = get_team_details()

In [10]:
nba_team_details['OKC']

{'ID': 1610612760,
 'NICKNAME': 'Thunder',
 'CITY': 'Oklahoma City',
 'FULL_NAME': 'Oklahoma City Thunder',
 'LAT': '35.4729886',
 'LON': '-97.5170536'}

In [11]:
# Fetch the 2015-16 regular-season team-game rows (with the derived matchup
# and efficiency columns) and preview the first few.
games_2015_16 = get_nba_games(season='2015-16', season_type='Regular Season')
games_2015_16.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,OPP_ABBREVIATION,NUMBER_GAMES_PLAYED,OPP_NAME,HOME_AWAY,HOME_TEAM_ABBREVIATION,AWAY_TEAM_ABBREVIATION,OFF_EFF,DEF_EFF
0,22015,1610612765,DET,Detroit Pistons,21500001,2015-10-27,DET @ ATL,W,239,106,37,96,0.385,12,29,0.414,20,26,0.769,23,36,59,23,5,3,15,15,12.0,ATL,1,Atlanta Hawks,AWAY,ATL,DET,0.928302,1.046809
1,22015,1610612737,ATL,Atlanta Hawks,21500001,2015-10-27,ATL vs. DET,L,239,94,37,82,0.451,8,27,0.296,12,15,0.8,7,33,40,22,9,4,15,25,-12.0,DET,1,Detroit Pistons,HOME,ATL,DET,1.021277,0.90566
2,22015,1610612739,CLE,Cleveland Cavaliers,21500002,2015-10-27,CLE @ CHI,L,240,95,38,94,0.404,9,29,0.31,10,17,0.588,11,39,50,26,5,7,10,21,-2.0,CHI,1,Chicago Bulls,AWAY,CHI,CLE,1.050526,1.028866
3,22015,1610612741,CHI,Chicago Bulls,21500002,2015-10-27,CHI vs. CLE,W,240,97,37,87,0.425,7,19,0.368,16,23,0.696,7,40,47,13,6,10,13,22,2.0,CLE,1,Cleveland Cavaliers,HOME,CHI,CLE,1.053608,1.075789
4,22015,1610612744,GSW,Golden State Warriors,21500003,2015-10-27,GSW vs. NOP,W,241,111,41,96,0.427,9,30,0.3,20,22,0.909,21,35,56,29,8,7,20,29,16.0,NOP,1,New Orleans Pelicans,HOME,GSW,NOP,0.935135,1.092632


In [12]:
games_2015_16.shape

(2460, 36)

In [13]:
games_2015_16.columns

Index(['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT',
       'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS',
       'OPP_ABBREVIATION', 'NUMBER_GAMES_PLAYED', 'OPP_NAME', 'HOME_AWAY',
       'HOME_TEAM_ABBREVIATION', 'AWAY_TEAM_ABBREVIATION', 'OFF_EFF',
       'DEF_EFF'],
      dtype='object')

In [14]:
#games_2015_16_adv = add_advanced_stats_to_df(games_2015_16)

In [15]:
#games_2015_16_adv.columns

In [16]:
# Write the season table to CSV with default to_csv settings (the row index
# is written as the first, unnamed column).
# NOTE(review): assumes the pd_data_files/ directory already exists -- confirm.
games_2015_16.to_csv('pd_data_files/games_2015_16.csv')
#games_2015_16_adv.to_csv('pd_data_files/games_2015_16_adv.csv')