In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 50)
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import time
import requests

from nba_api.stats.static import players, teams
from nba_api.stats.endpoints import playergamelog, playbyplay, leaguegamefinder, HustleStatsBoxScore
from nba_api.stats.library.parameters import SeasonAll, SeasonType

### Exploring Basic Functionality

Below are some frequently-used functions for accessing player/team data.

In [2]:
def get_all_game_ids(pth=None, season_type=SeasonType.regular):
    """
    Accumulate the unique game IDs of every team into a np.array, storing them if requested.

    Parameters:
        pth: str path to save game_ids to (nothing is saved when it is falsy)
        season_type: SeasonType obj representing type of games to access

    Returns:
        np.array holding the unique GAME_ID values collected across all teams.
    """
    start = time.time()  # reference point for the elapsed-time report printed below
    game_ids = np.array([])
    for team_dict in teams.get_teams():
        team_games = leaguegamefinder.LeagueGameFinder(
            team_id_nullable=team_dict['id'],
            season_type_nullable=season_type,
            timeout=10,
        ).get_data_frames()[0]
        # np.unique drops any ID that was already collected from an earlier team.
        game_ids = np.unique(np.append(game_ids, team_games['GAME_ID'].unique()))
        time.sleep(2)  # pause before issuing the next team's request
    print('That took {0:.2f}s\nNumber of games: {1}'.format(time.time()-start, len(game_ids)))
    if pth:
        np.save(pth, game_ids, allow_pickle=True)
    return game_ids

In [3]:
# Raw string: "\h", "\j" and "\g" are not valid escape sequences, so the plain literal
# only worked by accident (and warns on recent Python versions). The path value is unchanged.
# NOTE(review): this is a hard-coded, backslash-separated path while other cells use
# '/Users/...' style paths; confirm it resolves on the machine running the notebook.
# allow_pickle=True unpickles objects stored in the file, so only load files you trust.
game_ids = np.load(r"\home\jsehnert101\game_ids.npy", allow_pickle=True)

In [None]:
# Collect all games where the situational score fits our criteria:
# 4th period, less than one minute and at most 10 seconds on the clock,
# and a score margin of at most 5 points either way.

keep_cols = ['GAME_ID', 'period', 'min', 'sec', 'HOMEDESCRIPTION', 'NEUTRALDESCRIPTION',
             'VISITORDESCRIPTION', 'score_margin', 'home_score', 'visitor_score', 'winner']
clutch_frames = []  # one filtered slice per game; concatenated once after the loop

n_success = 0  # number of games whose play-by-play request succeeded

for game_id in game_ids:
    try:
        game = playbyplay.PlayByPlay(game_id=game_id).get_data_frames()[0]
    except requests.exceptions.ReadTimeout:
        # Best effort: back off briefly and skip games whose request timed out.
        time.sleep(2)
        continue
    n_success += 1
    # Keep only rows that carry a score; copy so the column assignments below
    # write to an independent frame rather than a slice of the API result.
    game = game.loc[game['SCORE'].notnull()].copy()
    if not game.empty:
        game[['min', 'sec']] = game['PCTIMESTRING'].str.split(':', expand=True).astype(int)
        game[['visitor_score', 'home_score']] = game['SCORE'].str.split(' - ', expand=True).astype(int)
        game = game.rename(columns={'PERIOD': 'period', 'SCOREMARGIN': 'score_margin'})
        game.loc[game['score_margin'] == 'TIE', 'score_margin'] = 0
        game['score_margin'] = game['score_margin'].astype(int)
        # Create column to track who won, read from the last scored row
        # (assumes rows arrive in chronological order -- TODO confirm).
        last_play = game.iloc[-1]
        game['winner'] = 'visitor' if last_play['visitor_score'] > last_play['home_score'] else 'home'
        in_window = (
            (game['period'] == 4)
            & (game['min'] < 1)
            & (game['sec'] <= 10)
            & (game['score_margin'].abs() <= 5)
        )
        if in_window.any():
            clutch_frames.append(game.loc[in_window, keep_cols])
    time.sleep(1)  # pause before the next request

# DataFrame.append was removed in pandas 2.0; build the result with a single concat instead.
# ignore_index gives the combined frame a clean 0..n-1 index.
if clutch_frames:
    df = pd.concat(clutch_frames, ignore_index=True)
else:
    df = pd.DataFrame(columns=keep_cols)
df

In [4]:
# Display the loaded game IDs (the cell output below shows NumPy's abbreviated repr).
game_ids

array(['0020000001', '0020000002', '0020000003', ..., '1621900004',
       '1621900005', '1621900006'], dtype=object)

### Hustle Stats
Below is a dataframe of the 'hustle stats' recorded by stats.nba.com (accessed through `nba_api`) for a single game. It provides a good frame of reference for which hustle stats we want to track.

In [None]:
# Frames returned by the endpoint -- 0: game info, 1: player stats, 2: team stats
hustle_stats = HustleStatsBoxScore(game_id='0022000449').get_data_frames()

# Look at the player-level frame: its column names first, then the leading rows.
player_hustle = hustle_stats[1]
print(player_hustle.columns)
player_hustle.head()

In [None]:
# Both sheets live in the same workbook, have a two-row header, and use '-' for missing values.
stats_workbook = '/Users/jsehnert101/Fairport_stats.xlsx'
excel_options = dict(header=[0,1], na_values='-')

fairport_stats = pd.read_excel(stats_workbook, sheet_name='Fairport', **excel_options)
opp_stats = pd.read_excel(stats_workbook, sheet_name='Opponents', **excel_options)
fairport_stats


In [None]:
# Get 4th quarter stats (row label 4 of the Fairport sheet)
ft_pct_col = ('Free Throws', 'FT%')
to_pct_col = ('Assists and Turnovers', 'TO%')  # Prob. of turnover

fairport_ft_pct = fairport_stats.loc[4, ft_pct_col]
fairport_TO_pct = fairport_stats.loc[4, to_pct_col]
fairport_ft_pct

In [None]:
# Get 4th quarter opponent stats
opp_3PT_pct = opp_stats.loc[4,('Three Pointers','3FG%')]
# TODO: the right-hand sides of these two assignments were left blank, which is a
# SyntaxError; None placeholders keep the cell runnable until the calculations are written.
opp_points_per_sideline = None
opp_points_per_baseline = None
fairport_stats.columns[0]