In [2]:
import pandas as pd 
import numpy as np 
from datetime import date
%run formulas.ipynb 

In [3]:
def combine_team_games(df, keep_method='home'):
    '''Combine a TEAM_ID-GAME_ID unique table into rows by game. Slow.

        Every input row is self-joined on GAME_ID and paired with the other
        row(s) of the same game, so each output row carries one team's
        columns (suffix ``_A``) next to its opponent's (suffix ``_B``).

        Parameters
        ----------
        df : Input DataFrame. Needs GAME_ID and TEAM_ID columns, plus MATCHUP
            (for 'home'/'away') or WL (for 'winner'/'loser').
        keep_method : {'home', 'away', 'winner', 'loser', ``None``}, default 'home'
            Case-insensitive.
            - 'home' : Keep rows where TEAM_A is the home team.
            - 'away' : Keep rows where TEAM_A is the away team.
            - 'winner' : Keep rows where TEAM_A is the winning team.
            - 'loser' : Keep rows where TEAM_A is the losing team.
            - ``None`` : Keep all rows. Will result in an output DataFrame the same
                length as the input DataFrame.

        Returns
        -------
        result : DataFrame

        Raises
        ------
        ValueError
            If keep_method is a string other than the options listed above.
    '''
    # Join every row to all others with the same game ID.
    joined = pd.merge(df, df, suffixes=['_A', '_B'],
                      on=['GAME_ID'])
    # Filter out any row that is joined to itself.
    result = joined[joined.TEAM_ID_A != joined.TEAM_ID_B]

    if keep_method is None:
        # Return all the rows.
        return result

    method = keep_method.lower()
    if method == 'home':
        # MATCHUP_A contains ' vs. ' when TEAM_A is the home team. regex=False
        # makes the '.' a literal dot instead of a match-anything wildcard.
        keep = result.MATCHUP_A.str.contains(' vs. ', regex=False)
    elif method == 'away':
        # MATCHUP_A contains ' @ ' when TEAM_A is the away team.
        keep = result.MATCHUP_A.str.contains(' @ ', regex=False)
    elif method == 'winner':
        keep = result.WL_A == 'W'
    elif method == 'loser':
        keep = result.WL_A == 'L'
    else:
        raise ValueError(f'Invalid keep_method: {keep_method}')
    return result[keep]

In [4]:
# Team lookup helpers from nba_api's `static` module.
from nba_api.stats.static import teams

# One record per team; later cells read each record's 'abbreviation' and 'id' keys.
nba_teams = teams.get_teams()
# nba_teams

In [11]:
from nba_api.stats.endpoints import leaguegamefinder

gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable = "2013-14")
# The first DataFrame of those returned is what we want.
games = combine_team_games(gamefinder.get_data_frames()[0], keep_method = 'home')

# Keep only games whose GAME_ID starts with '002' (presumably the regular
# season -- TODO confirm), order them by GAME_ID and renumber the rows 0..n-1.
# Chaining returns a fresh frame, so nothing below mutates a filtered slice.
games = (
    games[games.GAME_ID.str[0:3] == '002']
    .sort_values('GAME_ID')
    .reset_index(drop=True)
    .rename(columns={'GAME_DATE_A': 'GAME_DATE', 'SEASON_ID_A': 'SEASON_ID'})
)
# Encode the home team's result as 1 (win) / 0 (loss). Assign the result back
# instead of calling replace(inplace=True) on a column selection, which does
# not reliably write through to the frame.
games['WL_A'] = games['WL_A'].replace({'W': 1, 'L': 0})


# get columns for four factors
# Each helper (defined in formulas.ipynb) is called as helper(row, True) for
# TEAM_A and helper(row, False) for TEAM_B.
factor_funcs = {'efg_pct': efg_pct,
                'tov_pct': tov_pct,
                'orb_pct': orb_pct,
                'ft/fga': ft_per_fga}
four_factors = {factor: {'a': [], 'b': []} for factor in factor_funcs}
for ind, row in games.iterrows():
    for factor, func in factor_funcs.items():
        four_factors[factor]['a'].append(func(row, True))
        four_factors[factor]['b'].append(func(row, False))

# Insert each side's factor columns immediately before that side's FTA column.
# The position is looked up on the live frame once per side; re-using the same
# position for every insert leaves the columns in the order
# FT/FGA, ORB_PCT, TOV_PCT, EFG_PCT.
for suffix, side in (('_A', 'a'), ('_B', 'b')):
    loc = games.columns.get_loc('FTA' + suffix)
    for factor in four_factors:
        games.insert(loc=loc, column=factor.upper() + suffix, value=four_factors[factor][side])

# Drop the raw box-score columns the four factors were derived from, plus the
# TEAM_B columns that duplicate game-level information.
games = games.drop(columns=[
    'FGM_A', 'FG3M_A', 'FGA_A', 'FG_PCT_A', 'FG3A_A', 'FG3_PCT_A', 'FTM_A', 'OREB_A', 'DREB_A',
    'FTA_A', 'FT_PCT_A', 'REB_A', 'AST_A', 'STL_A', 'BLK_A', 'TOV_A', 'MIN_A', 'PF_A', 'MATCHUP_A',
    'FGM_B', 'FG3M_B', 'FGA_B', 'FG_PCT_B', 'FG3A_B', 'FG3_PCT_B', 'FTM_B', 'OREB_B', 'DREB_B',
    'FTA_B', 'FT_PCT_B', 'REB_B', 'AST_B', 'STL_B', 'BLK_B', 'TOV_B', 'MIN_B', 'PF_B', 'MATCHUP_B',
    'WL_B', 'GAME_DATE_B',
])

games

Unnamed: 0,SEASON_ID,TEAM_ID_A,TEAM_ABBREVIATION_A,TEAM_NAME_A,GAME_ID,GAME_DATE,WL_A,PTS_A,FT/FGA_A,ORB_PCT_A,...,SEASON_ID_B,TEAM_ID_B,TEAM_ABBREVIATION_B,TEAM_NAME_B,PTS_B,FT/FGA_B,ORB_PCT_B,TOV_PCT_B,EFG_PCT_B,PLUS_MINUS_B
0,22013,1610612754,IND,Indiana Pacers,0021300001,2013-10-29,1,97,0.309859,0.277778,...,22013,1610612753,ORL,Orlando Magic,87,0.064516,0.276596,0.148601,0.435484,-10.0
1,22013,1610612748,MIA,Miami Heat,0021300002,2013-10-29,1,107,0.305556,0.142857,...,22013,1610612741,CHI,Chicago Bulls,95,0.216867,0.239130,0.161987,0.463855,-12.0
2,22013,1610612747,LAL,Los Angeles Lakers,0021300003,2013-10-29,1,116,0.193548,0.375000,...,22013,1610612746,LAC,Los Angeles Clippers,103,0.156627,0.227273,0.146628,0.542169,-13.0
3,22013,1610612739,CLE,Cleveland Cavaliers,0021300004,2013-10-30,1,98,0.273810,0.363636,...,22013,1610612751,BKN,Brooklyn Nets,94,0.231707,0.219512,0.139457,0.457317,-4.0
4,22013,1610612755,PHI,Philadelphia 76ers,0021300005,2013-10-30,1,114,0.250000,0.250000,...,22013,1610612748,MIA,Miami Heat,110,0.117647,0.179487,0.173168,0.588235,-4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1225,22013,1610612752,NYK,New York Knicks,0021301226,2014-04-16,1,95,0.160494,0.234043,...,22013,1610612761,TOR,Toronto Raptors,92,0.137500,0.324324,0.214439,0.506250,-3.0
1226,22013,1610612753,ORL,Orlando Magic,0021301227,2014-04-16,0,86,0.215190,0.195652,...,22013,1610612754,IND,Indiana Pacers,101,0.090909,0.326087,0.114460,0.528409,15.0
1227,22013,1610612757,POR,Portland Trail Blazers,0021301228,2014-04-16,1,110,0.158416,0.413793,...,22013,1610612746,LAC,Los Angeles Clippers,104,0.333333,0.163265,0.075103,0.452381,-6.0
1228,22013,1610612758,SAC,Sacramento Kings,0021301229,2014-04-16,0,99,0.244186,0.282609,...,22013,1610612756,PHX,Phoenix Suns,104,0.186047,0.145833,0.103112,0.511628,5.0


In [18]:
class Team_Cumulative_Stats: 
    '''Running per-game averages of one team's stats.

    ``self.features`` maps each tracked stat name (without the ``_A``/``_B``
    suffix) to a list in which entry ``k`` is the average over the team's
    first ``k`` games. Entry 0 is NaN because no game has been played yet.
    ``self.features['GAME_ID']`` has no placeholder: entry ``k`` is the id of
    the team's (k+1)-th game, so index ``k`` pairs a game with the averages
    the team had going into it.
    '''

    def __init__(self, team_abbr, columns=None):
        '''
        team_abbr -- the abbreviated name of the team city. EX: ATL for Atlanta Hawks
        columns -- column names to derive the tracked features from; defaults
                   to the columns of the module-level ``games`` dataframe
        '''
        self.team = team_abbr
        self.nth_game = 0 # number of games added so far (part 1, see below)
        self.curr_game = 0 # read cursor for get_next_game (part 2, see below)

        if columns is None:
            columns = games.columns

        # this class tracks the features for 1 team, so only the _A columns
        # are used to derive the (suffix-free) feature names
        candidates = [col[:-2] for col in columns if col.endswith('_A')]

        # excluded features I decided not to include
        excluded = ['TEAM_ABBREVIATION', 'TEAM_NAME', 'WL', 'TEAM_ID']

        # The first element of every stat list is NaN: there is no average
        # before the first game.
        self.features = {feature: [np.nan]
                         for feature in candidates
                         if feature not in excluded}
        self.features['GAME_ID'] = []

    def add_game(self, game, home):
        '''
        Fold one game into the running averages.

        game -- a row of the games dataframe
        home -- True if the team represented by this class is home, False if away
        '''
        suffix = '_A' if home else '_B' # in the games dataframe, _A is for the hometeam, and _B is for away
        played = self.nth_game
        for feature, history in self.features.items():
            if feature == 'GAME_ID': # all of the stats have to be averaged, except for GAME_ID
                continue
            value = game[feature + suffix]
            if played == 0:
                # first game: there is nothing to average
                new_val = value
            else:
                # history[played] is the average over the previous `played`
                # games (index 0 holds the NaN placeholder)
                new_val = (history[played] * played + value) / (played + 1)
            history.append(new_val)
        self.features['GAME_ID'].append(game['GAME_ID'])
        self.nth_game += 1

    def get_next_game(self, home): # Used to use self.features, once it is built, to create the DataFrame (part 2)
        '''
        Return the averages going into the team's next unread game and advance the cursor.

        home --- True if the team is home, False if away

        Returns a dict of one-element lists: ``<feature><suffix>`` for every
        tracked stat, plus ``GAME_ID``.
        '''
        suffix = '_A' if home else '_B'
        cursor = self.curr_game
        result = {}
        for feature, history in self.features.items():
            if feature != 'GAME_ID':
                result[feature + suffix] = [history[cursor]]
        result['GAME_ID'] = [self.features['GAME_ID'][cursor]]
        self.curr_game += 1
        return result
    
# dictionary with a teams city abbreviation as the key, and a Team_Cumulative_Stats object for that team as the value
# see the above cell for the definition of nba_teams
# NOTE: this rebinds `teams`, which an earlier cell imported as the nba_api
# module; re-import the module before calling teams.get_teams() again.
teams = {team['abbreviation']:Team_Cumulative_Stats(team['abbreviation']) for team in nba_teams}

# part 1 (build the data from the games dataframe)
for ind, row in games.iterrows():
    team_a = teams[row.loc['TEAM_ABBREVIATION_A']] 
    team_b = teams[row.loc['TEAM_ABBREVIATION_B']] 
    team_a.add_game(row, True) 
    team_b.add_game(row, False)

# part 2  (gather that data back together into a dictionary with the two competing teams on the same line)
# training data is a dictionary of column lists, which will become the dataframe after it is fully built
training_data = {}
for ind, row in games.iterrows():
    team_a = row.loc['TEAM_ABBREVIATION_A']
    team_b = row.loc['TEAM_ABBREVIATION_B']
    a = teams[team_a].get_next_game(True)
    b = teams[team_b].get_next_game(False)
    # Both teams' histories are replayed in the same order they were built,
    # so they must be pointing at this very game.
    assert a['GAME_ID'] == b['GAME_ID'] == [row.loc['GAME_ID']], \
        f'game history out of sync at row {ind}'
    # a and b hold one-element lists; the shared GAME_ID key is stored once.
    # Accumulating with setdefault does not depend on the index label of the
    # first row being 0.
    for feature, values in {**a, **b}.items():
        training_data.setdefault(feature, []).extend(values)

# create the dataframe, add the WL_A column, and PTS_A, PTS_B columns
test = pd.DataFrame.from_dict(training_data)
# Rows of `test` are in the iteration order of `games`, so attach the label
# and score columns by position rather than by index label.
test['WL_A'] = games['WL_A'].to_numpy()
test.rename(columns = {'PTS_A' : 'AVG_PTS_A', 'PTS_B' : 'AVG_PTS_B'}, inplace = True)
test['PTS_A'], test['PTS_B'] = games['PTS_A'].to_numpy(), games['PTS_B'].to_numpy()

test.shape


(1230, 16)

1. Verify that the DataFrame built above accurately reflects what happened in each game.
2. Remove useless stats and add the useful stats that we discussed last week.
3. Make this work for an arbitrary number of games.
4. Use the techniques described [here](https://math.stackexchange.com/questions/684519/what-is-the-most-scientific-way-to-assign-weights-to-historical-data) to weight recent years more heavily.

In [12]:
from nba_api.stats.endpoints import cumestatsteam
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.static import teams

nba_teams = teams.get_teams()
season = '2013-14'
season_type = 'Regular Season'
gamefinder = leaguegamefinder.LeagueGameFinder(season_nullable = season)
# NOTE: this rebinds `games` to the raw combined frame (unfiltered, unsorted,
# with all box-score columns still present).
games = combine_team_games(gamefinder.get_data_frames()[0], keep_method = 'home')

sixers = [team['id'] for team in nba_teams if team['abbreviation'] == 'PHI']
print(sixers)
n = 20 

# get the first n games the sixers played in the 2013-14 season
# GAME_IDs starting with '002' are kept, matching the filter used when the
# training frame is built (presumably regular-season games -- TODO confirm).
is_regular_season = games.GAME_ID.str[0:3] == '002'
# each game appears once (home team as TEAM_A), so check both sides
involves_sixers = (games.TEAM_ID_A == sixers[0]) | (games.TEAM_ID_B == sixers[0])
game_ids = (
    games.loc[is_regular_season & involves_sixers, 'GAME_ID']
    .sort_values()
    .head(n)
    .tolist()
)

# NOTE(review): game_ids is passed as a Python list; confirm whether the
# endpoint expects a single delimited string instead.
df = cumestatsteam.CumeStatsTeam(league_id = '00', game_ids = game_ids, season = season, season_type_all_star = season_type, team_id = sixers[0])
df.get_data_frames()[1]

[1610612755]


Unnamed: 0,CITY,NICKNAME,TEAM_ID,W,L,W_HOME,L_HOME,W_ROAD,L_ROAD,TEAM_TURNOVERS,...,TOT_REB,AST,PF,STL,TOTAL_TURNOVERS,BLK,PTS,AVG_REB,AVG_PTS,DQ


In [23]:
from nba_api.stats.endpoints import teamdashboardbygeneralsplits

# Date window handed to the dashboard request as date_from / date_to.
start = '10/29/2013'
end = '11/04/2013'

# Per-100-possessions team dashboard for the first team id in `sixers`,
# restricted to the window above.
generalTeamInfo = teamdashboardbygeneralsplits.TeamDashboardByGeneralSplits(
    team_id=sixers[0],
    per_mode_detailed='Per100Possessions',
    date_from_nullable=start,
    date_to_nullable=end,
    season=season,
    timeout=120,
)
generalTeamDict = generalTeamInfo.get_normalized_dict()
# Take the first entry of the 'OverallTeamDashboard' result set.
overall_rows = generalTeamDict['OverallTeamDashboard']
generalTeamDashboard = overall_rows[0]
generalTeamDashboard

{'GROUP_SET': 'Overall',
 'GROUP_VALUE': '2013-14',
 'SEASON_YEAR': '2013-14',
 'GP': 4,
 'W': 3,
 'L': 1,
 'W_PCT': 0.75,
 'MIN': 46.4,
 'FGM': 39.6,
 'FGA': 84.1,
 'FG_PCT': 0.471,
 'FG3M': 7.2,
 'FG3A': 22.0,
 'FG3_PCT': 0.33,
 'FTM': 15.0,
 'FTA': 22.5,
 'FT_PCT': 0.667,
 'OREB': 9.9,
 'DREB': 32.9,
 'REB': 42.8,
 'AST': 22.5,
 'TOV': 18.4,
 'STL': 9.9,
 'BLK': 3.9,
 'BLKA': 4.3,
 'PF': 17.9,
 'PFD': 19.1,
 'PTS': 101.4,
 'PLUS_MINUS': -1.4,
 'GP_RANK': 1,
 'W_RANK': 1,
 'L_RANK': 1,
 'W_PCT_RANK': 1,
 'MIN_RANK': 1,
 'FGM_RANK': 1,
 'FGA_RANK': 1,
 'FG_PCT_RANK': 1,
 'FG3M_RANK': 1,
 'FG3A_RANK': 1,
 'FG3_PCT_RANK': 1,
 'FTM_RANK': 1,
 'FTA_RANK': 1,
 'FT_PCT_RANK': 1,
 'OREB_RANK': 1,
 'DREB_RANK': 1,
 'REB_RANK': 1,
 'AST_RANK': 1,
 'TOV_RANK': 1,
 'STL_RANK': 1,
 'BLK_RANK': 1,
 'BLKA_RANK': 1,
 'PF_RANK': 1,
 'PFD_RANK': 1,
 'PTS_RANK': 1,
 'PLUS_MINUS_RANK': 1,
 'CFID': 155,
 'CFPARAMS': '2013-14'}