In [1]:
# %pip install nba_api

In [2]:
from nba_api.stats.static import teams

# Get all teams and their abbreviations
all_teams = teams.get_teams()

for team in all_teams:
    print(f"{team['abbreviation']}: {team['full_name']}")


ATL: Atlanta Hawks
BOS: Boston Celtics
CLE: Cleveland Cavaliers
NOP: New Orleans Pelicans
CHI: Chicago Bulls
DAL: Dallas Mavericks
DEN: Denver Nuggets
GSW: Golden State Warriors
HOU: Houston Rockets
LAC: Los Angeles Clippers
LAL: Los Angeles Lakers
MIA: Miami Heat
MIL: Milwaukee Bucks
MIN: Minnesota Timberwolves
BKN: Brooklyn Nets
NYK: New York Knicks
ORL: Orlando Magic
IND: Indiana Pacers
PHI: Philadelphia 76ers
PHX: Phoenix Suns
POR: Portland Trail Blazers
SAC: Sacramento Kings
SAS: San Antonio Spurs
OKC: Oklahoma City Thunder
TOR: Toronto Raptors
UTA: Utah Jazz
MEM: Memphis Grizzlies
WAS: Washington Wizards
DET: Detroit Pistons
CHA: Charlotte Hornets


In [3]:
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.static import teams

def find_games_by_abbreviations(team_abbreviation, opponent_abbreviation, season="2022-23"):
    all_teams = teams.get_teams()
    team = next((t for t in all_teams if t["abbreviation"] == team_abbreviation), None)
    opponent = next((t for t in all_teams if t["abbreviation"] == opponent_abbreviation), None)

    if not team or not opponent:
        print("Invalid team abbreviation(s). Please check the input.")
        return

    team_id = team["id"]
    opponent_abbreviation_upper = opponent_abbreviation.upper()

    game_finder = leaguegamefinder.LeagueGameFinder(
        team_id_nullable=team_id,
        season_nullable=season
    )
    games = game_finder.get_data_frames()[0]
    games_vs_opponent = games[games["MATCHUP"].str.contains(opponent_abbreviation_upper)]
    print(f"Games for {team_abbreviation} vs {opponent_abbreviation} in {season}:")
    print(games_vs_opponent[["GAME_ID", "GAME_DATE", "MATCHUP", "WL"]])

    return games_vs_opponent

games = find_games_by_abbreviations("GSW", "LAL", "2022-23")


Games for GSW vs LAL in 2022-23:
       GAME_ID   GAME_DATE      MATCHUP WL
0   0042200236  2023-05-12    GSW @ LAL  L
1   0042200235  2023-05-10  GSW vs. LAL  W
2   0042200234  2023-05-08    GSW @ LAL  L
3   0042200233  2023-05-06    GSW @ LAL  L
4   0042200232  2023-05-04  GSW vs. LAL  W
5   0042200231  2023-05-02  GSW vs. LAL  L
30  0022200964  2023-03-05    GSW @ LAL  L
36  0022200893  2023-02-23    GSW @ LAL  L
39  0022200853  2023-02-11  GSW vs. LAL  L
94  0022200002  2022-10-18  GSW vs. LAL  W
97  0012200040  2022-10-09  GSW vs. LAL  L


In [4]:
def get_first_game_stats(games_df):
    if not games_df.empty:
        first_game = games_df.iloc[0]
        return first_game
    else:
        print("No games found.")
        return None

first_game_stats = get_first_game_stats(games)

if first_game_stats is not None:
    print("Stats for the first game:")
    print(first_game_stats)


Stats for the first game:
SEASON_ID                            42022
TEAM_ID                         1610612744
TEAM_ABBREVIATION                      GSW
TEAM_NAME            Golden State Warriors
GAME_ID                         0042200236
GAME_DATE                       2023-05-12
MATCHUP                          GSW @ LAL
WL                                       L
MIN                                    241
PTS                                    101
FGM                                     39
FGA                                    103
FG_PCT                               0.379
FG3M                                    13
FG3A                                    48
FG3_PCT                              0.271
FTM                                     10
FTA                                     14
FT_PCT                               0.714
OREB                                    20
DREB                                    33
REB                                     53
AST                         

In [5]:
from nba_api.stats.static import teams, players
from nba_api.stats.endpoints import cumestatsteamgames, cumestatsteam, gamerotation
import pandas as pd
import numpy as np
import json
import difflib
import time
import requests

In [6]:
# Retry Wrapper 
def retry(func, retries=3):
    def retry_wrapper(*args, **kwargs):
        attempts = 0
        while attempts < retries:
            try:
                return func(*args, **kwargs)
            except requests.exceptions.RequestException as e:
                print(e)
                time.sleep(30)
                attempts += 1

    return retry_wrapper

In [7]:
def getSeasonScheduleFrame(seasons, seasonType): 

    # Get date from string
    def getGameDate(matchup):
        return matchup.partition(' at')[0][:10]

    # Get Home team from string
    def getHomeTeam(matchup):
        return matchup.partition(' at')[2]

    # Get Away team from string
    def getAwayTeam(matchup):
        return matchup.partition(' at')[0][10:]

    # Match nickname from schedule to team table to find ID
    def getTeamIDFromNickname(nickname):
        return teamLookup.loc[teamLookup['nickname'] == difflib.get_close_matches(nickname, teamLookup['nickname'], 1)[0]].values[0][0] 
    
    @retry
    def getRegularSeasonSchedule(season, teamID, seasonType):
        season = str(season) + "-" + str(season + 1)[-2:]  # Convert year to season format (e.g., 2020 -> 2020-21)
        teamGames = cumestatsteamgames.CumeStatsTeamGames(
            league_id='00',
            season=season,
            season_type_all_star=seasonType,
            team_id=teamID
        ).get_normalized_json()

        teamGames = pd.DataFrame(json.loads(teamGames)['CumeStatsTeamGames'])
        teamGames['SEASON'] = season
        return teamGames    
    
    # Get team lookup table
    teamLookup = pd.DataFrame(teams.get_teams())
    
    # Initialize the list to collect the data frames
    schedule_frames = []

    # Get teams' schedule for each season
    for season in seasons:
        for id in teamLookup['id']:
            time.sleep(1)
            # Collect all schedules in the list
            schedule_frames.append(getRegularSeasonSchedule(season, id, seasonType))
    
    # Concatenate all the DataFrames
    scheduleFrame = pd.concat(schedule_frames, ignore_index=True)

    scheduleFrame['GAME_DATE'] = pd.to_datetime(scheduleFrame['MATCHUP'].map(getGameDate))
    scheduleFrame['HOME_TEAM_NICKNAME'] = scheduleFrame['MATCHUP'].map(getHomeTeam)
    scheduleFrame['HOME_TEAM_ID'] = scheduleFrame['HOME_TEAM_NICKNAME'].map(getTeamIDFromNickname)
    scheduleFrame['AWAY_TEAM_NICKNAME'] = scheduleFrame['MATCHUP'].map(getAwayTeam)
    scheduleFrame['AWAY_TEAM_ID'] = scheduleFrame['AWAY_TEAM_NICKNAME'].map(getTeamIDFromNickname)

    # Drop duplicates (one row for each team, but only need one)
    scheduleFrame = scheduleFrame.drop_duplicates()

    # Reset the index
    scheduleFrame = scheduleFrame.reset_index(drop=True)
            
    return scheduleFrame

In [8]:
# Get Single Game aggregation columns

def getSingleGameMetrics(gameID,homeTeamID,awayTeamID,awayTeamNickname,seasonYear,gameDate):

    @retry
    def getGameStats(teamID,gameID,seasonYear):
        gameStats = cumestatsteam.CumeStatsTeam(game_ids=gameID,league_id ="00",
                                               season=seasonYear,season_type_all_star="Regular Season",
                                               team_id = teamID).get_normalized_json()

        gameStats = pd.DataFrame(json.loads(gameStats)['TotalTeamStats'])

        return gameStats

    data = getGameStats(homeTeamID,gameID,seasonYear)
    data.at[1,'NICKNAME'] = awayTeamNickname
    data.at[1,'TEAM_ID'] = awayTeamID
    data.at[1,'OFFENSIVE_EFFICIENCY'] = (data.at[1,'FG'] + data.at[1,'AST'])/(data.at[1,'FGA'] - data.at[1,'OFF_REB'] + data.at[1,'AST'] + data.at[1,'TOTAL_TURNOVERS'])
    data.at[1,'SCORING_MARGIN'] = data.at[1,'PTS'] - data.at[0,'PTS']

    data.at[0,'OFFENSIVE_EFFICIENCY'] = (data.at[0,'FG'] + data.at[0,'AST'])/(data.at[0,'FGA'] - data.at[0,'OFF_REB'] + data.at[0,'AST'] + data.at[0,'TOTAL_TURNOVERS'])
    data.at[0,'SCORING_MARGIN'] = data.at[0,'PTS'] - data.at[1,'PTS']

    data['SEASON'] = seasonYear
    data['GAME_DATE'] = gameDate
    data['GAME_ID'] = gameID

    return data

In [13]:
def getGameLogs(gameLogs, scheduleFrame):
    
    # Functions to prepare additional columns after gameLogs table loads
    def getHomeAwayFlag(gameDF):
        gameDF['HOME_FLAG'] = np.where((gameDF['W_HOME']==1) | (gameDF['L_HOME']==1),1,0)
        gameDF['AWAY_FLAG'] = np.where((gameDF['W_ROAD']==1) | (gameDF['L_ROAD']==1),1,0) 

    def getTotalWinPctg(gameDF):
        gameDF['TOTAL_GAMES_PLAYED'] = gameDF.groupby(['TEAM_ID','SEASON'])['GAME_DATE'].rank(ascending=True)
        gameDF['TOTAL_WINS'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['W'].cumsum()
        gameDF['TOTAL_WIN_PCTG'] = gameDF['TOTAL_WINS']/gameDF['TOTAL_GAMES_PLAYED']
        return gameDF.drop(['TOTAL_GAMES_PLAYED','TOTAL_WINS'],axis=1)

    def getHomeWinPctg(gameDF):
        gameDF['HOME_GAMES_PLAYED'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['HOME_FLAG'].cumsum()
        gameDF['HOME_WINS'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['W_HOME'].cumsum()
        gameDF['HOME_WIN_PCTG'] = gameDF['HOME_WINS']/gameDF['HOME_GAMES_PLAYED']
        return gameDF.drop(['HOME_GAMES_PLAYED','HOME_WINS'],axis=1)

    def getAwayWinPctg(gameDF):
        gameDF['AWAY_GAMES_PLAYED'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['AWAY_FLAG'].cumsum()
        gameDF['AWAY_WINS'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['W_ROAD'].cumsum()
        gameDF['AWAY_WIN_PCTG'] = gameDF['AWAY_WINS']/gameDF['AWAY_GAMES_PLAYED']
        return gameDF.drop(['AWAY_GAMES_PLAYED','AWAY_WINS'],axis=1)

    def getRollingOE(gameDF):
        gameDF['ROLLING_OE'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['OFFENSIVE_EFFICIENCY'].transform(lambda x: x.rolling(3, 1).mean())

    def getRollingScoringMargin(gameDF):
        gameDF['ROLLING_SCORING_MARGIN'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['SCORING_MARGIN'].transform(lambda x: x.rolling(3, 1).mean())

    def getRestDays(gameDF):
        gameDF['LAST_GAME_DATE'] = gameDF.sort_values(by='GAME_DATE').groupby(['TEAM_ID','SEASON'])['GAME_DATE'].shift(1)
        gameDF['NUM_REST_DAYS'] = (gameDF['GAME_DATE'] - gameDF['LAST_GAME_DATE'])/np.timedelta64(1,'D') 
        return gameDF.drop('LAST_GAME_DATE',axis=1)
    
    start = time.perf_counter_ns()

    i = int(len(gameLogs)/2) # Can use a previously completed gameLog dataset

    while i < len(scheduleFrame):
        time.sleep(1)
        gameLogs = pd.concat([gameLogs, getSingleGameMetrics(
            scheduleFrame.at[i,'GAME_ID'],
            scheduleFrame.at[i,'HOME_TEAM_ID'],
            scheduleFrame.at[i,'AWAY_TEAM_ID'],
            scheduleFrame.at[i,'AWAY_TEAM_NICKNAME'],
            scheduleFrame.at[i,'SEASON'],
            scheduleFrame.at[i,'GAME_DATE']
        )], ignore_index=True)
        
        end = time.perf_counter_ns()

        # Output time it took to load x amount of records
        if i % 100 == 0:
            mins = ((end - start) / 1e9) / 60
            print(i, str(mins) + ' minutes')

        i += 1
        
    # Get Table Level Aggregation Columns
    getHomeAwayFlag(gameLogs)
    gameLogs = getHomeWinPctg(gameLogs)
    gameLogs = getAwayWinPctg(gameLogs)
    gameLogs = getTotalWinPctg(gameLogs)
    getRollingScoringMargin(gameLogs)
    getRollingOE(gameLogs)
    gameLogs = getRestDays(gameLogs)

    return gameLogs.reset_index(drop=True)


In [14]:
# Get ScheduleFrame
seasons = [2020]
seasonType = 'Regular Season'

start = time.perf_counter_ns()
scheduleFrame = getSeasonScheduleFrame(seasons, seasonType)
end = time.perf_counter_ns()

secs = (end - start) / 1e9
mins = secs / 60

print(mins)


0.5582482062499999


In [15]:
#Example Output of Single Game Metrics
getSingleGameMetrics(scheduleFrame.at[104,'GAME_ID'],scheduleFrame.at[104,'HOME_TEAM_ID'],
                     scheduleFrame.at[104,'AWAY_TEAM_ID'],scheduleFrame.at[104,'AWAY_TEAM_NICKNAME'],
                     scheduleFrame.at[104,'SEASON'],scheduleFrame.at[104,'GAME_DATE'])


Unnamed: 0,CITY,NICKNAME,TEAM_ID,W,L,W_HOME,L_HOME,W_ROAD,L_ROAD,TEAM_TURNOVERS,...,BLK,PTS,AVG_REB,AVG_PTS,DQ,OFFENSIVE_EFFICIENCY,SCORING_MARGIN,SEASON,GAME_DATE,GAME_ID
0,Cleveland,Cavaliers,1610612739,1,0,1,0,0,0,2,...,3,117,49.0,117.0,0,0.588785,7.0,2020-21,2021-03-17,22000620
1,OPPONENTS,Celtics,1610612738,0,1,0,0,0,1,0,...,9,110,48.0,110.0,0,0.567308,-7.0,2020-21,2021-03-17,22000620


In [16]:
#Create the gameLogs DataFrame
gameLogs = pd.DataFrame()
gameLogs = getGameLogs(gameLogs,scheduleFrame)
gameLogs.to_csv('gameLogs.csv')

0 0.020749885416666666 minutes
100 2.0281644409666666 minutes
200 4.440886677083333 minutes
300 6.995901514583333 minutes
400 9.60710476875 minutes
500 12.244549273616666 minutes
600 14.932521427783334 minutes
700 17.519262308333335 minutes
800 20.202168057633333 minutes
900 22.828745334033332 minutes
1000 25.443806656950002 minutes


In [17]:
#Example Output of Game Logs
gameLogs[(gameLogs['TEAM_ID'] == 1610612737 ) & (gameLogs['SEASON'] == '2022-23')].sort_values('GAME_DATE')


Unnamed: 0,CITY,NICKNAME,TEAM_ID,W,L,W_HOME,L_HOME,W_ROAD,L_ROAD,TEAM_TURNOVERS,...,GAME_DATE,GAME_ID,HOME_FLAG,AWAY_FLAG,HOME_WIN_PCTG,AWAY_WIN_PCTG,TOTAL_WIN_PCTG,ROLLING_SCORING_MARGIN,ROLLING_OE,NUM_REST_DAYS


In [18]:
def getGameLogFeatureSet(gameDF):

    def shiftGameLogRecords(gameDF):
        gameDF['LAST_GAME_OE'] = gameLogs.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['OFFENSIVE_EFFICIENCY'].shift(1)
        gameDF['LAST_GAME_HOME_WIN_PCTG'] = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['HOME_WIN_PCTG'].shift(1)
        gameDF['LAST_GAME_AWAY_WIN_PCTG'] = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['AWAY_WIN_PCTG'].shift(1)
        gameDF['LAST_GAME_TOTAL_WIN_PCTG'] = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['TOTAL_WIN_PCTG'].shift(1)
        gameDF['LAST_GAME_ROLLING_SCORING_MARGIN'] = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['ROLLING_SCORING_MARGIN'].shift(1)
        gameDF['LAST_GAME_ROLLING_OE'] = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['ROLLING_OE'].shift(1)
    
    
    def getHomeTeamFrame(gameDF):
        homeTeamFrame = gameDF[gameDF['CITY'] != 'OPPONENTS']
        homeTeamFrame = homeTeamFrame[['LAST_GAME_OE','LAST_GAME_HOME_WIN_PCTG','NUM_REST_DAYS','LAST_GAME_AWAY_WIN_PCTG','LAST_GAME_TOTAL_WIN_PCTG','LAST_GAME_ROLLING_SCORING_MARGIN','LAST_GAME_ROLLING_OE','W','TEAM_ID','GAME_ID','SEASON']]

        colRenameDict = {}
        for col in homeTeamFrame.columns:
            if (col != 'GAME_ID') & (col != 'SEASON') :
                colRenameDict[col] = 'HOME_' + col 

        homeTeamFrame.rename(columns=colRenameDict,inplace=True)

        return homeTeamFrame

    def getAwayTeamFrame(gameDF):
        awayTeamFrame = gameDF[gameDF['CITY'] == 'OPPONENTS']
        awayTeamFrame = awayTeamFrame[['LAST_GAME_OE','LAST_GAME_HOME_WIN_PCTG','NUM_REST_DAYS','LAST_GAME_AWAY_WIN_PCTG','LAST_GAME_TOTAL_WIN_PCTG','LAST_GAME_ROLLING_SCORING_MARGIN','LAST_GAME_ROLLING_OE','TEAM_ID','GAME_ID','SEASON']]

        colRenameDict = {}
        for col in awayTeamFrame.columns:
            if (col != 'GAME_ID') & (col != 'SEASON'):
                colRenameDict[col] = 'AWAY_' + col 

        awayTeamFrame.rename(columns=colRenameDict,inplace=True)

        return awayTeamFrame
    
    shiftGameLogRecords(gameLogs)
    awayTeamFrame = getAwayTeamFrame(gameLogs)
    homeTeamFrame = getHomeTeamFrame(gameLogs)
    
    return pd.merge(homeTeamFrame, awayTeamFrame, how="inner", on=[ "GAME_ID","SEASON"]).drop(['GAME_ID','AWAY_TEAM_ID','HOME_TEAM_ID'],axis=1)


In [19]:
modelData = getGameLogFeatureSet(gameLogs)


In [20]:
# Final Data Set before Train,Test, Validation Split
modelData

Unnamed: 0,HOME_LAST_GAME_OE,HOME_LAST_GAME_HOME_WIN_PCTG,HOME_NUM_REST_DAYS,HOME_LAST_GAME_AWAY_WIN_PCTG,HOME_LAST_GAME_TOTAL_WIN_PCTG,HOME_LAST_GAME_ROLLING_SCORING_MARGIN,HOME_LAST_GAME_ROLLING_OE,HOME_W,SEASON,AWAY_LAST_GAME_OE,AWAY_LAST_GAME_HOME_WIN_PCTG,AWAY_NUM_REST_DAYS,AWAY_LAST_GAME_AWAY_WIN_PCTG,AWAY_LAST_GAME_TOTAL_WIN_PCTG,AWAY_LAST_GAME_ROLLING_SCORING_MARGIN,AWAY_LAST_GAME_ROLLING_OE
0,0.555556,0.685714,3.0,0.444444,0.563380,9.333333,0.571405,1,2020-21,0.612903,0.250000,2.0,0.228571,0.239437,-2.000000,0.605315
1,0.573913,0.676471,1.0,0.444444,0.557143,-0.666667,0.589472,1,2020-21,0.500000,0.305556,2.0,0.303030,0.304348,-18.000000,0.512228
2,0.584746,0.666667,2.0,0.444444,0.550725,8.666667,0.640592,1,2020-21,0.612403,0.500000,2.0,0.428571,0.463768,0.666667,0.567718
3,0.609756,0.656250,4.0,0.444444,0.544118,11.333333,0.653327,1,2020-21,0.539568,0.500000,2.0,0.441176,0.470588,0.666667,0.570866
4,0.512397,0.354839,1.0,0.558824,0.461538,11.000000,0.596498,1,2020-21,0.727273,0.656250,1.0,0.457143,0.552239,17.333333,0.633081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075,0.547826,0.363636,2.0,0.333333,0.348837,-9.666667,0.563248,1,2020-21,0.541667,0.350000,1.0,0.208333,0.272727,-7.000000,0.539031
1076,0.601626,0.416667,2.0,0.454545,0.434783,-5.333333,0.577983,1,2020-21,0.456140,0.222222,2.0,0.300000,0.263158,-11.666667,0.534768
1077,0.586207,0.400000,1.0,0.200000,0.292308,-10.000000,0.553596,0,2020-21,0.593220,0.548387,2.0,0.424242,0.484375,-2.000000,0.585593
1078,0.568966,0.551724,3.0,0.424242,0.483871,0.666667,0.584466,1,2020-21,0.553571,0.413793,2.0,0.205882,0.301587,-0.666667,0.542381


In [21]:
modelData.to_csv('nbaHomeWinLossModelDataset.csv')

In [22]:
def getGameLogFeatureSet(gameDF):

    def shiftGameLogRecords(gameDF):
        gameDF['LAST_GAME_OE'] = gameLogs.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['OFFENSIVE_EFFICIENCY'].shift(1)
        gameDF['LAST_GAME_HOME_WIN_PCTG'] = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['HOME_WIN_PCTG'].shift(1)
        gameDF['LAST_GAME_AWAY_WIN_PCTG'] = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['AWAY_WIN_PCTG'].shift(1)
        gameDF['LAST_GAME_TOTAL_WIN_PCTG'] = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['TOTAL_WIN_PCTG'].shift(1)
        gameDF['LAST_GAME_ROLLING_SCORING_MARGIN'] = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['ROLLING_SCORING_MARGIN'].shift(1)
        gameDF['LAST_GAME_ROLLING_OE'] = gameDF.sort_values('GAME_DATE').groupby(['TEAM_ID','SEASON'])['ROLLING_OE'].shift(1)
    
    
    def getHomeTeamFrame(gameDF):
        homeTeamFrame = gameDF[gameDF['CITY'] != 'OPPONENTS']
        homeTeamFrame = homeTeamFrame[['LAST_GAME_OE','LAST_GAME_HOME_WIN_PCTG','NUM_REST_DAYS','LAST_GAME_AWAY_WIN_PCTG','LAST_GAME_TOTAL_WIN_PCTG','LAST_GAME_ROLLING_SCORING_MARGIN','LAST_GAME_ROLLING_OE','W','TEAM_ID','GAME_ID','SEASON']]

        colRenameDict = {}
        for col in homeTeamFrame.columns:
            if (col != 'GAME_ID') & (col != 'SEASON') :
                colRenameDict[col] = 'HOME_' + col 

        homeTeamFrame.rename(columns=colRenameDict,inplace=True)

        return homeTeamFrame

    def getAwayTeamFrame(gameDF):
        awayTeamFrame = gameDF[gameDF['CITY'] == 'OPPONENTS']
        awayTeamFrame = awayTeamFrame[['LAST_GAME_OE','LAST_GAME_HOME_WIN_PCTG','NUM_REST_DAYS','LAST_GAME_AWAY_WIN_PCTG','LAST_GAME_TOTAL_WIN_PCTG','LAST_GAME_ROLLING_SCORING_MARGIN','LAST_GAME_ROLLING_OE','TEAM_ID','GAME_ID','SEASON']]

        colRenameDict = {}
        for col in awayTeamFrame.columns:
            if (col != 'GAME_ID') & (col != 'SEASON'):
                colRenameDict[col] = 'AWAY_' + col 

        awayTeamFrame.rename(columns=colRenameDict,inplace=True)

        return awayTeamFrame
    
    shiftGameLogRecords(gameLogs)
    awayTeamFrame = getAwayTeamFrame(gameLogs)
    homeTeamFrame = getHomeTeamFrame(gameLogs)
    
    return pd.merge(homeTeamFrame, awayTeamFrame, how="inner", on=[ "GAME_ID","SEASON"]).drop(['GAME_ID','AWAY_TEAM_ID','HOME_TEAM_ID'],axis=1)


In [23]:
modelData = getGameLogFeatureSet(gameLogs)


In [24]:
# Final Data Set before Train,Test, Validation Split
modelData

Unnamed: 0,HOME_LAST_GAME_OE,HOME_LAST_GAME_HOME_WIN_PCTG,HOME_NUM_REST_DAYS,HOME_LAST_GAME_AWAY_WIN_PCTG,HOME_LAST_GAME_TOTAL_WIN_PCTG,HOME_LAST_GAME_ROLLING_SCORING_MARGIN,HOME_LAST_GAME_ROLLING_OE,HOME_W,SEASON,AWAY_LAST_GAME_OE,AWAY_LAST_GAME_HOME_WIN_PCTG,AWAY_NUM_REST_DAYS,AWAY_LAST_GAME_AWAY_WIN_PCTG,AWAY_LAST_GAME_TOTAL_WIN_PCTG,AWAY_LAST_GAME_ROLLING_SCORING_MARGIN,AWAY_LAST_GAME_ROLLING_OE
0,0.555556,0.685714,3.0,0.444444,0.563380,9.333333,0.571405,1,2020-21,0.612903,0.250000,2.0,0.228571,0.239437,-2.000000,0.605315
1,0.573913,0.676471,1.0,0.444444,0.557143,-0.666667,0.589472,1,2020-21,0.500000,0.305556,2.0,0.303030,0.304348,-18.000000,0.512228
2,0.584746,0.666667,2.0,0.444444,0.550725,8.666667,0.640592,1,2020-21,0.612403,0.500000,2.0,0.428571,0.463768,0.666667,0.567718
3,0.609756,0.656250,4.0,0.444444,0.544118,11.333333,0.653327,1,2020-21,0.539568,0.500000,2.0,0.441176,0.470588,0.666667,0.570866
4,0.512397,0.354839,1.0,0.558824,0.461538,11.000000,0.596498,1,2020-21,0.727273,0.656250,1.0,0.457143,0.552239,17.333333,0.633081
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075,0.547826,0.363636,2.0,0.333333,0.348837,-9.666667,0.563248,1,2020-21,0.541667,0.350000,1.0,0.208333,0.272727,-7.000000,0.539031
1076,0.601626,0.416667,2.0,0.454545,0.434783,-5.333333,0.577983,1,2020-21,0.456140,0.222222,2.0,0.300000,0.263158,-11.666667,0.534768
1077,0.586207,0.400000,1.0,0.200000,0.292308,-10.000000,0.553596,0,2020-21,0.593220,0.548387,2.0,0.424242,0.484375,-2.000000,0.585593
1078,0.568966,0.551724,3.0,0.424242,0.483871,0.666667,0.584466,1,2020-21,0.553571,0.413793,2.0,0.205882,0.301587,-0.666667,0.542381


In [25]:
modelData.to_csv('nbaHomeWinLossModelDataset.csv')