In [None]:
import pandas as pd
import numpy as np

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; every dataset in this notebook is
# read from (and written back to) the mounted drive.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Anchor the project folder to the mount point. The previous relative path
# ('drive/MyDrive/...') only resolved while the kernel's working directory was
# /content, so a stray `%cd` silently broke every read_csv/to_csv below.
root_path = DRIVE_MOUNT_POINT + '/MyDrive/NBA-predictions'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Raw game log covering every regular-season game from 2013 through the end of
# the 2015 season, ordered chronologically by GAME_DATE.
all_games = (
    pd.read_csv(root_path +'/data/nba_games_2013_2015.csv', sep=';')
    .sort_values(by=['GAME_DATE'])
)

# Team-level stats for the 2012 season.
df_2012 = pd.read_csv(root_path +'/data/2012_team_averages.csv')

In [None]:
# Preview the first rows of the date-sorted game log (one row per team per game).
all_games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
7379,22013,1610612754,IND,Indiana Pacers,21300001,2013-10-29,IND vs. ORL,W,241,97,34,71,0.479,7,17,0.412,22,32,0.688,10,34,44,17,4,18,20,13,10.0
7374,22013,1610612746,LAC,Los Angeles Clippers,21300003,2013-10-29,LAC @ LAL,L,239,103,41,83,0.494,8,21,0.381,13,23,0.565,10,30,40,27,11,4,16,21,-13.0
7375,22013,1610612753,ORL,Orlando Magic,21300001,2013-10-29,ORL @ IND,L,241,87,36,93,0.387,9,19,0.474,6,10,0.6,13,26,39,17,10,6,17,26,-10.0
7378,22013,1610612741,CHI,Chicago Bulls,21300002,2013-10-29,CHI @ MIA,L,238,95,35,83,0.422,7,26,0.269,18,23,0.783,11,30,41,23,11,4,18,27,-12.0
7377,22013,1610612747,LAL,Los Angeles Lakers,21300003,2013-10-29,LAL vs. LAC,W,240,116,42,93,0.452,14,29,0.483,18,28,0.643,18,34,52,23,8,6,19,23,13.0


In [None]:
# Create a dictionary of team name -> team abbreviation.
#
# The pairs are taken from the same row of the game log, so a name can never be
# matched with another team's abbreviation. (Zipping two independently
# de-duplicated lists by position only works while both happen to be in the
# same order and of the same length.)

# There is a mistake in our data: these names are incorrect and are kept out of
# the dictionary.
incorrect_team_names = {'LA Clippers', 'Charlotte Bobcats'}

team_pairs = all_games[['TEAM_NAME', 'TEAM_ABBREVIATION']].drop_duplicates()

# Pairs are visited in the frame's row order, so if a name ever occurs with
# more than one abbreviation the last one seen wins.
team_dict = {
    name: abbrev
    for name, abbrev in zip(team_pairs['TEAM_NAME'], team_pairs['TEAM_ABBREVIATION'])
    if name not in incorrect_team_names
}

# Kept for any later cell that still refers to the plain lists.
team_name_list = list(team_dict)
team_abbrev_list = list(all_games['TEAM_ABBREVIATION'].unique())

#print(team_name_list)
#print(team_abbrev_list)
#team_dict

In [None]:
# Same preview as above; nothing in between modifies all_games.
all_games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
7379,22013,1610612754,IND,Indiana Pacers,21300001,2013-10-29,IND vs. ORL,W,241,97,34,71,0.479,7,17,0.412,22,32,0.688,10,34,44,17,4,18,20,13,10.0
7374,22013,1610612746,LAC,Los Angeles Clippers,21300003,2013-10-29,LAC @ LAL,L,239,103,41,83,0.494,8,21,0.381,13,23,0.565,10,30,40,27,11,4,16,21,-13.0
7375,22013,1610612753,ORL,Orlando Magic,21300001,2013-10-29,ORL @ IND,L,241,87,36,93,0.387,9,19,0.474,6,10,0.6,13,26,39,17,10,6,17,26,-10.0
7378,22013,1610612741,CHI,Chicago Bulls,21300002,2013-10-29,CHI @ MIA,L,238,95,35,83,0.422,7,26,0.269,18,23,0.783,11,30,41,23,11,4,18,27,-12.0
7377,22013,1610612747,LAL,Los Angeles Lakers,21300003,2013-10-29,LAL vs. LAC,W,240,116,42,93,0.452,14,29,0.483,18,28,0.643,18,34,52,23,8,6,19,23,13.0


In [None]:
# Spot check: isolate Miami's rows and report (rows, columns).
test = all_games.loc[all_games['TEAM_ABBREVIATION'] == 'MIA']
test.shape

(246, 28)

In [None]:
def set_moving_average_team_stats(team_abbreviation:str, n_games=5, df=None, include_current_game=True):
  '''
  Add trailing moving averages of one team's per-game stats.

  The rows of `df` whose TEAM_ABBREVIATION equals `team_abbreviation` are
  selected and sorted by GAME_DATE. For every column from position 9 onwards
  (PTS through PLUS_MINUS in the raw game log) a column named 'RA_<column>' is
  added, holding the mean of that column over `n_games` consecutive games of
  that team.

  Parameters
  ----------
  team_abbreviation : str
      Value matched against the TEAM_ABBREVIATION column.
  n_games : int, default 5
      Window length. Rows that do not yet have a full window get NaN.
  df : DataFrame, optional
      Per-team game log. When omitted, the notebook-level `all_games` frame is
      looked up at call time; a `df=all_games` default would be captured once,
      when this cell runs, and go stale whenever the data is reloaded.
  include_current_game : bool, default True
      True keeps the historical behaviour: each window ends at, and includes,
      the game on the row itself. False lags the averages by one game so a row
      only summarises the `n_games` games played before it.

  NOTE: with the default, the RA_ values of a row already contain that game's
  own box score (including PLUS_MINUS). Use include_current_game=False when
  the averages are meant to be pre-game predictors of that game's result.

  Returns
  -------
  DataFrame
      A new frame (the input is left untouched) with the team's rows in date
      order followed by the added RA_ columns.
  '''
  if df is None:
    df = all_games

  team_games = df[df.TEAM_ABBREVIATION == team_abbreviation].sort_values(by=['GAME_DATE'])

  # The first nine columns are identifiers/metadata (and MIN); everything
  # after them is treated as a stat to average.
  stat_columns = team_games.columns.to_list()[9:]

  # A lag of 0 leaves the window ending on the current game; 1 moves it back
  # so it ends on the previous game.
  lag = 0 if include_current_game else 1
  rolling_columns = {
      'RA_' + col: team_games[col].rolling(window=n_games, center=False).mean().shift(lag)
      for col in stat_columns
  }

  return team_games.assign(**rolling_columns)

In [None]:
# Spot check on Miami: with the default 5-game window the first four rows have
# no RA_ values yet (NaN).
set_moving_average_team_stats('MIA').head(10)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,RA_PTS,RA_FGM,RA_FGA,RA_FG_PCT,RA_FG3M,RA_FG3A,RA_FG3_PCT,RA_FTM,RA_FTA,RA_FT_PCT,RA_OREB,RA_DREB,RA_REB,RA_AST,RA_STL,RA_BLK,RA_TOV,RA_PF,RA_PLUS_MINUS
7376,22013,1610612748,MIA,Miami Heat,21300002,2013-10-29,MIA vs. CHI,W,239,107,37,72,0.514,11,20,0.55,22,29,0.759,5,35,40,26,10,7,18,21,12.0,,,,,,,,,,,,,,,,,,,
7361,22013,1610612748,MIA,Miami Heat,21300005,2013-10-30,MIA @ PHI,L,242,110,42,85,0.494,16,40,0.4,10,13,0.769,7,24,31,30,7,0,19,25,-4.0,,,,,,,,,,,,,,,,,,,
7341,22013,1610612748,MIA,Miami Heat,21300028,2013-11-01,MIA @ BKN,L,239,100,32,67,0.478,7,14,0.5,29,36,0.806,4,26,30,24,10,2,15,29,-1.0,,,,,,,,,,,,,,,,,,,
7297,22013,1610612748,MIA,Miami Heat,21300042,2013-11-03,MIA vs. WAS,W,241,103,37,70,0.529,11,27,0.407,18,27,0.667,8,24,32,32,9,6,16,18,10.0,,,,,,,,,,,,,,,,,,,
7263,22013,1610612748,MIA,Miami Heat,21300051,2013-11-05,MIA @ TOR,W,241,104,41,83,0.494,10,23,0.435,12,14,0.857,10,28,38,31,7,4,11,27,9.0,104.8,37.8,75.4,0.5018,11.0,24.8,0.4584,18.2,23.8,0.7716,6.8,27.4,34.2,28.6,8.6,3.8,15.8,24.0,5.2
7239,22013,1610612748,MIA,Miami Heat,21300070,2013-11-07,MIA vs. LAC,W,240,102,38,71,0.535,5,16,0.313,21,33,0.636,8,22,30,27,6,3,16,23,5.0,103.8,38.0,75.2,0.506,9.8,24.0,0.411,18.0,24.6,0.747,7.4,24.8,32.2,28.8,7.8,3.0,15.4,24.4,3.8
7205,22013,1610612748,MIA,Miami Heat,21300089,2013-11-09,MIA vs. BOS,L,240,110,41,71,0.577,5,10,0.5,23,34,0.676,4,27,31,26,4,7,10,13,-1.0,103.8,37.8,72.4,0.5226,7.6,18.0,0.431,20.6,28.8,0.7284,6.8,25.4,32.2,28.0,7.2,4.4,13.6,22.0,4.4
7161,22013,1610612748,MIA,Miami Heat,21300107,2013-11-12,MIA vs. MIL,W,242,118,46,79,0.582,12,23,0.522,14,24,0.583,5,35,40,29,9,7,13,17,23.0,107.4,40.6,74.8,0.5434,8.6,19.8,0.4354,17.6,26.4,0.6838,7.0,27.2,34.2,29.0,7.0,5.4,13.2,19.6,9.2
7117,22013,1610612748,MIA,Miami Heat,21300130,2013-11-15,MIA vs. DAL,W,240,110,40,74,0.541,5,13,0.385,25,30,0.833,4,30,34,17,19,5,16,15,6.0,108.8,41.2,75.6,0.5458,7.4,17.0,0.431,19.0,27.0,0.717,6.2,28.4,34.6,26.0,9.0,5.2,13.2,19.0,8.4
7107,22013,1610612748,MIA,Miami Heat,21300136,2013-11-16,MIA @ CHA,W,241,97,35,69,0.507,6,15,0.4,21,23,0.913,6,37,43,18,5,8,15,22,16.0,107.4,40.0,72.8,0.5484,6.6,15.4,0.424,20.8,28.8,0.7282,5.4,30.2,35.6,23.4,8.6,6.0,14.0,18.0,9.8


In [None]:
# # add the TEAM_ABBREVIATION column
# DEPRECATED temp_df = df_2012.copy()
# df_2012['TEAM_ABBREVIATION'] = temp_df['TEAM'].replace(team_dict)
# df_2012.head(2)

In [None]:
# DEPRECATED df_2012[df_2012.TEAM_ABBREVIATION == 'MIA']

In [None]:
# def get_historical_team_stats(team_abbreviation: str, home_team=True, historical_df=df_2012):
#   '''
#   (if home_team=True)
#   Returns the following stats for a team 
#     - 'HOME_PREV_SEAS_WIN_PERC',
#     - 'HOME_PREV_SEAS_AVG_PTS_PER_GAME',
#     - 'HOME_PREV_SEAS_FG_PERC',
#     - 'HOME_PREV_SEAS_THREE_PERC',
#     - 'HOME_PREV_SEAS_FREE_THROW_PERC',
#     - 'HOME_PREV_SEAS_PLUS_MINUS'
    
#     (if home_team=False)
#     Returns the following stats for a team 
#     - 'AWAY_PREV_SEAS_WIN_PERC',
#     - 'AWAY_PREV_SEAS_AVG_PTS_PER_GAME',
#     - 'AWAY_PREV_SEAS_FG_PERC',
#     - 'AWAY_PREV_SEAS_THREE_PERC',
#     - 'AWAY_PREV_SEAS_FREE_THROW_PERC',
#     - 'AWAY_PREV_SEAS_PLUS_MINUS'

#   '''
  
#   temp_df = historical_df.copy()

#   if home_team:
#     column_names = {
#                'WIN%': 'HOME_PREV_SEAS_WIN_PERC',
#                'PTS': 'HOME_PREV_SEAS_AVG_PTS_PER_GAME',
#                'FG%': 'HOME_PREV_SEAS_FG_PERC',
#                '3P%': 'HOME_PREV_SEAS_THREE_PERC',
#                'FT%': 'HOME_PREV_SEAS_FREE_THROW_PERC',
#                '+/-': 'HOME_PREV_SEAS_PLUS_MINUS'
#     }
  
#   else:
#       column_names = {
#                'WIN%': 'AWAY_PREV_SEAS_WIN_PERC',
#                'PTS': 'AWAY_PREV_SEAS_AVG_PTS_PER_GAME',
#                'FG%': 'AWAY_PREV_SEAS_FG_PERC',
#                '3P%': 'AWAY_PREV_SEAS_THREE_PERC',
#                'FT%': 'AWAY_PREV_SEAS_FREE_THROW_PERC',
#                '+/-': 'AWAY_PREV_SEAS_PLUS_MINUS'
#     }

#   df_filtered = temp_df[temp_df.TEAM_ABBREVIATION == team_abbreviation]
#   team_stats = df_filtered[['WIN%', 'PTS', 'FG%', '3P%', 'FT%', '+/-']]
#   team_stats = team_stats.rename(columns=column_names)
#   return team_stats

# #get_historical_team_stats(team_abbreviation='MIA',home_team=True)

In [None]:
def create_ma_dataframe(df=None):
  '''
  Build a one-row-per-game table of moving-average features.

  The per-team game log holds two rows per game (one per team). For every team
  the 5-game moving averages are computed with set_moving_average_team_stats,
  and the two rows of each game are then merged into a single record with:

    - GAME_DATE, GAME_ID
    - MATCHUP    the away team's matchup string ('AWAY @ HOME')
    - HOME_TEAM / AWAY_TEAM   team abbreviations
    - HOME_RA_* / AWAY_RA_*   the RA_ columns of the respective team
    - HOME_WIN   1 if the home team's WL is 'W', else 0

  A row is the away side when its MATCHUP contains '@'; the other row of the
  game is the home side. Team labels, stats and HOME_WIN are all taken from
  those two rows, so they always agree with each other regardless of the
  order in which the rows appear in `df`.

  Games that do not have exactly one home row and one away row are skipped,
  because no complete record can be built for them.

  NOTE: the averages come from set_moving_average_team_stats called with its
  defaults, so each window includes the game on the row itself.

  Parameters
  ----------
  df : DataFrame, optional
      Per-team game log. When omitted, the notebook-level `all_games` frame is
      looked up at call time.

  Returns
  -------
  DataFrame
      One row per game with columns in alphabetical order (the layout of the
      stored all_games_ppmadf.csv table). Empty if `df` has no usable games.
  '''
  if df is None:
    df = all_games

  # Rolling stats are computed team by team and stacked into one long frame in
  # a single concat (DataFrame.append is gone in pandas 2.x and was quadratic).
  team_frames = [
      set_moving_average_team_stats(team_abbreviation=team_abbrev, n_games=5, df=df)
      for team_abbrev in df.TEAM_ABBREVIATION.unique()
  ]
  if not team_frames:
    return pd.DataFrame()
  games_df = pd.concat(team_frames)

  # Select the moving-average columns by name instead of by position.
  ra_columns = [col for col in games_df.columns if str(col).startswith('RA_')]

  records = []
  # sort=False keeps games in order of first appearance, as GAME_ID.unique() did.
  for game_id, game_rows in games_df.groupby('GAME_ID', sort=False):
    # the '@' symbol in the MATCHUP column denotes the AWAY team's row
    is_away = game_rows.MATCHUP.str.contains('@', regex=False, na=False)
    away_rows = game_rows[is_away]
    home_rows = game_rows[~is_away]
    if len(away_rows) != 1 or len(home_rows) != 1:
      continue

    away = away_rows.iloc[0]
    home = home_rows.iloc[0]

    game_entry = {
        'GAME_DATE': home.GAME_DATE,
        'GAME_ID': game_id,
        'MATCHUP': away.MATCHUP,
        'HOME_TEAM': home.TEAM_ABBREVIATION,
        'AWAY_TEAM': away.TEAM_ABBREVIATION,
    }
    for col in ra_columns:
      game_entry['AWAY_' + col] = away[col]
      game_entry['HOME_' + col] = home[col]

    game_entry['HOME_WIN'] = 1 if home.WL == 'W' else 0
    records.append(game_entry)

  new_df = pd.DataFrame(records)
  return new_df.reindex(columns=sorted(new_df.columns))

# games_df = create_ma_dataframe()
# games_df = games_df.dropna()

In [None]:
#games_df

In [None]:
#games_df['HOME_WIN'].sum() / games_df['HOME_WIN'].count()

In [None]:
#games_df.to_csv(root_path +'/data/all_games_ppmadf.csv')

In [None]:
def create_training_data(df=None):
  '''
  Turn the one-row-per-game table into a model-ready frame.

  The result has, in this order:
    - 30 home-team indicator columns ('ATL' ... 'WAS'),
    - 30 away-team indicator columns ('AWAY_ATL' ... 'AWAY_WAS'),
    - the remaining columns of `df` after dropping GAME_DATE, GAME_ID,
      MATCHUP, HOME_TEAM and AWAY_TEAM.

  The indicator columns are always generated for the full, fixed list of 30
  team abbreviations, so any subset of games (e.g. a train or a test split)
  produces exactly the same columns in the same order. A team abbreviation
  outside that list yields an all-zero indicator row.

  Numeric feature columns are min-max scaled to [0, 1]; a constant column
  becomes all zeros. HOME_WIN, when present, is a 0/1 label and is passed
  through unscaled so that a subset containing a single class is not rewritten
  to zeros.

  NOTE: the scaling range is computed from the rows of `df` itself. Two frames
  prepared by separate calls are therefore scaled independently of each other.

  Parameters
  ----------
  df : DataFrame, optional
      Per-game table with HOME_TEAM, AWAY_TEAM, GAME_DATE, GAME_ID and MATCHUP
      columns. When omitted, the notebook-level `all_games` frame is looked up
      at call time.

  Returns
  -------
  DataFrame
      A new frame indexed like `df`; the input is not modified.
  '''
  if df is None:
    df = all_games

  team_codes = [
      'ATL', 'BKN', 'BOS', 'CHA', 'CHI', 'CLE', 'DAL', 'DEN', 'DET', 'GSW',
      'HOU', 'IND', 'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN', 'NOP', 'NYK',
      'OKC', 'ORL', 'PHI', 'PHX', 'POR', 'SAC', 'SAS', 'TOR', 'UTA', 'WAS',
  ]
  # A categorical with fixed categories makes get_dummies emit a column for
  # every team, including teams that do not occur in this particular frame.
  team_dtype = pd.CategoricalDtype(categories=team_codes)

  HOME_TEAM = pd.get_dummies(df.HOME_TEAM.astype(team_dtype), dtype='uint8')
  HOME_TEAM.columns = team_codes

  AWAY_TEAM = pd.get_dummies(df.AWAY_TEAM.astype(team_dtype), dtype='uint8')
  AWAY_TEAM.columns = ['AWAY_' + code for code in team_codes]

  features = df.drop(columns=[
                        'GAME_DATE',
                        'GAME_ID',
                        'MATCHUP',
                        'HOME_TEAM',
                        'AWAY_TEAM'
                        ])
  df_model = pd.concat((
      HOME_TEAM,
      AWAY_TEAM,
      features
  ), axis=1)

  # min/max scale the feature columns, selected by name rather than by an
  # assumed position after the team indicators
  scale_columns = [
      col for col in features.select_dtypes(include='number').columns
      if col != 'HOME_WIN'
  ]
  if scale_columns:
    values = features[scale_columns].astype('float64')
    col_min = values.min()
    # A zero range (constant column) is replaced by 1 so the column maps to 0
    # instead of dividing by zero.
    col_range = (values.max() - col_min).replace(0, 1.0)
    scaled = (values - col_min) / col_range
    for col in scale_columns:
      df_model[col] = scaled[col]

  return df_model

# Data adjustments
### Combine Rolling Mean Columns: Offensive stats and Defensive stats

In this section we are going to try to simplify our model.

I wanted to attempt to consolidate my data columns on my own, before trying PCA reduction (as PCA is much easier to implement).

I also want to redefine the training data: use the entire 2013 and 2014 seasons as training data, and try to predict the entire 2015 season. I believe this is a good train/test split for a few reasons:
1. My model will have more data to train from
2. The data will be more balanced for each team (I didn't fully investigate the X_train data, but in theory it's possible that the teams weren't represented equally)
3. 2015 was the year that LeBron James left the Miami Heat and went to the Cleveland Cavaliers - which should "shake things up"

In [None]:
# Load the most recent per-game moving-average table and parse its dates.
# NOTE: this rebinds `all_games`, which until this cell held the raw per-team game log.
all_games = pd.read_csv(root_path +'/data/all_games_ppmadf.csv')
all_games['GAME_DATE'] = pd.to_datetime(all_games['GAME_DATE'])

In [None]:
# Display the reloaded per-game table (one row per game, AWAY_/HOME_ moving averages).
all_games

Unnamed: 0,AWAY_RA_AST,AWAY_RA_BLK,AWAY_RA_DREB,AWAY_RA_FG3A,AWAY_RA_FG3M,AWAY_RA_FG3_PCT,AWAY_RA_FGA,AWAY_RA_FGM,AWAY_RA_FG_PCT,AWAY_RA_FTA,AWAY_RA_FTM,AWAY_RA_FT_PCT,AWAY_RA_OREB,AWAY_RA_PF,AWAY_RA_PLUS_MINUS,AWAY_RA_PTS,AWAY_RA_REB,AWAY_RA_STL,AWAY_RA_TOV,AWAY_TEAM,GAME_DATE,GAME_ID,HOME_RA_AST,HOME_RA_BLK,HOME_RA_DREB,HOME_RA_FG3A,HOME_RA_FG3M,HOME_RA_FG3_PCT,HOME_RA_FGA,HOME_RA_FGM,HOME_RA_FG_PCT,HOME_RA_FTA,HOME_RA_FTM,HOME_RA_FT_PCT,HOME_RA_OREB,HOME_RA_PF,HOME_RA_PLUS_MINUS,HOME_RA_PTS,HOME_RA_REB,HOME_RA_STL,HOME_RA_TOV,HOME_TEAM,HOME_WIN,MATCHUP
0,16.6,3.2,31.0,20.2,6.6,0.3330,79.6,33.8,0.4242,23.2,18.0,0.7736,13.4,23.4,-3.6,92.2,44.4,4.8,14.2,TOR,2013-11-08,21300074,22.0,8.0,35.4,22.2,8.4,0.3792,76.8,34.4,0.4494,22.0,17.0,0.7724,9.2,19.6,10.4,94.2,44.6,6.2,16.6,IND,1,TOR @ IND
1,23.0,6.2,35.4,20.4,7.8,0.3922,78.8,35.8,0.4562,20.0,15.0,0.7558,10.2,19.6,10.4,94.4,45.6,7.0,16.0,IND,2013-11-09,21300087,22.0,4.2,32.6,17.4,6.0,0.3540,79.8,36.6,0.4614,26.2,18.8,0.7506,9.4,25.4,-2.6,98.0,42.0,8.2,16.2,BKN,0,IND @ BKN
2,20.8,3.6,28.8,13.4,4.2,0.3346,77.2,36.0,0.4670,21.2,16.8,0.7930,9.6,23.2,-3.6,93.0,38.4,6.2,15.8,MEM,2013-11-11,21300099,23.2,6.8,34.4,18.6,7.0,0.3840,78.6,36.8,0.4704,19.6,15.0,0.7724,10.0,20.8,10.6,95.6,44.4,6.8,14.6,IND,1,MEM @ IND
3,18.0,4.8,28.8,21.0,9.6,0.4590,78.8,32.6,0.4176,19.2,16.2,0.8264,10.4,19.4,-11.2,91.0,39.2,6.4,17.0,IND,2013-11-15,21300125,20.8,7.2,33.2,17.6,6.2,0.3608,79.8,37.2,0.4674,19.2,16.0,0.8316,11.0,20.6,14.4,96.6,44.2,6.6,13.6,MIL,0,IND vs. MIL
4,19.8,7.4,31.2,17.2,5.6,0.3330,79.6,36.8,0.4636,20.6,16.8,0.8174,10.0,19.6,7.8,96.0,41.2,5.8,12.6,IND,2013-11-16,21300140,22.2,5.8,32.6,14.8,6.0,0.3950,77.4,34.8,0.4500,24.2,20.2,0.8364,11.2,22.0,10.8,95.8,43.8,7.4,15.2,CHI,1,IND @ CHI
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3621,20.0,5.4,29.0,17.4,6.8,0.4038,79.4,36.0,0.4550,20.4,13.6,0.6856,10.2,23.4,-5.0,92.4,39.2,8.0,12.2,NOP,2014-02-21,21300815,26.6,6.6,35.2,20.6,8.0,0.3764,81.2,38.6,0.4766,26.8,18.2,0.6714,8.8,16.4,8.4,103.4,44.0,4.2,10.2,CHA,1,NOP @ CHA
3622,22.0,4.6,34.6,18.4,5.4,0.2992,81.6,34.2,0.4202,23.0,16.6,0.7348,9.2,16.2,-1.4,90.4,43.8,7.8,12.6,CHA,2014-11-04,21400054,21.6,8.0,34.2,18.0,5.2,0.3096,87.8,37.8,0.4326,25.4,17.4,0.7078,15.2,20.6,2.8,98.2,49.4,7.6,10.8,NOP,1,CHA @ NOP
3623,19.2,4.8,35.4,19.6,7.0,0.3692,81.4,37.8,0.4654,22.0,16.0,0.7406,11.0,17.8,3.8,98.6,46.4,7.6,14.6,NOP,2015-01-07,21400522,18.2,6.6,35.2,19.0,4.6,0.2466,81.0,34.2,0.4232,26.6,21.0,0.7952,11.8,19.2,-0.4,94.0,47.0,5.2,13.8,CHA,1,NOP @ CHA
3624,21.0,4.4,36.0,32.2,11.4,0.3526,84.4,35.4,0.4188,20.6,16.0,0.7696,8.6,18.0,-1.0,98.2,44.6,5.6,13.4,CHA,2016-01-15,21500598,23.4,4.6,34.2,28.4,10.2,0.3534,92.2,38.8,0.4210,17.4,13.4,0.7700,11.2,20.4,0.4,101.2,45.4,9.6,11.0,NOP,1,CHA @ NOP


In [None]:
# Chronological split: games before the cutoff train the model, games on or
# after it are held out for testing.
split_date = pd.Timestamp('2015-08-08')

df_filter = all_games.GAME_DATE < split_date
train_data = create_training_data(df=all_games[df_filter])

# `>=` rather than `>`: with `<` / `>` a game dated exactly on the cutoff
# would belong to neither set.
# NOTE: create_training_data scales each frame it receives on its own, so the
# two splits are min-max scaled independently.
df_filter = all_games.GAME_DATE >= split_date
test_data = create_training_data(df=all_games[df_filter])

In [None]:
# Persist both prepared splits to Drive, leaving the index out of the files.
for split_frame, csv_path in (
    (train_data, root_path +'/data/train_data_ppmadf.csv'),
    (test_data, root_path +'/data/test_data_ppmadf.csv'),
):
  split_frame.to_csv(csv_path, index=False)

In [None]:
# Separate the HOME_WIN label (y) from the remaining feature columns (X) for each split.
y_train = train_data['HOME_WIN']
X_train = train_data.drop(columns=['HOME_WIN'])

y_test = test_data['HOME_WIN']
X_test = test_data.drop(columns=['HOME_WIN'])

In [None]:
# Inspect the training feature matrix (team indicators followed by scaled moving averages).
X_train

Unnamed: 0,ATL,BKN,BOS,CHA,CHI,CLE,DAL,DEN,DET,GSW,HOU,IND,LAC,LAL,MEM,MIA,MIL,MIN,NOP,NYK,OKC,ORL,PHI,PHX,POR,SAC,SAS,TOR,UTA,WAS,AWAY_ATL,AWAY_BKN,AWAY_BOS,AWAY_CHA,AWAY_CHI,AWAY_CLE,AWAY_DAL,AWAY_DEN,AWAY_DET,AWAY_GSW,...,AWAY_UTA,AWAY_WAS,AWAY_RA_AST,AWAY_RA_BLK,AWAY_RA_DREB,AWAY_RA_FG3A,AWAY_RA_FG3M,AWAY_RA_FG3_PCT,AWAY_RA_FGA,AWAY_RA_FGM,AWAY_RA_FG_PCT,AWAY_RA_FTA,AWAY_RA_FTM,AWAY_RA_FT_PCT,AWAY_RA_OREB,AWAY_RA_PF,AWAY_RA_PLUS_MINUS,AWAY_RA_PTS,AWAY_RA_REB,AWAY_RA_STL,AWAY_RA_TOV,HOME_RA_AST,HOME_RA_BLK,HOME_RA_DREB,HOME_RA_FG3A,HOME_RA_FG3M,HOME_RA_FG3_PCT,HOME_RA_FGA,HOME_RA_FGM,HOME_RA_FG_PCT,HOME_RA_FTA,HOME_RA_FTM,HOME_RA_FT_PCT,HOME_RA_OREB,HOME_RA_PF,HOME_RA_PLUS_MINUS,HOME_RA_PTS,HOME_RA_REB,HOME_RA_STL,HOME_RA_TOV
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0.107527,0.181818,0.445652,0.368056,0.377049,0.435316,0.353383,0.307692,0.333333,0.393103,0.360,0.615542,0.628571,0.5875,0.468619,0.330396,0.531915,0.129630,0.460526,0.395604,0.66,0.670455,0.449664,0.460317,0.588902,0.270073,0.355556,0.472141,0.355828,0.350000,0.585456,0.371429,0.413333,0.704120,0.346491,0.536232,0.275862,0.636364
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0.451613,0.522727,0.684783,0.375000,0.475410,0.616800,0.323308,0.417582,0.503727,0.282759,0.240,0.570041,0.400000,0.3500,0.761506,0.378855,0.574468,0.333333,0.578947,0.395604,0.28,0.511364,0.288591,0.269841,0.513723,0.379562,0.477778,0.530792,0.484663,0.425000,0.527601,0.385714,0.800000,0.460674,0.429825,0.442029,0.448276,0.610390
2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0.333333,0.227273,0.326087,0.131944,0.180328,0.440221,0.263158,0.428571,0.561235,0.324138,0.312,0.665133,0.357143,0.5750,0.468619,0.348018,0.319149,0.259259,0.565789,0.461538,0.54,0.613636,0.328859,0.349206,0.603222,0.335766,0.488889,0.574780,0.282209,0.266667,0.585456,0.428571,0.493333,0.707865,0.377193,0.528986,0.327586,0.506494
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0.182796,0.363636,0.326087,0.395833,0.622951,0.821582,0.323308,0.241758,0.298190,0.255172,0.288,0.750511,0.414286,0.3375,0.309623,0.303965,0.347518,0.277778,0.644737,0.329670,0.58,0.545455,0.295302,0.285714,0.534010,0.379562,0.511111,0.560117,0.269939,0.308333,0.742569,0.500000,0.480000,0.779026,0.399123,0.521739,0.310345,0.441558
4,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0.279570,0.659091,0.456522,0.263889,0.295082,0.435316,0.353383,0.472527,0.543131,0.303448,0.312,0.727505,0.385714,0.3500,0.707113,0.414097,0.418440,0.222222,0.355263,0.406593,0.44,0.511364,0.201342,0.269841,0.636038,0.291971,0.377778,0.475073,0.423313,0.483333,0.755308,0.514286,0.573333,0.711610,0.381579,0.507246,0.379310,0.545455
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3614,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0.505376,0.431818,0.576087,0.416667,0.540984,0.604537,0.624060,0.516484,0.387646,0.393103,0.408,0.737730,0.785714,0.1375,0.560669,0.568282,0.695035,0.129630,0.276316,0.175824,0.56,0.590909,0.382550,0.301587,0.408115,0.474453,0.477778,0.442815,0.325153,0.366667,0.667728,0.457143,0.493333,0.494382,0.407895,0.528986,0.362069,0.467532
3615,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0.451613,0.431818,0.804348,0.388889,0.475410,0.546291,0.646617,0.714286,0.595314,0.627586,0.624,0.752045,0.757143,0.4625,0.841004,0.828194,0.829787,0.314815,0.473684,0.417582,0.72,0.534091,0.442953,0.365079,0.495227,0.467153,0.300000,0.264907,0.349693,0.375000,0.678344,0.314286,0.453333,0.370787,0.289474,0.420290,0.241379,0.285714
3621,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0.290323,0.431818,0.336957,0.270833,0.393443,0.652361,0.345865,0.428571,0.497338,0.296552,0.184,0.390593,0.400000,0.5875,0.439331,0.334802,0.347518,0.425926,0.328947,0.648352,0.52,0.659091,0.395973,0.428571,0.580549,0.430657,0.588889,0.605083,0.503067,0.400000,0.317410,0.342857,0.200000,0.666667,0.548246,0.514493,0.103448,0.220779
3622,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,...,0,0,0.397849,0.340909,0.641304,0.305556,0.278689,0.331698,0.428571,0.329670,0.312034,0.386207,0.304,0.516360,0.328571,0.1375,0.514644,0.290749,0.510638,0.407407,0.355263,0.373626,0.66,0.602273,0.308725,0.206349,0.381265,0.671533,0.544444,0.390029,0.460123,0.366667,0.414013,0.800000,0.480000,0.561798,0.434211,0.710145,0.396552,0.259740


In [None]:
# Inspect the held-out feature matrix.
X_test

Unnamed: 0,AWAY_RA_AST,AWAY_RA_BLK,AWAY_RA_DREB,AWAY_RA_FG3A,AWAY_RA_FG3M,AWAY_RA_FG3_PCT,AWAY_RA_FGA,AWAY_RA_FGM,AWAY_RA_FG_PCT,AWAY_RA_FTA,AWAY_RA_FTM,AWAY_RA_FT_PCT,AWAY_RA_OREB,AWAY_RA_PF,AWAY_RA_PLUS_MINUS,AWAY_RA_PTS,AWAY_RA_REB,AWAY_RA_STL,AWAY_RA_TOV,AWAY_TEAM,GAME_DATE,GAME_ID,HOME_RA_AST,HOME_RA_BLK,HOME_RA_DREB,HOME_RA_FG3A,HOME_RA_FG3M,HOME_RA_FG3_PCT,HOME_RA_FGA,HOME_RA_FGM,HOME_RA_FG_PCT,HOME_RA_FTA,HOME_RA_FTM,HOME_RA_FT_PCT,HOME_RA_OREB,HOME_RA_PF,HOME_RA_PLUS_MINUS,HOME_RA_PTS,HOME_RA_REB,HOME_RA_STL,HOME_RA_TOV,HOME_TEAM,MATCHUP
159,22.8,4.8,34.6,23.2,9.4,0.4206,85.4,36.8,0.4352,24.0,17.8,0.7396,10.6,24.0,0.2,100.8,45.2,7.2,12.0,IND,2015-10-28,21500009,19.4,3.6,33.0,26.2,9.2,0.3548,82.8,36.6,0.4438,23.0,17.4,0.8164,10.6,20.8,3.0,99.8,43.6,8.0,14.2,TOR,IND @ TOR
160,20.2,3.8,29.8,13.0,3.6,0.2680,83.0,36.2,0.4376,25.2,19.2,0.7590,13.6,20.4,-4.2,95.2,43.4,9.8,15.0,IND,2015-10-29,21500018,21.6,5.4,33.6,25.4,9.6,0.3848,85.2,36.6,0.4338,24.4,17.2,0.6994,10.8,24.2,-2.4,100.0,44.4,7.4,14.0,MEM,IND vs. MEM
161,19.2,6.0,34.0,19.0,6.2,0.3366,86.8,37.8,0.4390,21.6,14.8,0.6814,11.6,22.2,7.0,96.6,45.6,9.8,12.8,UTA,2015-10-31,21500033,18.6,6.2,30.4,24.8,8.4,0.3318,83.4,33.8,0.4074,21.6,16.0,0.7260,10.4,22.8,-9.0,92.0,40.8,8.2,16.4,IND,UTA @ IND
162,18.4,5.4,28.0,23.4,8.0,0.3326,80.0,33.4,0.4176,20.6,16.2,0.7918,10.0,23.2,-7.4,91.0,38.0,10.2,16.8,IND,2015-11-03,21500054,18.2,3.4,35.2,24.8,8.0,0.3216,88.2,36.0,0.4110,26.4,18.0,0.6784,16.2,19.4,6.2,98.0,51.4,7.6,14.8,DET,IND @ DET
163,26.4,5.0,31.0,27.2,8.8,0.3224,87.2,36.6,0.4216,23.6,19.0,0.7836,11.2,22.4,0.4,101.0,42.2,10.0,17.0,IND,2015-11-04,21500060,19.0,4.0,29.8,23.2,8.0,0.3356,82.6,34.0,0.4126,23.4,18.4,0.7970,11.6,22.0,-4.6,94.4,41.4,10.8,17.2,BOS,IND vs. BOS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3618,22.2,3.6,34.2,32.8,11.8,0.3670,84.8,37.2,0.4384,22.6,18.8,0.8202,8.0,20.0,-0.2,105.0,42.2,6.2,12.8,CHA,2016-01-20,21500636,27.4,5.6,34.6,26.0,8.4,0.3158,87.8,41.0,0.4678,24.6,17.4,0.7376,13.6,20.8,16.8,107.8,48.2,10.4,14.2,OKC,CHA @ OKC
3619,19.2,2.8,29.8,20.4,6.0,0.2886,87.8,39.4,0.4486,21.4,13.4,0.6320,9.0,20.0,-5.2,98.2,38.8,8.4,7.0,NOP,2016-02-11,21500802,24.6,6.4,34.6,27.2,10.0,0.3598,86.4,42.0,0.4864,28.4,22.4,0.7910,13.6,20.0,10.6,116.4,48.2,7.6,15.6,OKC,NOP @ OKC
3620,26.2,5.8,34.0,26.2,8.4,0.3180,89.4,42.8,0.4804,19.2,15.2,0.8002,11.0,21.2,1.8,109.2,45.0,5.6,13.4,OKC,2016-02-25,21500857,21.6,3.6,32.0,19.2,6.4,0.3322,85.6,39.2,0.4572,28.8,23.0,0.8062,10.2,20.2,-6.0,107.8,42.2,8.6,13.2,NOP,OKC @ NOP
3624,21.0,4.4,36.0,32.2,11.4,0.3526,84.4,35.4,0.4188,20.6,16.0,0.7696,8.6,18.0,-1.0,98.2,44.6,5.6,13.4,CHA,2016-01-15,21500598,23.4,4.6,34.2,28.4,10.2,0.3534,92.2,38.8,0.4210,17.4,13.4,0.7700,11.2,20.4,0.4,101.2,45.4,9.6,11.0,NOP,CHA @ NOP
