In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import scale 
from sklearn.linear_model import Ridge, Lasso, RidgeCV, LassoCV
from sklearn.metrics import mean_squared_error

from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.static import teams

# Gets the box score of every single game (NBA + WNBA + others: nba.com doesn't seperate the leagues) 
# from 2014-2021 into a dataframe.
all_games_finder = leaguegamefinder.LeagueGameFinder()
all_games = all_games_finder.get_data_frames()[0]

# Creates empty dataframe with the same column names passed in from the dataframe containing all the games.
column_names = all_games.columns
games = pd.DataFrame(columns = column_names)

# Appends every game containing an NBA team in the all_games df to the games df. 
nba_teams = teams.get_teams()
for team in nba_teams:
    temp_id = team['id']
    games = games.append(all_games[all_games['TEAM_ID'] == temp_id])

pd.set_option('display.max_columns', None)
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
62,22021,1610612737,ATL,Atlanta Hawks,22100293,2021-11-27,ATL vs. NYK,L,240,90,33,93,0.355,9,37,0.243,15,20,0.75,13,39,52,18,8,6,6,17,-9.0
97,22021,1610612737,ATL,Atlanta Hawks,22100285,2021-11-26,ATL @ MEM,W,239,132,52,89,0.584,13,27,0.481,15,21,0.714,9,40,49,33,8,5,12,15,32.0
141,22021,1610612737,ATL,Atlanta Hawks,22100277,2021-11-24,ATL @ SAS,W,239,124,45,88,0.511,12,26,0.462,22,24,0.917,8,36,44,26,10,5,9,11,18.0
187,22021,1610612737,ATL,Atlanta Hawks,22100255,2021-11-22,ATL vs. OKC,W,239,113,42,87,0.483,14,34,0.412,15,16,0.938,8,36,44,25,6,6,7,16,12.0
210,22021,1610612737,ATL,Atlanta Hawks,22100242,2021-11-20,ATL vs. CHA,W,241,115,43,82,0.524,12,34,0.353,17,21,0.81,8,38,46,24,6,6,12,22,10.0


In [2]:
# Dropping any game (two rows in DF) that has any NaN values or is missing either team's stats
games.isna()
games.dropna(inplace=True)

games = games[games.duplicated(subset = ['GAME_ID'], keep=False)]

# Merging games together (previously seperated in the DF by team: each team's stats from the game were kept in seperate rows
games = games.sort_values(by=['GAME_ID'])
games = games.reset_index(drop=True)

# Team A and B each have a row for their stats in a given matchup; we need to add both stats to the end of their respective rows
# Team A dataframe
tempA = games[games.index % 2 == 0]
tempA2 = games[games.index % 2 == 1]

tempA2 = tempA2.add_prefix('OPP_')

tempA = tempA.reset_index(drop=True)
tempA2 = tempA2.reset_index(drop=True)

a_temp = tempA.join(tempA2)

# Team B dataframe
tempB = games[games.index % 2 == 0]
tempB2 = games[games.index % 2 == 1]

tempB = tempB.add_prefix('OPP_')

tempB = tempB.reset_index(drop=True)
tempB2 = tempB2.reset_index(drop=True)

b_temp = tempB2.join(tempB)

# Adding both teams to main dataframe
games = a_temp.append(b_temp)

# Resorting main dataframe
games = games.sort_values(by=['GAME_ID'])
games = games.reset_index(drop=True)

# Sending data to CSV
games.to_csv('games.csv', index = False)

# Print Head
pd.set_option('display.max_columns', None)
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,OPP_SEASON_ID,OPP_TEAM_ID,OPP_TEAM_ABBREVIATION,OPP_TEAM_NAME,OPP_GAME_ID,OPP_GAME_DATE,OPP_MATCHUP,OPP_WL,OPP_MIN,OPP_PTS,OPP_FGM,OPP_FGA,OPP_FG_PCT,OPP_FG3M,OPP_FG3A,OPP_FG3_PCT,OPP_FTM,OPP_FTA,OPP_FT_PCT,OPP_OREB,OPP_DREB,OPP_REB,OPP_AST,OPP_STL,OPP_BLK,OPP_TOV,OPP_PF,OPP_PLUS_MINUS
0,12015,1610612746,LAC,LA Clippers,11500001,2015-10-02,LAC vs. DEN,W,238,103,37,91,0.407,9,32,0.281,20,26,0.769,9,30,39,22,12,6,13,15,7.0,12015,1610612743,DEN,Denver Nuggets,11500001,2015-10-02,DEN @ LAC,L,240,96,37,87,0.425,7,18,0.389,15,20,0.75,14,43,57,16,9,2,23,26,-7.0
1,12015,1610612743,DEN,Denver Nuggets,11500001,2015-10-02,DEN @ LAC,L,240,96,37,87,0.425,7,18,0.389,15,20,0.75,14,43,57,16,9,2,23,26,-7.0,12015,1610612746,LAC,LA Clippers,11500001,2015-10-02,LAC vs. DEN,W,238,103,37,91,0.407,9,32,0.281,20,26,0.769,9,30,39,22,12,6,13,15,7.0
2,12015,1610612753,ORL,Orlando Magic,11500002,2015-10-03,ORL vs. CHA,L,240,100,39,89,0.438,6,26,0.231,16,19,0.842,12,33,45,22,11,3,12,30,-6.0,12015,1610612766,CHA,Charlotte Hornets,11500002,2015-10-03,CHA @ ORL,W,241,106,34,75,0.453,12,31,0.387,26,35,0.743,7,34,41,30,7,2,13,15,6.0
3,12015,1610612766,CHA,Charlotte Hornets,11500002,2015-10-03,CHA @ ORL,W,241,106,34,75,0.453,12,31,0.387,26,35,0.743,7,34,41,30,7,2,13,15,6.0,12015,1610612753,ORL,Orlando Magic,11500002,2015-10-03,ORL vs. CHA,L,240,100,39,89,0.438,6,26,0.231,16,19,0.842,12,33,45,22,11,3,12,30,-6.0
4,12015,1610612754,IND,Indiana Pacers,11500003,2015-10-03,IND vs. NOP,L,241,105,38,92,0.413,10,35,0.286,19,31,0.613,18,44,62,21,10,16,19,27,-5.0,12015,1610612740,NOP,New Orleans Pelicans,11500003,2015-10-03,NOP @ IND,W,242,110,37,103,0.359,11,32,0.344,25,37,0.676,19,36,55,16,9,3,11,22,5.0


In [3]:
temp_games = pd.DataFrame()
nba_teams = teams.get_teams()
for team in nba_teams:
    temp_id = team['id']
    temp_games = temp_games.append(games[games['TEAM_ID'] == temp_id])

games = temp_games

# Print Head
pd.set_option('display.max_columns', None)
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,OPP_SEASON_ID,OPP_TEAM_ID,OPP_TEAM_ABBREVIATION,OPP_TEAM_NAME,OPP_GAME_ID,OPP_GAME_DATE,OPP_MATCHUP,OPP_WL,OPP_MIN,OPP_PTS,OPP_FGM,OPP_FGA,OPP_FG_PCT,OPP_FG3M,OPP_FG3A,OPP_FG3_PCT,OPP_FTM,OPP_FTA,OPP_FT_PCT,OPP_OREB,OPP_DREB,OPP_REB,OPP_AST,OPP_STL,OPP_BLK,OPP_TOV,OPP_PF,OPP_PLUS_MINUS
28,12015,1610612737,ATL,Atlanta Hawks,11500017,2015-10-07,ATL @ CLE,W,238,98,33,75,0.44,7,24,0.292,25,30,0.833,9,37,46,17,8,2,17,19,2.0,12015,1610612739,CLE,Cleveland Cavaliers,11500017,2015-10-07,CLE vs. ATL,L,239,96,35,83,0.422,14,37,0.378,12,15,0.8,7,32,39,28,10,7,17,29,-2.0
53,12015,1610612737,ATL,Atlanta Hawks,11500032,2015-10-09,ATL @ NOP,W,240,103,33,74,0.446,11,26,0.423,26,36,0.722,5,43,48,21,8,5,16,23,10.0,12015,1610612740,NOP,New Orleans Pelicans,11500032,2015-10-09,NOP vs. ATL,L,238,93,32,84,0.381,7,27,0.259,22,31,0.71,6,38,44,15,8,7,13,26,-10.0
102,12015,1610612737,ATL,Atlanta Hawks,11500060,2015-10-14,ATL vs. SAS,W,240,100,32,78,0.41,11,31,0.355,25,29,0.862,3,45,48,24,10,4,16,26,14.0,12015,1610612759,SAS,San Antonio Spurs,11500060,2015-10-14,SAS @ ATL,L,240,86,27,82,0.329,4,20,0.2,28,36,0.778,11,39,50,17,9,2,18,25,-14.0
118,12015,1610612737,ATL,Atlanta Hawks,11500068,2015-10-16,ATL @ DAL,W,241,91,31,84,0.369,9,28,0.321,20,24,0.833,9,40,49,18,9,7,15,14,7.0,12015,1610612742,DAL,Dallas Mavericks,11500068,2015-10-16,DAL vs. ATL,L,241,84,31,85,0.365,9,33,0.273,13,17,0.765,8,41,49,18,8,6,18,19,-7.0
143,12015,1610612737,ATL,Atlanta Hawks,11500081,2015-10-18,ATL vs. MIA,L,240,92,33,75,0.44,6,22,0.273,20,22,0.909,8,36,44,23,6,9,22,24,-9.0,12015,1610612748,MIA,Miami Heat,11500081,2015-10-18,MIA @ ATL,W,240,101,36,92,0.391,10,30,0.333,19,25,0.76,15,29,44,26,8,2,13,16,9.0


In [4]:
# Storing some columns for future use
game_ids = games['GAME_ID'].values
team_ids = games['TEAM_ID'].values
minutes = games['MIN'].values
abrv = games['TEAM_ABBREVIATION'].values
spread = games['PLUS_MINUS'].values

# Dropping Non-essential categorical data
games = games.drop(columns=['SEASON_ID', 'OPP_SEASON_ID', 'OPP_TEAM_ID','GAME_ID', 'OPP_GAME_ID', 'TEAM_ABBREVIATION', 'OPP_TEAM_ABBREVIATION', 'TEAM_NAME', 'OPP_TEAM_NAME', 'MATCHUP', 'OPP_MATCHUP', 'WL', 'OPP_WL', 'GAME_DATE', 'OPP_GAME_DATE', 'MIN', 'OPP_MIN']) 

# Adding minutes back as a single column
games['MIN'] = minutes

# Print Head
pd.set_option('display.max_columns', None)
games.head()

Unnamed: 0,TEAM_ID,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,OPP_PTS,OPP_FGM,OPP_FGA,OPP_FG_PCT,OPP_FG3M,OPP_FG3A,OPP_FG3_PCT,OPP_FTM,OPP_FTA,OPP_FT_PCT,OPP_OREB,OPP_DREB,OPP_REB,OPP_AST,OPP_STL,OPP_BLK,OPP_TOV,OPP_PF,OPP_PLUS_MINUS,MIN
28,1610612737,98,33,75,0.44,7,24,0.292,25,30,0.833,9,37,46,17,8,2,17,19,2.0,96,35,83,0.422,14,37,0.378,12,15,0.8,7,32,39,28,10,7,17,29,-2.0,238
53,1610612737,103,33,74,0.446,11,26,0.423,26,36,0.722,5,43,48,21,8,5,16,23,10.0,93,32,84,0.381,7,27,0.259,22,31,0.71,6,38,44,15,8,7,13,26,-10.0,240
102,1610612737,100,32,78,0.41,11,31,0.355,25,29,0.862,3,45,48,24,10,4,16,26,14.0,86,27,82,0.329,4,20,0.2,28,36,0.778,11,39,50,17,9,2,18,25,-14.0,240
118,1610612737,91,31,84,0.369,9,28,0.321,20,24,0.833,9,40,49,18,9,7,15,14,7.0,84,31,85,0.365,9,33,0.273,13,17,0.765,8,41,49,18,8,6,18,19,-7.0,241
143,1610612737,92,33,75,0.44,6,22,0.273,20,22,0.909,8,36,44,23,6,9,22,24,-9.0,101,36,92,0.391,10,30,0.333,19,25,0.76,15,29,44,26,8,2,13,16,9.0,240


In [5]:
# Adding Advanced Stats to enhance model performance, formulas were gathered from various sources. 

# Efficient Field Goal Percentage
games['EFG%'] = (games['FGM'] + (.5 * games['FG3M'])) / games['FGA']
games['OPP_EFG%'] = (games['OPP_FGM'] + (.5 * games['OPP_FG3M'])) / games['OPP_FGA']

# Block Percentage
games['BLK%'] = (games['BLK'] / (games['OPP_FGA']-games['OPP_FG3A']))
games['OPP_BLK%'] = (games['OPP_BLK'] / (games['FGA']-games['FG3A']))

# Turnover Percentage
games['TOV%'] = games['TOV'] / (games['FGA'] + 0.44 * games['FTA'] + games['TOV'])
games['OPP_TOV%'] = games['OPP_TOV'] / (games['OPP_FGA'] + 0.44 * games['OPP_FTA'] + games['OPP_TOV'])

#Offensive Rebound Percentage
games['ORB%'] = games['OREB'] / (games['OREB'] + games['OPP_DREB'])
games['OPP_ORB%'] = games['OPP_OREB'] / (games['OPP_OREB'] + games['DREB'])

#Defensive Rebound Percentage
games['DREB%'] = games['DREB'] / (games['OPP_OREB'] + games['DREB'])
games['OPP_DREB%'] = games['OPP_DREB'] / (games['OREB'] + games['OPP_DREB'])

# Possessions
#games["POSS"] = 0.5*((games["FGA"] + 0.4*games["FTA"] - 1.07*(games["OREB"]/(games["OREB"] + games["OREB_other"])) * (games["FGA"] - games["FGM"]) + games["TOV"]) + games["FGA_other"] + 0.4*games["FTA_other"] - 1.07 * (games["OREB_other"] / (games["OREB_other"] + games["DREB"])) * (games["FGA_other"] - games["FGM_other]) + games["TOV_other"]))

# ***Placeholder POSS***
games['POSS'] = 0.96*((games['FGA']) + games['TOV'] + 0.44 * games['FTA'] - games['OREB'])
games['OPP_POSS'] = 0.96*((games['OPP_FGA']) + games['OPP_TOV'] + 0.44 * games['OPP_FTA'] - games['OPP_OREB'])

# Steals Percentage
games['STL%'] = (games['STL'] / games['OPP_POSS'])
games['OPP_STL%'] = (games['OPP_STL'] / games['POSS'])

# Free Throw Rate
games['FTR'] = games['FTM'] / games['FGA']
games['OPP_FTR'] = games['OPP_FTM'] / games['OPP_FGA']

# True Shooting (Requires True Shooting Attempts)
tsa = games['FGA'] + 0.44 * games['FTA']
OPP_tsa = games['OPP_FGA'] + 0.44 * games['OPP_FTA']
games['TS'] = games['PTS'] / (2 * tsa)
games['OPP_TS'] = games['OPP_PTS'] / (2 * OPP_tsa)

# Assist Rate
games['ASTR'] = games['AST'] / (games['FGA'] + (.44 * games['FTA']) + games['AST'] + games['TOV'])
games['OPP_ASTR'] = games['OPP_AST'] / (games['OPP_FGA'] + (.44 * games['OPP_FTA']) + games['OPP_AST'] + games['OPP_TOV'])

# Total Rebound Percentage
games['TRB%'] = (games['REB'] * (games['REB'] / 5)) / (games['MIN'] * (games['REB'] + games['OPP_REB']))
games['OPP_TRB%'] = (games['OPP_REB'] * (games['OPP_REB'] / 5)) / (games['MIN'] * (games['OPP_REB'] + games['REB']))

# PACE
games['PACE'] = 48 * (games['POSS'] + games['OPP_POSS']) / (2 * (games['MIN'] / 5))
games['OPP_PACE'] = 48 * (games['OPP_POSS'] + games['POSS']) / (2 * (games['MIN'] / 5))

# Offensive Rating
games['ORTG'] = (games['PTS'] / games['POSS'])
games['OPP_ORTG'] = (games['OPP_PTS'] / games['OPP_POSS'])

# Defensive Rating
games['DRTG'] = (games['OPP_PTS'] / games['POSS'])
games['OPP_DRTG'] = (games['PTS'] / games['OPP_POSS'])

In [6]:
# chaning stuff
games_averages = pd.concat([games.rolling(10).mean() for _, d in games.groupby('TEAM_ID')])

# Converting entire DF to numeric
games_averages = games_averages.apply(pd.to_numeric, errors='coerce')

# Print Head
pd.set_option('display.float_format', '{:.2f}'.format)
pd.set_option('display.max_columns', None)
games_averages.head(30)

Unnamed: 0,TEAM_ID,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,OPP_PTS,OPP_FGM,OPP_FGA,OPP_FG_PCT,OPP_FG3M,OPP_FG3A,OPP_FG3_PCT,OPP_FTM,OPP_FTA,OPP_FT_PCT,OPP_OREB,OPP_DREB,OPP_REB,OPP_AST,OPP_STL,OPP_BLK,OPP_TOV,OPP_PF,OPP_PLUS_MINUS,MIN,EFG%,OPP_EFG%,BLK%,OPP_BLK%,TOV%,OPP_TOV%,ORB%,OPP_ORB%,DREB%,OPP_DREB%,POSS,OPP_POSS,STL%,OPP_STL%,FTR,OPP_FTR,TS,OPP_TS,ASTR,OPP_ASTR,TRB%,OPP_TRB%,PACE,OPP_PACE,ORTG,OPP_ORTG,DRTG,OPP_DRTG
28,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
53,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
102,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
118,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
143,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
171,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
196,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
238,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
263,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
272,1610612737.0,94.6,33.9,79.9,0.42,7.4,23.4,0.31,19.4,24.6,0.8,8.2,37.9,46.1,21.8,8.2,5.2,17.0,20.8,1.1,93.5,33.2,83.8,0.4,8.6,28.2,0.29,18.5,24.8,0.76,9.2,34.8,44.0,21.2,8.4,5.0,14.8,22.9,-1.1,240.0,0.47,0.45,0.09,0.09,0.16,0.14,0.19,0.19,0.81,0.81,95.54,96.3,0.09,0.09,0.25,0.22,0.52,0.5,0.17,0.16,0.02,0.02,95.92,95.92,0.99,0.97,0.98,0.98


In [7]:
# games DF will now have the average stats of a teams most recent 10 games
games = games_average

# # Normalize Data
# realcols = list(games.columns.values)

# for col in realcols:
#   mean = games[col].mean()
#   std = games[col].std()
#   games[col] = (games[col] - mean)/std

# Sending data to CSV
games.to_csv('games_updated.csv', index = False)

# Print head
pd.set_option('display.max_columns', None)
games.head()

Unnamed: 0,TEAM_ID,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,OPP_PTS,OPP_FGM,OPP_FGA,OPP_FG_PCT,OPP_FG3M,OPP_FG3A,OPP_FG3_PCT,OPP_FTM,OPP_FTA,OPP_FT_PCT,OPP_OREB,OPP_DREB,OPP_REB,OPP_AST,OPP_STL,OPP_BLK,OPP_TOV,OPP_PF,OPP_PLUS_MINUS,MIN,EFG%,OPP_EFG%,BLK%,OPP_BLK%,TOV%,OPP_TOV%,ORB%,OPP_ORB%,DREB%,OPP_DREB%,POSS,OPP_POSS,STL%,OPP_STL%,FTR,OPP_FTR,TS,OPP_TS,ASTR,OPP_ASTR,TRB%,OPP_TRB%,PACE,OPP_PACE,ORTG,OPP_ORTG,DRTG,OPP_DRTG
28,1610612737,98,33,75,0.44,7,24,0.29,25,30,0.83,9,37,46,17,8,2,17,19,2.0,96,35,83,0.42,14,37,0.38,12,15,0.8,7,32,39,28,10,7,17,29,-2.0,238,0.49,0.51,0.04,0.14,0.16,0.16,0.22,0.16,0.84,0.78,92.35,95.62,0.08,0.11,0.33,0.14,0.56,0.54,0.14,0.21,0.02,0.02,94.77,94.77,1.06,1.0,1.04,1.02
53,1610612737,103,33,74,0.45,11,26,0.42,26,36,0.72,5,43,48,21,8,5,16,23,10.0,93,32,84,0.38,7,27,0.26,22,31,0.71,6,38,44,15,8,7,13,26,-10.0,240,0.52,0.42,0.09,0.15,0.15,0.12,0.12,0.12,0.88,0.88,96.81,100.45,0.08,0.08,0.35,0.26,0.57,0.48,0.17,0.12,0.02,0.02,98.63,98.63,1.06,0.93,0.96,1.03
102,1610612737,100,32,78,0.41,11,31,0.35,25,29,0.86,3,45,48,24,10,4,16,26,14.0,86,27,82,0.33,4,20,0.2,28,36,0.78,11,39,50,17,9,2,18,25,-14.0,240,0.48,0.35,0.06,0.04,0.15,0.16,0.07,0.2,0.8,0.93,99.61,100.65,0.1,0.09,0.32,0.34,0.55,0.44,0.18,0.13,0.02,0.02,100.13,100.13,1.0,0.85,0.86,0.99
118,1610612737,91,31,84,0.37,9,28,0.32,20,24,0.83,9,40,49,18,9,7,15,14,7.0,84,31,85,0.36,9,33,0.27,13,17,0.77,8,41,49,18,8,6,18,19,-7.0,241,0.42,0.42,0.13,0.11,0.14,0.16,0.18,0.17,0.83,0.82,96.54,98.38,0.09,0.08,0.24,0.15,0.48,0.45,0.14,0.14,0.02,0.02,97.05,97.05,0.94,0.85,0.87,0.92
143,1610612737,92,33,75,0.44,6,22,0.27,20,22,0.91,8,36,44,23,6,9,22,24,-9.0,101,36,92,0.39,10,30,0.33,19,25,0.76,15,29,44,26,8,2,13,16,9.0,240,0.48,0.45,0.15,0.04,0.21,0.11,0.22,0.29,0.71,0.78,94.73,96.96,0.06,0.08,0.27,0.21,0.54,0.49,0.18,0.18,0.02,0.02,95.85,95.85,0.97,1.04,1.07,0.95


In [8]:
# Given a matchup of two teams, those teams recent x box scores, and those teams x + 1 spread; make ML model. 

# [recent 15 games box scores averaged], [point spread of 16th game]
# [recent 15 games box scores averaged], [point spread of 17th game]
# [recent 15 games box scores averaged], [point spread of 18th game]
# [recent 15 games box scores averaged], [point spread of 19th game]
# [recent 15 games box scores averaged], [point spread of 20th game]
# [recent 15 games box scores averaged], [point spread of 21st game]


[games 1-15 box scores: team1, team 2][point spread of 16th game: team1, team2]

model_df = 



SyntaxError: invalid syntax (<ipython-input-8-d3d5650f9717>, line 11)