In [316]:
import pandas as pd
from nba_api.stats.endpoints import leaguegamelog
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import joblib


In [317]:
# Download the regular-season team game log for each season and stack the
# results into a single frame (one row per team per game, as the head() shows).
seasons = ['2019-20','2020-21', '2021-22', '2022-23', '2023-24', '2024-25']
appended_data = []

for season in seasons:
    print(f"getting {season} ")
    log = leaguegamelog.LeagueGameLog(season=season, season_type_all_star='Regular Season')
    # get_data_frames() returns a list of frames; only the first one is used.
    appended_data.append(log.get_data_frames()[0])

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Each
# per-season frame presumably carries its own index starting at 0, so a plain
# concat would repeat row labels across seasons and make any later
# index-aligned assignment ambiguous.
df = pd.concat(appended_data, ignore_index=True)
df.head()


getting 2019-20 
getting 2020-21 
getting 2021-22 
getting 2022-23 
getting 2023-24 
getting 2024-25 


Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE
0,22019,1610612740,NOP,New Orleans Pelicans,21900001,2019-10-22,NOP @ TOR,L,265,43,...,37,53,30,4,9,19,34,122,-8,1
1,22019,1610612746,LAC,LA Clippers,21900002,2019-10-22,LAC vs. LAL,W,240,42,...,34,45,24,8,5,14,25,112,10,1
2,22019,1610612761,TOR,Toronto Raptors,21900001,2019-10-22,TOR vs. NOP,W,265,42,...,41,57,23,7,3,17,24,130,8,1
3,22019,1610612747,LAL,Los Angeles Lakers,21900002,2019-10-22,LAL @ LAC,L,240,37,...,32,41,20,4,7,15,24,102,-10,1
4,22019,1610612738,BOS,Boston Celtics,21900008,2019-10-23,BOS @ PHI,L,240,33,...,31,41,18,4,2,11,29,93,-14,1


In [318]:
# Encode the result as a binary label (W -> 1, L -> 0), parse GAME_DATE into
# datetimes, then order rows by team and date so that each team's games are
# contiguous and chronological.
df = (
    df
    .assign(
        Target=df["WL"].map({"L": 0, "W": 1}),
        GAME_DATE=pd.to_datetime(df["GAME_DATE"]),
    )
    .sort_values(by=["TEAM_ID", "GAME_DATE"])
)




In [319]:
# Box-score statistics that get a "form" feature.
features = ['PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF']


def _mean_of_previous_games(stat):
    """Average of up to the 20 preceding values of ``stat``.

    shift(1) drops the current game from its own window, so the feature only
    uses information available before tip-off. min_periods=1 means a single
    prior game is enough; the very first row of a group has no history and
    comes out as NaN.
    """
    return stat.shift(1).rolling(20, min_periods=1).mean()


# NOTE: groups are keyed on TEAM_ID only, so a window that starts early in a
# season also reaches back into the same team's previous-season games.
for stat_name in features:
    per_team_stat = df.groupby("TEAM_ID")[stat_name]
    df[f'rolling_{stat_name}'] = per_team_stat.transform(_mean_of_previous_games)

df[['TEAM_ABBREVIATION', 'GAME_DATE', 'PTS', 'rolling_PTS']].head(15)


Unnamed: 0,TEAM_ABBREVIATION,GAME_DATE,PTS,rolling_PTS
26,ATL,2019-10-24,117,
50,ATL,2019-10-26,103,117.0
93,ATL,2019-10-28,103,110.0
102,ATL,2019-10-29,97,107.666667
134,ATL,2019-10-31,97,105.0
192,ATL,2019-11-05,108,103.4
213,ATL,2019-11-06,93,104.166667
230,ATL,2019-11-08,109,102.571429
262,ATL,2019-11-10,113,103.375
297,ATL,2019-11-12,125,104.444444


In [320]:
# Build one row per game by pairing each home-team row with the visiting
# team's row for the same GAME_ID.
#
# MATCHUP reads "ATL vs. ORL" on the home team's row and "ORL @ ATL" on the
# visitor's row. regex=False makes both checks literal substring tests: with
# the default regex=True the "." in 'vs.' would match any character.
is_home = df["MATCHUP"].str.contains("vs.", regex=False)
is_away = df["MATCHUP"].str.contains("@", regex=False)
home_df = df[is_home].copy()
away_df = df[is_away].copy()

# validate="one_to_one" makes the merge raise if a GAME_ID appears more than
# once on either side, instead of silently multiplying rows.
game_df = pd.merge(
    home_df,
    away_df,
    on="GAME_ID",
    suffixes=('_home', '_away'),
    validate="one_to_one",
)
game_df

Unnamed: 0,SEASON_ID_home,TEAM_ID_home,TEAM_ABBREVIATION_home,TEAM_NAME_home,GAME_ID,GAME_DATE_home,MATCHUP_home,WL_home,MIN_home,FGM_home,...,rolling_FTA_away,rolling_FT_PCT_away,rolling_OREB_away,rolling_DREB_away,rolling_REB_away,rolling_AST_away,rolling_STL_away,rolling_BLK_away,rolling_TOV_away,rolling_PF_away
0,22019,1610612737,ATL,Atlanta Hawks,0021900028,2019-10-26,ATL vs. ORL,W,240,43,...,19.00,0.579000,7.000000,39.000,46.000000,24.000000,12.000000,4.000000,13.000,18.000000
1,22019,1610612737,ATL,Atlanta Hawks,0021900043,2019-10-28,ATL vs. PHI,L,240,36,...,33.00,0.694500,9.000000,41.000,50.000000,27.000000,10.000000,5.000000,19.500,30.000000
2,22019,1610612737,ATL,Atlanta Hawks,0021900066,2019-10-31,ATL vs. MIA,L,240,36,...,33.75,0.721250,9.750000,39.000,48.750000,23.750000,8.500000,6.500000,22.500,27.000000
3,22019,1610612737,ATL,Atlanta Hawks,0021900099,2019-11-05,ATL vs. SAS,W,240,43,...,28.50,0.790333,11.833333,38.500,50.333333,22.833333,6.333333,5.333333,16.000,20.166667
4,22019,1610612737,ATL,Atlanta Hawks,0021900105,2019-11-06,ATL vs. CHI,L,240,30,...,23.25,0.718625,10.250000,31.625,41.875000,22.875000,8.875000,3.750000,13.625,21.125000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7049,22024,1610612766,CHA,Charlotte Hornets,0022401045,2025-03-25,CHA vs. ORL,L,240,37,...,24.80,0.789600,10.900000,31.050,41.950000,21.650000,8.050000,5.750000,12.950,20.250000
7050,22024,1610612766,CHA,Charlotte Hornets,0022401089,2025-03-31,CHA vs. UTA,W,240,43,...,21.75,0.720550,10.800000,34.550,45.350000,25.600000,6.200000,3.350000,17.300,18.150000
7051,22024,1610612766,CHA,Charlotte Hornets,0022401118,2025-04-04,CHA vs. SAC,L,240,33,...,19.75,0.800000,10.500000,32.000,42.500000,25.750000,7.600000,3.850000,13.400,18.150000
7052,22024,1610612766,CHA,Charlotte Hornets,0022401133,2025-04-06,CHA vs. CHI,L,240,41,...,21.95,0.798850,10.400000,36.150,46.550000,30.050000,7.600000,4.600000,13.800,17.300000


In [321]:
# Reduce the merged frame to what the model needs: identifying columns, every
# rolling-average column (both the _home and _away variants), and the home
# team's win/loss label.
info_cols = ['GAME_DATE_home', 'GAME_ID', 'TEAM_ABBREVIATION_home', 'TEAM_ABBREVIATION_away']
target_col = ['Target_home']
rolling_cols = [name for name in game_df.columns if "rolling" in name]

game_df_clean = game_df.loc[:, [*info_cols, *rolling_cols, *target_col]].copy()
game_df_clean.head()

Unnamed: 0,GAME_DATE_home,GAME_ID,TEAM_ABBREVIATION_home,TEAM_ABBREVIATION_away,rolling_PTS_home,rolling_FGM_home,rolling_FGA_home,rolling_FG_PCT_home,rolling_FG3M_home,rolling_FG3A_home,...,rolling_FT_PCT_away,rolling_OREB_away,rolling_DREB_away,rolling_REB_away,rolling_AST_away,rolling_STL_away,rolling_BLK_away,rolling_TOV_away,rolling_PF_away,Target_home
0,2019-10-26,21900028,ATL,ORL,117.0,44.0,86.0,0.512,11.0,31.0,...,0.579,7.0,39.0,46.0,24.0,12.0,4.0,13.0,18.0,1
1,2019-10-28,21900043,ATL,PHI,110.0,43.5,85.0,0.512,10.0,30.5,...,0.6945,9.0,41.0,50.0,27.0,10.0,5.0,19.5,30.0,0
2,2019-10-31,21900066,ATL,MIA,105.0,39.5,84.25,0.46875,10.0,31.75,...,0.72125,9.75,39.0,48.75,23.75,8.5,6.5,22.5,27.0,0
3,2019-11-05,21900099,ATL,SAS,103.4,38.8,85.0,0.4568,9.4,32.2,...,0.790333,11.833333,38.5,50.333333,22.833333,6.333333,5.333333,16.0,20.166667,1
4,2019-11-06,21900105,ATL,CHI,104.166667,39.5,85.833333,0.460333,9.666667,31.833333,...,0.718625,10.25,31.625,41.875,22.875,8.875,3.75,13.625,21.125,0


In [322]:
# Drop games with any missing value (e.g. a side with no prior games has NaN
# rolling features), then order by date so the positional split below is
# chronological.
game_df_clean = game_df_clean.dropna().sort_values(by="GAME_DATE_home")

# Everything that is not a rolling feature is removed from the design matrix.
non_feature_cols = ['GAME_DATE_home', 'GAME_ID', 'TEAM_ABBREVIATION_home', 'TEAM_ABBREVIATION_away', 'Target_home']
X = game_df_clean.drop(columns=non_feature_cols)
y = game_df_clean['Target_home']

# The earliest 80% of rows are used for training and the latest 20% for
# testing; there is no shuffling.
split_index = int(len(X) * 0.8)
X_train, X_test = X.iloc[:split_index], X.iloc[split_index:]
y_train, y_test = y.iloc[:split_index], y.iloc[split_index:]

print(f"Training on {len(X_train)} games (The Past)")
print(f"Testing on {len(X_test)} games (The Future)")

X_train


Training on 5630 games (The Past)
Testing on 1408 games (The Future)


Unnamed: 0,rolling_PTS_home,rolling_FGM_home,rolling_FGA_home,rolling_FG_PCT_home,rolling_FG3M_home,rolling_FG3A_home,rolling_FG3_PCT_home,rolling_FTM_home,rolling_FTA_home,rolling_FT_PCT_home,...,rolling_FTA_away,rolling_FT_PCT_away,rolling_OREB_away,rolling_DREB_away,rolling_REB_away,rolling_AST_away,rolling_STL_away,rolling_BLK_away,rolling_TOV_away,rolling_PF_away
5411,95.00,32.00,83.00,0.38600,10.00,27.0,0.37000,21.00,31.0,0.67700,...,18.00,0.83300,11.00,36.00,47.00,26.00,6.00,3.00,17.00,32.00
6119,101.00,38.00,89.00,0.42700,5.00,32.0,0.15600,20.00,30.0,0.66700,...,22.00,0.81800,20.00,29.00,49.00,25.00,11.00,4.00,12.00,20.00
705,122.00,43.00,102.00,0.42200,19.00,45.0,0.42200,17.00,20.0,0.85000,...,33.00,0.81800,5.00,41.00,46.00,17.00,6.00,5.00,20.00,22.00
233,93.00,33.00,90.00,0.36700,7.00,26.0,0.26900,20.00,34.0,0.58800,...,38.00,0.84200,16.00,41.00,57.00,23.00,7.00,3.00,17.00,24.00
1411,108.00,34.00,81.00,0.42000,18.00,32.0,0.56300,22.00,27.0,0.81500,...,18.00,0.83300,9.00,35.00,44.00,31.00,13.00,9.00,14.00,25.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5836,110.20,41.90,90.90,0.46080,11.80,34.7,0.34035,14.60,19.4,0.76225,...,19.70,0.83685,8.90,33.65,42.55,26.85,8.45,7.30,12.15,17.95
4897,105.50,39.40,87.85,0.44845,11.00,31.4,0.34630,15.70,19.7,0.80295,...,21.75,0.84305,8.70,31.95,40.65,24.60,7.45,4.55,13.30,16.65
2544,120.60,45.45,87.90,0.51710,12.85,31.7,0.40415,16.85,21.5,0.77130,...,17.70,0.80075,10.95,30.45,41.40,24.35,7.55,5.45,11.75,19.60
2776,107.90,40.45,86.15,0.47040,11.70,32.7,0.35650,15.30,19.5,0.78800,...,19.20,0.78580,10.00,34.35,44.35,28.15,8.55,5.35,13.15,17.15


In [323]:
# Fit a gradient-boosted tree classifier on the earlier games and report how
# often it picks the right home-team outcome on the held-out later games.
xgb_params = {
    "n_estimators": 81,
    "learning_rate": 0.1,
    "max_depth": 3,
    "random_state": 42,  # fixed seed for repeatable fits
}
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy:.2%}")

Model Accuracy: 63.42%


In [324]:
# Persist the fitted classifier with joblib so it can be reloaded later
# without retraining. The file is written relative to the current working
# directory.
model_path = 'nba_model.pkl'
joblib.dump(model, model_path)

print("Model saved successfully!")

Model saved successfully!
