In [1]:
from nba_api.stats.static import teams
from nba_api.stats.endpoints import teamgamelog
import pandas as pd

In [2]:
team_dict = teams.get_teams()
season = "2024-25" # Current NBA season
nba_stats = pd.DataFrame()

In [3]:
# Getting NBA stats needed
for team in team_dict:
    team_name = team['full_name']
    team_id = team['id']
    team_log = teamgamelog.TeamGameLog(team_id=team_id, season = season)
    team_stats = team_log.get_data_frames()[0]
    team_stats['Team'] = team_name

    nba_stats = pd.concat([nba_stats, team_stats], ignore_index=True)

In [4]:
# Editing the data
duplicate_nba_stats = nba_stats.copy()
overall_nba_stats = pd.merge(nba_stats, duplicate_nba_stats, on='Game_ID', suffixes=('','_Opponent'))
overall_nba_stats = overall_nba_stats[overall_nba_stats['Team_ID'] != overall_nba_stats['Team_ID_Opponent']]

In [5]:
overall_nba_stats = overall_nba_stats.drop(['MIN','FGM','FGA','FG3M','FG3A','FTM','FTA','DREB','GAME_DATE_Opponent','MATCHUP_Opponent','WL_Opponent','W_Opponent','L_Opponent','MIN_Opponent','FGM_Opponent','FGA_Opponent','FG3M_Opponent','FG3A_Opponent','FTM_Opponent','FTA_Opponent','DREB_Opponent'],axis=1)
overall_nba_stats = overall_nba_stats.dropna()

In [6]:
overall_nba_stats['W'] = overall_nba_stats['W'] + overall_nba_stats['L']
overall_nba_stats.rename(columns={'W': 'Games_Played'}, inplace=True)

In [7]:
#Get Number of Wins in last 10 Games
last_10_game = []
for i in range(len(overall_nba_stats)):
    team_name = overall_nba_stats.iloc[i]['Team']
    win = 0
    total = 0
    for j in range(0,9):
        if (i+j >= len(overall_nba_stats)):
            break
        if (overall_nba_stats.iloc[i+j]['Team'] == team_name):
            total = total+1
            if (overall_nba_stats.iloc[i+j]['WL'] == 'W'):
                win = win+1
    last_10_game_winPct = win/total if total>0 else 0
    last_10_game.append(last_10_game_winPct)
overall_nba_stats['Last_10_Game_W_PCT'] = last_10_game
    

In [8]:
# Head-To-Head Record
h2h_record=[]
for i in range(len(overall_nba_stats)):
    opponent_name = overall_nba_stats.iloc[i]['Team_Opponent']
    win = 0
    total = 0 
    games_played = int(overall_nba_stats.iloc[i]['Games_Played'])
    for j in range(games_played):
        if (overall_nba_stats.iloc[i+j]['Team_Opponent']==opponent_name):
            total = total+1
            if (overall_nba_stats.iloc[i+j]['WL']=='W'):
                win=win+1
    h2h_winPct = win/total
    h2h_record.append(h2h_winPct)
overall_nba_stats['Head_To_Head_W_PCT'] = h2h_record

In [9]:
# Back-To-Back Games
overall_nba_stats['GAME_DATE'] = pd.to_datetime(overall_nba_stats['GAME_DATE'], format = '%b %d, %Y')
back_to_back = []
for i in range(len(overall_nba_stats)):
    team_name = overall_nba_stats.iloc[i]['Team']
    if (i+1>=len(overall_nba_stats)):
        back_to_back.append(0)
        break
    day_difference = (overall_nba_stats.iloc[i]['GAME_DATE'] - overall_nba_stats.iloc[i+1]['GAME_DATE']).days
    if (day_difference == 1 and overall_nba_stats.iloc[i+1]['Team'] == team_name):
        back_to_back.append(1)
        continue
    back_to_back.append(0)
overall_nba_stats['Back-To-Back'] = back_to_back                                             

In [10]:
# Home/Away
overall_nba_stats['Home-Away'] = overall_nba_stats['MATCHUP'].apply(lambda x: '1' if 'vs.' in x else '0')

In [11]:
from nba_api.stats.endpoints import commonteamroster
from nba_api.stats.endpoints import playergamelog

# Top 3 Player playing
nba_plus_minus = []
for team in team_dict:
    team_name = team['full_name']
    team_id = team['id']
    team_roster = commonteamroster.CommonTeamRoster(team_id=team_id)
    roster = team_roster.get_data_frames()[0]
    team_plus_minus = []
    for i in range(len(roster)):
        player_id = roster.iloc[i]['PLAYER_ID']
        player_name = roster.iloc[i]['PLAYER']
        player_gamelog = playergamelog.PlayerGameLog(player_id=player_id, season='2024-25')
        gamelog_df = player_gamelog.get_data_frames()[0]
        gamelog_df['PLUS_MINUS'] = gamelog_df['PLUS_MINUS'].astype(float)
        player_plus_minus = gamelog_df['PLUS_MINUS'].sum()
        team_plus_minus.append([player_name,player_id,player_plus_minus])
    nba_plus_minus.append([team_name,team_id,team_plus_minus])

In [12]:
top_players = []
for team in nba_plus_minus:
    team_name = team[0]
    players = team[2]
    team_id = team[1]
    sort_player = sorted(players, key=lambda x:x[2], reverse = True)
    top_3_plus_minus = sort_player[:3]
    top_players.append([team_name, team_id, top_3_plus_minus])

In [13]:
from nba_api.stats.endpoints import BoxScoreTraditionalV2

def is_player_playing(box_score,game_id, player_id):
    player_stats = box_score.player_stats.get_dict()['data']
    for player in player_stats:
        if player[4] == player_id:
            return True
    return False

In [14]:
top_3_playing = [[],[],[]]
top_3_players = [[],[],[]]
top_3_opponent_playing = [[],[],[]]
top_3_opponent_players = [[],[],[]]
for i in range(len(overall_nba_stats)):
    game_id = overall_nba_stats.iloc[i]['Game_ID']
    team_name = overall_nba_stats.iloc[i]['Team']
    opponent_team_name = overall_nba_stats.iloc[i]['Team_Opponent']
    box_score = BoxScoreTraditionalV2(game_id=game_id, timeout=2000)
    team_details = next((team for team in top_players if team[0] == team_name), None)
    team_opponent_details = next((team for team in top_players if team[0] == opponent_team_name), None)
    for j in range(3):
        player_team = team_details[2][j]
        player_id_team = player_team[1]
        player_opponent = team_opponent_details[2][j]
        player_id_opponent = player_opponent[1]
        is_playing_team = is_player_playing(box_score,game_id, player_id_team)
        is_playing_opponent = is_player_playing(box_score,game_id,player_id_opponent)
        top_3_players[j].append(player_team[0])
        top_3_opponent_players[j].append(player_opponent[0])
        if (is_playing_team):
            top_3_playing[j].append(1)
        else:
            top_3_playing[j].append(0)
        if (is_playing_opponent):
            top_3_opponent_playing[j].append(1)
        else:
            top_3_opponent_playing[j].append(0)
        
overall_nba_stats['No.1 Plus Minus Player'] = top_3_players[0]
overall_nba_stats['No.2 Plus Minus Player'] = top_3_players[1]
overall_nba_stats['No.3 Plus Minus Player'] = top_3_players[2]
overall_nba_stats['No.1 Plus Minus Playing'] = top_3_playing[0]
overall_nba_stats['No.2 Plus Minus Playing'] = top_3_playing[1]
overall_nba_stats['No.3 Plus Minus Playing'] = top_3_playing[2]
overall_nba_stats['No.1 Plus Minus Opponent Player'] = top_3_opponent_players[0]
overall_nba_stats['No.2 Plus Minus Opponent Player'] = top_3_opponent_players[1]
overall_nba_stats['No.3 Plus Minus Opponent Player'] = top_3_opponent_players[2]
overall_nba_stats['No.1 Plus Minus Opponent Playing'] = top_3_opponent_playing[0]
overall_nba_stats['No.2 Plus Minus Opponent Playing'] = top_3_opponent_playing[1]
overall_nba_stats['No.3 Plus Minus Opponent Playing'] = top_3_opponent_playing[2]

In [15]:
# Changing data to numerical
overall_nba_stats['WL'] = overall_nba_stats['WL'].apply(lambda x: 1 if x=='W' else 0)

In [16]:
# Set X and y
X = overall_nba_stats.drop(['Game_ID','WL','GAME_DATE','MATCHUP','Games_Played','L','Team','Team_Opponent','No.1 Plus Minus Player','No.2 Plus Minus Player','No.3 Plus Minus Player','No.1 Plus Minus Opponent Player','No.2 Plus Minus Opponent Player','No.3 Plus Minus Opponent Player'], axis=1)
y = overall_nba_stats['WL']

In [17]:
from sklearn.preprocessing import MinMaxScaler

# Scale data
scaler = MinMaxScaler()
columns_scale = ['Team_ID','OREB','REB','AST','STL','BLK','TOV','PF','PTS','Team_ID_Opponent','OREB_Opponent','REB_Opponent','AST_Opponent','STL_Opponent','BLK_Opponent','TOV_Opponent','PF_Opponent','PTS_Opponent']
X[columns_scale] = scaler.fit_transform(X[columns_scale])

In [18]:
import joblib
joblib.dump(scaler,"scaler.pkl")

['scaler.pkl']

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [94]:
class Model(nn.Module):
    # Input Layer (36 features)
    # Hidden Layer 1 (16 neurons)
    # Hidden Layer 2 (16 neurons)
    # Hidden Layer 3 (16 neurons)
    # Output Layer (2 - Win or Loss)
    def __init__(self, in_features=36, h1=16, h2=16, h3=16, out_features=2):
        super().__init__()
        self.fc1 = nn.Linear(in_features,h1)
        self.fc2 = nn.Linear(h1,h2)
        self.fc3 = nn.Linear(h2,h3)
        self.out = nn.Linear(h3,out_features)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.out(x)
        return x

In [95]:
# Create Model
model = Model()

In [27]:
# Train Test Split
from sklearn.model_selection import train_test_split
X = X.values
y = y.values
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

AttributeError: 'numpy.ndarray' object has no attribute 'values'

In [23]:
import numpy as np
# Convert Tensors
X_train = np.array(X_train, dtype=np.float32)
X_test = np.array(X_test, dtype=np.float32)  
y_train = np.array(y_train, dtype=np.float32)  
y_test = np.array(y_test, dtype=np.float32) 

X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)

In [96]:
# Set Criterion and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [97]:
# Train Model
epochs = 100
losses = []
for i in range(epochs):
    y_pred = model.forward(X_train)
    loss = criterion(y_pred, y_train)
    losses.append(loss.detach().numpy())
    if i % 10 == 0:
        print(f'Epoch: {i} and loss: {loss}')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Epoch: 0 and loss: 0.6874901056289673
Epoch: 10 and loss: 0.6783396601676941
Epoch: 20 and loss: 0.6646040678024292
Epoch: 30 and loss: 0.6453679800033569
Epoch: 40 and loss: 0.615996778011322
Epoch: 50 and loss: 0.5709676146507263
Epoch: 60 and loss: 0.5083140134811401
Epoch: 70 and loss: 0.4278066158294678
Epoch: 80 and loss: 0.33742424845695496
Epoch: 90 and loss: 0.25011831521987915


In [98]:
# Save Model
torch.save(model.state_dict(), 'nba_prediction_model.pt')

In [75]:
# To load Model
# new_model = Model()
# new_model.load_state_dict(torch.load('nba_prediction_model.pt'))