In [93]:
import pandas as pd
import numpy as np
import json
import time
import requests

from tabulate import tabulate
from datetime import datetime, timezone
from dateutil import parser

from nba_api.stats.static import teams
from nba_api.stats.endpoints import LeagueGameLog
from nba_api.live.nba.endpoints import scoreboard

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
import pickle

In [94]:
def print_teams(team_list):
    """Print ``team_list`` as a grid table, using the record keys as column headers."""
    rendered = tabulate(team_list, headers='keys', tablefmt='grid')
    print(rendered)


# Static team records shipped with nba_api (one record per team)
all_teams = teams.get_teams()

print_teams(all_teams)

+------------+------------------------+----------------+---------------+---------------+----------------------+----------------+
|         id | full_name              | abbreviation   | nickname      | city          | state                |   year_founded |
| 1610612737 | Atlanta Hawks          | ATL            | Hawks         | Atlanta       | Georgia              |           1949 |
+------------+------------------------+----------------+---------------+---------------+----------------------+----------------+
| 1610612738 | Boston Celtics         | BOS            | Celtics       | Boston        | Massachusetts        |           1946 |
+------------+------------------------+----------------+---------------+---------------+----------------------+----------------+
| 1610612739 | Cleveland Cavaliers    | CLE            | Cavaliers     | Cleveland     | Ohio                 |           1970 |
+------------+------------------------+----------------+---------------+---------------+---------

In [95]:
def get_games():
    """Print the live scoreboard date followed by one line per listed game.

    Each line shows the game id, the away and home team (name and id) and the
    tip-off time converted from UTC to the machine's local time zone.
    """
    board = scoreboard.ScoreBoard()
    print("ScoreBoardDate: " + board.score_board_date)

    for game in board.games.get_dict():
        # The feed reports UTC; pin the tzinfo explicitly, then convert to local time
        tipoff_utc = parser.parse(game["gameTimeUTC"]).replace(tzinfo=timezone.utc)
        tipoff_local = tipoff_utc.astimezone(tz=None)

        away = game['awayTeam']
        home = game['homeTeam']
        print(f"{game['gameId']}: {away['teamName']} (ID: {away['teamId']}) vs. {home['teamName']} (ID: {home['teamId']}) @ {tipoff_local}")


get_games()

ScoreBoardDate: 2024-03-11
0022300933: Hornets (ID: 1610612766) vs. Pistons (ID: 1610612765) @ 2024-03-12 02:00:00+03:00
0022300934: Suns (ID: 1610612756) vs. Cavaliers (ID: 1610612739) @ 2024-03-12 02:30:00+03:00
0022300935: Mavericks (ID: 1610612742) vs. Bulls (ID: 1610612741) @ 2024-03-12 03:00:00+03:00
0022300936: Warriors (ID: 1610612744) vs. Spurs (ID: 1610612759) @ 2024-03-12 03:00:00+03:00
0022300937: Raptors (ID: 1610612761) vs. Nuggets (ID: 1610612743) @ 2024-03-12 04:00:00+03:00
0022300938: Celtics (ID: 1610612738) vs. Trail Blazers (ID: 1610612757) @ 2024-03-12 05:30:00+03:00


In [96]:
# Season whose game logs are requested
season = '2023-24'

# Query the league game-log endpoint for that season
game_log = LeagueGameLog(season=season)

# Columns that are not carried forward into the modelling cells
columns_to_drop = ['SEASON_ID', 'GAME_ID', 'GAME_DATE', 'VIDEO_AVAILABLE', 'MIN', 'MATCHUP', 'TEAM_NAME', 'TEAM_ABBREVIATION']

# Take the first data frame of the response and strip those columns
game_log_data = game_log.get_data_frames()[0].drop(columns=columns_to_drop)

# Show the last ten rows as a grid table
print(
    tabulate(
        game_log_data.tail(10),
        headers='keys',
        tablefmt='grid',
    )
)

+------+------------+------+-------+-------+----------+--------+--------+-----------+-------+-------+----------+--------+--------+-------+-------+-------+-------+-------+------+-------+--------------+
|      |    TEAM_ID | WL   |   FGM |   FGA |   FG_PCT |   FG3M |   FG3A |   FG3_PCT |   FTM |   FTA |   FT_PCT |   OREB |   DREB |   REB |   AST |   STL |   BLK |   TOV |   PF |   PTS |   PLUS_MINUS |
| 1912 | 1610612764 | W    |    39 |    81 |    0.481 |     16 |     39 |     0.41  |    16 |    22 |    0.727 |      6 |     34 |    40 |    35 |     5 |     5 |    15 |   18 |   110 |            2 |
+------+------------+------+-------+-------+----------+--------+--------+-----------+-------+-------+----------+--------+--------+-------+-------+-------+-------+-------+------+-------+--------------+
| 1913 | 1610612748 | L    |    43 |    95 |    0.453 |     10 |     33 |     0.303 |    12 |    18 |    0.667 |     16 |     35 |    51 |    20 |     9 |     6 |    13 |   18 |   108 |           

In [97]:
# Encode the win/loss flag 'WL' as the integer column 'WL_encoded'.
# LabelEncoder assigns codes in sorted label order, so 'L' -> 0 and 'W' -> 1.
#
# The original 'WL' column is removed once encoded, so the work is guarded on
# the column still being present: re-running this cell is then a no-op instead
# of raising KeyError: 'WL'. The encoder is created inside the guard so a
# re-run does not replace the fitted encoder with an unfitted one.
if 'WL' in game_log_data.columns:
    label_encoder = LabelEncoder()
    game_log_data['WL_encoded'] = label_encoder.fit_transform(game_log_data['WL'])
    game_log_data = game_log_data.drop(columns=['WL'])

# Check for missing values (per-column count of nulls)
missing_values = game_log_data.isnull().sum()
print("Missing Values:")
print(missing_values)



Missing Values:
TEAM_ID       0
FGM           0
FGA           0
FG_PCT        0
FG3M          0
FG3A          0
FG3_PCT       0
FTM           0
FTA           0
FT_PCT        0
OREB          0
DREB          0
REB           0
AST           0
STL           0
BLK           0
TOV           0
PF            0
PTS           0
PLUS_MINUS    0
WL_encoded    0
dtype: int64


In [98]:
# Target: points scored by the team in the logged game
y = game_log_data['PTS']

# Features: every remaining column of the game log.
# NOTE(review): X still contains FGM, FG3M and FTM, from which PTS can be
# computed exactly (PTS = 2*FGM + FG3M + FTM holds for the rows printed below),
# as well as the same-game outcome columns PLUS_MINUS and WL_encoded. The model
# is therefore fitted on same-game box-score information rather than on data
# available before tip-off -- confirm this is intended.
X = game_log_data.drop(columns=['PTS'])

# Preview the first rows of both
print("Features (X):", X.head(), "\nTarget Variable (y):", y.head(), sep="\n")

Features (X):
      TEAM_ID  FGM  FGA  FG_PCT  FG3M  FG3A  FG3_PCT  FTM  FTA  FT_PCT  OREB  \
0  1610612747   41   90   0.456    10    29    0.345   15   20   0.750    13   
1  1610612743   48   91   0.527    14    34    0.412    9   12   0.750     9   
2  1610612756   42   95   0.442    11    33    0.333   13   17   0.765    17   
3  1610612744   36  101   0.356    10    43    0.233   22   28   0.786    18   
4  1610612741   39   94   0.415    12    42    0.286   14   18   0.778    13   

   DREB  REB  AST  STL  BLK  TOV  PF  PLUS_MINUS  WL_encoded  
0    31   44   23    5    4   12  18         -12           0  
1    33   42   29    9    6   12  15          12           1  
2    43   60   23    5    7   19  22           4           1  
3    31   49   19   11    6   11  23          -4           0  
4    29   42   26   10    3   13  16         -20           0  

Target Variable (y):
0    107
1    119
2    108
3    104
4    104
Name: PTS, dtype: int64


In [99]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# XGBoost regressor with library-default hyperparameters
xgb_regressor = XGBRegressor()

# Fit on the training portion only
xgb_regressor.fit(X_train, y_train)

# Predictions for the held-out rows, scored in the next cell
y_pred = xgb_regressor.predict(X_test)



In [100]:
# Compute the regression metrics on the held-out set
mse = mean_squared_error(y_test, y_pred)   # mean squared error
rmse = np.sqrt(mse)                        # root mean squared error
mae = mean_absolute_error(y_test, y_pred)  # mean absolute error
r2 = r2_score(y_test, y_pred)              # coefficient of determination

# Report them, one per line
for metric_label, metric_value in (
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("Mean Absolute Error:", mae),
    ("R-squared (R2) Score:", r2),
):
    print(metric_label, metric_value)

Mean Squared Error: 2.5624124823449566
Root Mean Squared Error: 1.600753723201966
Mean Absolute Error: 1.1053284087738433
R-squared (R2) Score: 0.9855628013839608


In [101]:
# Serialise the trained model to disk
with open('xgboost_regression_model.pkl', 'wb') as model_file:
    model_file.write(pickle.dumps(xgb_regressor))

# Read it back, replacing the in-memory model with the round-tripped copy.
# The file was written just above; unpickling executes code from the file, so
# this should only ever be pointed at pickles produced by this notebook.
with open('xgboost_regression_model.pkl', 'rb') as model_file:
    xgb_regressor = pickle.loads(model_file.read())

In [102]:
# Prepare input data for prediction: one record per team, home team first and
# away team second for every game on the current scoreboard.

# Stat columns copied from the game log, in the order they appear in the printed table
FIXTURE_STAT_COLUMNS = [
    'REB', 'FT_PCT', 'OREB', 'TOV', 'FTA', 'BLK', 'FG3A', 'FG_PCT', 'FTM',
    'PLUS_MINUS', 'FGM', 'FG3M', 'STL', 'FGA', 'DREB', 'PF', 'AST', 'FG3_PCT',
    'WL_encoded',
]


def _team_fixture_record(team_id, stats):
    """Build the model-input record for one team.

    Parameters:
        team_id: value matched against the 'TEAM_ID' column of ``stats``.
        stats: game-log DataFrame holding 'TEAM_ID' and every column listed in
            FIXTURE_STAT_COLUMNS.

    Returns:
        A dict with 'TEAM_ID' followed by the FIXTURE_STAT_COLUMNS values, or
        None when ``stats`` has no row for ``team_id``.
    """
    team_rows = stats[stats['TEAM_ID'] == team_id]
    if team_rows.empty:
        return None

    record = {'TEAM_ID': team_id}
    for column in FIXTURE_STAT_COLUMNS:
        # NOTE(review): this takes the first matching row, i.e. the box score
        # of a single logged game, as the team's features. A per-team aggregate
        # (e.g. season means) may be more representative -- confirm intent.
        record[column] = team_rows[column].values[0]  # .values[0] yields the scalar
    return record


fixtures_data = []
predictable_games = []
board = scoreboard.ScoreBoard()
for game in board.games.get_dict():
    home_record = _team_fixture_record(game['homeTeam']['teamId'], game_log_data)
    away_record = _team_fixture_record(game['awayTeam']['teamId'], game_log_data)

    # A team missing from the game log used to surface as a bare IndexError;
    # skip the whole game instead so the remaining fixtures can be predicted.
    if home_record is None or away_record is None:
        print(f"Skipping game {game['gameId']}: no game-log rows for one of the teams")
        continue

    fixtures_data.extend([home_record, away_record])
    predictable_games.append(game)

# Later cells pair games[i] with entries 2*i (home) and 2*i + 1 (away) of the
# per-team lists, so `games` must contain exactly the games kept above.
games = predictable_games

print(tabulate(fixtures_data, headers='keys', tablefmt='grid'))


+------------+-------+----------+--------+-------+-------+-------+--------+----------+-------+--------------+-------+--------+-------+-------+--------+------+-------+-----------+--------------+
|    TEAM_ID |   REB |   FT_PCT |   OREB |   TOV |   FTA |   BLK |   FG3A |   FG_PCT |   FTM |   PLUS_MINUS |   FGM |   FG3M |   STL |   FGA |   DREB |   PF |   AST |   FG3_PCT |   WL_encoded |
| 1610612765 |    56 |    0.6   |     17 |    17 |    15 |    13 |     32 |    0.446 |     9 |           -1 |    41 |     11 |     3 |    92 |     39 |   23 |    28 |     0.344 |            0 |
+------------+-------+----------+--------+-------+-------+-------+--------+----------+-------+--------------+-------+--------+-------+-------+--------+------+-------+-----------+--------------+
| 1610612766 |    51 |    0.731 |     12 |    19 |    26 |     3 |     37 |    0.5   |    19 |            6 |    43 |     11 |     5 |    86 |     39 |   21 |    34 |     0.297 |            1 |
+------------+-------+--------

In [103]:
# Predict points for each fixture record.

# Column order of the training features; TEAM_ID is part of the model input.
feature_order = ['TEAM_ID', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS', 'WL_encoded']

if fixtures_data:
    # Build a single frame and call the model once instead of once per row
    X_fixtures = pd.DataFrame(fixtures_data)[feature_order]
    predicted_points = list(xgb_regressor.predict(X_fixtures))
else:
    # No fixtures: nothing to predict (an empty frame has no columns to select)
    predicted_points = []

# Print predicted points for each fixture record, in the same order as fixtures_data
print("Predicted Points for Today's Fixtures:")
for fixture_data, predicted_point in zip(fixtures_data, predicted_points):
    print(f"Team ID: {fixture_data['TEAM_ID']}, Predicted Points: {predicted_point}")


Predicted Points for Today's Fixtures:
Team ID: 1610612765, Predicted Points: 101.98263549804688
Team ID: 1610612766, Predicted Points: 116.13172149658203
Team ID: 1610612739, Predicted Points: 113.89226531982422
Team ID: 1610612756, Predicted Points: 108.07414245605469
Team ID: 1610612741, Predicted Points: 103.80243682861328
Team ID: 1610612742, Predicted Points: 126.09562683105469
Team ID: 1610612759, Predicted Points: 119.00860595703125
Team ID: 1610612744, Predicted Points: 104.03842163085938
Team ID: 1610612743, Predicted Points: 119.40325927734375
Team ID: 1610612761, Predicted Points: 97.12644958496094
Team ID: 1610612757, Predicted Points: 110.88034057617188
Team ID: 1610612738, Predicted Points: 107.87094116210938


In [104]:
# One line per game: home team, away team and their predicted scores.
# predicted_points holds two consecutive entries per game (home, then away).
for game_idx, game in enumerate(games):
    home = game['homeTeam']
    away = game['awayTeam']

    home_pts = predicted_points[2 * game_idx]
    away_pts = predicted_points[2 * game_idx + 1]

    print(f"{home['teamName']} (ID: {home['teamId']}) vs. {away['teamName']} (ID: {away['teamId']}): Predicted Score - {home_pts} : {away_pts}")


Pistons (ID: 1610612765) vs. Hornets (ID: 1610612766): Predicted Score - 101.98263549804688 : 116.13172149658203
Cavaliers (ID: 1610612739) vs. Suns (ID: 1610612756): Predicted Score - 113.89226531982422 : 108.07414245605469
Bulls (ID: 1610612741) vs. Mavericks (ID: 1610612742): Predicted Score - 103.80243682861328 : 126.09562683105469
Spurs (ID: 1610612759) vs. Warriors (ID: 1610612744): Predicted Score - 119.00860595703125 : 104.03842163085938
Nuggets (ID: 1610612743) vs. Raptors (ID: 1610612761): Predicted Score - 119.40325927734375 : 97.12644958496094
Trail Blazers (ID: 1610612757) vs. Celtics (ID: 1610612738): Predicted Score - 110.88034057617188 : 107.87094116210938


In [105]:
# Build one table row per game: home label, away label, then the two predicted
# scores (predicted_points holds home at 2*i and away at 2*i + 1 for game i).
table_data = [
    [
        f"{game['homeTeam']['teamName']} (ID: {game['homeTeam']['teamId']})",
        f"{game['awayTeam']['teamName']} (ID: {game['awayTeam']['teamId']})",
        predicted_points[2 * idx],
        predicted_points[2 * idx + 1],
    ]
    for idx, game in enumerate(games)
]

# Render the rows as a grid table
column_titles = ["Home Team", "Away Team", "Predicted Score Home", "Predicted Score Away"]
print(tabulate(table_data, headers=column_titles, tablefmt="grid"))


+--------------------------------+----------------------------+------------------------+------------------------+
| Home Team                      | Away Team                  |   Predicted Score Home |   Predicted Score Away |
| Pistons (ID: 1610612765)       | Hornets (ID: 1610612766)   |                101.983 |               116.132  |
+--------------------------------+----------------------------+------------------------+------------------------+
| Cavaliers (ID: 1610612739)     | Suns (ID: 1610612756)      |                113.892 |               108.074  |
+--------------------------------+----------------------------+------------------------+------------------------+
| Bulls (ID: 1610612741)         | Mavericks (ID: 1610612742) |                103.802 |               126.096  |
+--------------------------------+----------------------------+------------------------+------------------------+
| Spurs (ID: 1610612759)         | Warriors (ID: 1610612744)  |                119.009 |