In [12]:
import pandas as pd
import numpy as np
# Widen the text display so printed frames are not wrapped after 80 characters.
pd.set_option('display.width', 1000)

# Read the two CSV files this notebook works with. low_memory=False makes
# pandas parse each file in one pass instead of in chunks.
games_df, games_details_df = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('./data/games.csv', './data/games_details.csv')
)

# Other CSV files in ./data, currently not loaded:
# players_df = pd.read_csv('./data/players.csv')
# ranking_df = pd.read_csv('./data/ranking.csv')
# teams_df = pd.read_csv('./data/teams.csv')

In [13]:
# Show the first five rows of each frame as plain text, player-level details
# first, then the game-level table, one after the other.
print(games_details_df.head(), games_df.head(), sep='\n')

    GAME_ID     TEAM_ID TEAM_ABBREVIATION    TEAM_CITY  PLAYER_ID     PLAYER_NAME NICKNAME START_POSITION COMMENT    MIN  ...  OREB  DREB  REB  AST  STL  BLK   TO   PF   PTS  PLUS_MINUS
0  22200477  1610612759               SAS  San Antonio    1629641  Romeo Langford    Romeo              F     NaN  18:06  ...   1.0   1.0  2.0  0.0  1.0  0.0  2.0  5.0   2.0        -2.0
1  22200477  1610612759               SAS  San Antonio    1631110   Jeremy Sochan   Jeremy              F     NaN  31:01  ...   6.0   3.0  9.0  6.0  1.0  0.0  2.0  1.0  23.0       -14.0
2  22200477  1610612759               SAS  San Antonio    1627751    Jakob Poeltl    Jakob              C     NaN  21:42  ...   1.0   3.0  4.0  1.0  1.0  0.0  2.0  4.0  13.0        -4.0
3  22200477  1610612759               SAS  San Antonio    1630170   Devin Vassell    Devin              G     NaN  30:20  ...   0.0   9.0  9.0  5.0  3.0  0.0  2.0  1.0  10.0       -18.0
4  22200477  1610612759               SAS  San Antonio    1630200     

In [21]:
# Margin of victory (MOV): home points minus away points, so the value is
# positive when the home team outscored the away team and negative otherwise.
games_df["MOV"] = games_df["PTS_home"].sub(games_df["PTS_away"])

# Boolean mask marking games decided by five points or fewer, either way.
close_games = games_df["MOV"].abs().le(5)
print('Number of close games:', close_games.sum())



Number of close games: 14816


Below is a rough outline of the final recommender system that I will build in this notebook. It takes the schedule of upcoming games as input and, using historical NBA game data, recommends the games that are expected to be the closest, on the assumption that close games are the most exciting to watch.

In [6]:
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import f1_score

import pickle

# Load the trained model.
# scikit-learn's NearestNeighbors has no `load` method, so the previous
# `model.load(...)` call raised AttributeError and left an unfitted estimator
# that could not answer `kneighbors`. The fitted estimator is restored by
# unpickling the saved file instead.
# NOTE(review): unpickling can execute arbitrary code; only load model files
# that come from a trusted source.
# NOTE(review): assumes the file was written with pickle.dump and holds a
# fitted NearestNeighbors(n_neighbors=5, metric='cosine'); a joblib dump would
# need joblib.load instead. Confirm against the training code.
with open('nba_game_recommender.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Load the schedule for the upcoming night's games
schedule = pd.read_csv('nba_schedule.csv')

# Build one feature vector per scheduled game from the historical data.
# `get_features` is not defined in this cell; it has to be available in the
# kernel before this cell runs.
features = [
    get_features(row['Game_ID'], row['Home_Team'], row['Away_Team'])
    for _, row in schedule.iterrows()
]

# Query the model: for every scheduled game, `distances` and `indices` each
# hold one row with an entry per returned neighbour.
distances, indices = model.kneighbors(features)
recommended_games = []
for i, dist in enumerate(distances):
    if dist < 0.5:  # Arbitrarily set a threshold

    GAME_ID     TEAM_ID TEAM_ABBREVIATION    TEAM_CITY  PLAYER_ID  \
0  22200477  1610612759               SAS  San Antonio    1629641   
1  22200477  1610612759               SAS  San Antonio    1631110   
2  22200477  1610612759               SAS  San Antonio    1627751   
3  22200477  1610612759               SAS  San Antonio    1630170   
4  22200477  1610612759               SAS  San Antonio    1630200   

      PLAYER_NAME NICKNAME START_POSITION COMMENT    MIN  ...  OREB  DREB  \
0  Romeo Langford    Romeo              F     NaN  18:06  ...   1.0   1.0   
1   Jeremy Sochan   Jeremy              F     NaN  31:01  ...   6.0   3.0   
2    Jakob Poeltl    Jakob              C     NaN  21:42  ...   1.0   3.0   
3   Devin Vassell    Devin              G     NaN  30:20  ...   0.0   9.0   
4       Tre Jones      Tre              G     NaN  27:44  ...   0.0   2.0   

   REB  AST  STL  BLK   TO   PF   PTS  PLUS_MINUS  
0  2.0  0.0  1.0  0.0  2.0  5.0   2.0        -2.0  
1  9.0  6.0  1.0  