# Installing dependencies and loading all needed dataframes

In [None]:
!pip install nba_api

In [None]:
from nba_api.stats.endpoints import BoxScoreAdvancedV2, LeagueDashTeamStats, LeagueGameFinder, LeagueDashPlayerStats, PlayByPlayV2, LeagueLineupViz, TeamPlayerOnOffDetails, TeamPlayerOnOffSummary
import pandas as pd
from tqdm import tqdm
import time
from tenacity import retry, stop_after_attempt, wait_exponential
import matplotlib.pyplot as plt
import seaborn as sns

# Remove the column limit so wide DataFrames are displayed in full.
pd.set_option('display.max_columns', None)

In [None]:
# Notebook-wide defaults used by the cells below.
season = "2023-24"  # season string handed to the fetch_* helpers
game_id = "0022300881"  # passed to fetch_game_stats in a later cell
# NOTE(review): this is a string, while a later cell compares the TEAM_ID
# column against an integer literal; confirm the column dtype before
# filtering on this value.
team_id = "1610612741"

In [None]:
def fetch_team_stats(season):
    """Return the first data frame of ``LeagueDashTeamStats`` for *season*."""
    endpoint = LeagueDashTeamStats(season=season)
    frames = endpoint.get_data_frames()
    return frames[0]

def fetch_game_stats(game_id, season_nullable=None):
    """Return the first ``LeagueGameFinder`` data frame for a season.

    Args:
        game_id: Accepted for backward compatibility only. It is not sent to
            the endpoint, so the result is not restricted to a single game.
        season_nullable: Season string forwarded to ``LeagueGameFinder``.
            When omitted, the notebook-level ``season`` variable is used,
            which is what this function always did before.

    Returns:
        The first data frame returned by the endpoint.
    """
    if season_nullable is None:
        # Fall back to the notebook global so existing calls keep working.
        season_nullable = season
    finder = LeagueGameFinder(season_nullable=season_nullable)
    return finder.get_data_frames()[0]

def fetch_player_stats(season):
    """Return the first data frame of ``LeagueDashPlayerStats`` for *season*."""
    endpoint = LeagueDashPlayerStats(season=season)
    frames = endpoint.get_data_frames()
    return frames[0]

def fetch_lineup_data(season, minutes_min):
    """Return the first ``LeagueLineupViz`` data frame for *season*.

    The endpoint is queried with the "Advanced" measure type and the given
    *minutes_min* value.
    """
    query = {
        "season": season,
        "measure_type_detailed_defense": "Advanced",
        "minutes_min": minutes_min,
    }
    return LeagueLineupViz(**query).get_data_frames()[0]

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=30))
def fetch_substitution_events(game_id):
    """Return the play-by-play rows of *game_id* whose EVENTMSGTYPE equals 8.

    This notebook treats those rows as the substitution events. The call is
    wrapped in a tenacity retry that stops after three attempts and waits
    with exponential back-off between them.
    """
    events = PlayByPlayV2(game_id=game_id).get_data_frames()[0]
    is_substitution = events["EVENTMSGTYPE"] == 8
    return events[is_substitution]

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=2, min=5, max=40))
def fetch_team_on_off_details(team_id, season):
    """Return the first ``TeamPlayerOnOffDetails`` data frame for a team and season.

    Retried by tenacity: at most three attempts with exponential back-off.
    """
    endpoint = TeamPlayerOnOffDetails(team_id=team_id, season=season)
    frames = endpoint.get_data_frames()
    return frames[0]

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=2, min=5, max=40))
def fetch_player_on_details(team_id, season):
    """Return the data frame at index 1 of ``TeamPlayerOnOffSummary``.

    NOTE(review): presumably index 1 is the players-on-court table; confirm
    against the order in which the endpoint returns its data sets.
    Retried by tenacity: at most three attempts with exponential back-off.
    """
    endpoint = TeamPlayerOnOffSummary(team_id=team_id, season=season)
    frames = endpoint.get_data_frames()
    return frames[1]

@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=2, min=5, max=40))
def fetch_player_off_details(team_id, season):
    """Return the data frame at index 2 of ``TeamPlayerOnOffSummary``.

    NOTE(review): presumably index 2 is the players-off-court table; confirm
    against the order in which the endpoint returns its data sets.
    Retried by tenacity: at most three attempts with exponential back-off.
    """
    endpoint = TeamPlayerOnOffSummary(team_id=team_id, season=season)
    frames = endpoint.get_data_frames()
    return frames[2]

In [None]:
# Season-wide tables fetched once and reused by the cells below.
team_stats = fetch_team_stats(season=season)
# NOTE(review): fetch_game_stats does not send game_id to the endpoint; the
# result covers the season held in the notebook-level `season` variable.
game_stats = fetch_game_stats(game_id=game_id)
player_stats = fetch_player_stats(season=season)
# 5 is the minutes_min value forwarded to the lineup endpoint.
lineup_data = fetch_lineup_data(season=season, minutes_min=5)

In [None]:
# Collect the on/off tables of every team, then combine each kind once at the
# end instead of growing a DataFrame inside the loop.
team_on_off_frames = []
player_on_frames = []
player_off_frames = []
# Iterating the TEAM_ID column (at most the first 100 rows) gives tqdm a
# length, so the progress bar can show a total.
for current_team_id in tqdm(team_stats['TEAM_ID'].head(100)):
  try:
    # Fetch all three tables before storing any of them: a team that fails
    # part-way is then skipped entirely and the combined tables stay in step.
    team_on_off_details = fetch_team_on_off_details(team_id=current_team_id, season=season)
    player_on_details = fetch_player_on_details(team_id=current_team_id, season=season)
    player_off_details = fetch_player_off_details(team_id=current_team_id, season=season)
  except Exception as e:
    # Deliberate best effort: report the team and carry on with the next one.
    print(f"Skipping team {current_team_id} after retries: {e}")
    continue
  team_on_off_frames.append(team_on_off_details)
  player_on_frames.append(player_on_details)
  player_off_frames.append(player_off_details)

# pd.concat raises on an empty list, so fall back to an empty frame when every
# team was skipped.
all_team_on_off_details = pd.concat(team_on_off_frames) if team_on_off_frames else pd.DataFrame()
all_player_on_details = pd.concat(player_on_frames) if player_on_frames else pd.DataFrame()
all_player_off_details = pd.concat(player_off_frames) if player_off_frames else pd.DataFrame()

all_team_on_off_details.to_csv("team_on_off_details.csv")
all_player_on_details.to_csv("player_on_details.csv")
all_player_off_details.to_csv("player_off_details.csv")

In [None]:
# Look at the first 100 rows of game_stats, as before, but request each game
# only once. NOTE(review): game_stats appears to hold one row per team per
# game, so the same GAME_ID can occur twice; without de-duplication the same
# play-by-play would be downloaded and appended twice.
game_ids = game_stats['GAME_ID'].head(100).drop_duplicates()

substitution_frames = []
for current_game_id in tqdm(game_ids):
  try:
    substitution_events = fetch_substitution_events(game_id=current_game_id)
  except Exception as e:
    # Deliberate best effort: report the game and carry on with the next one.
    print(f"Skipping game {current_game_id} after retries: {e}")
    continue
  substitution_frames.append(substitution_events)

# pd.concat raises on an empty list, so fall back to an empty frame when every
# game was skipped.
all_substitution_events = pd.concat(substitution_frames) if substitution_frames else pd.DataFrame()

all_substitution_events.to_csv("substitution_events.csv")

61it [03:51, 28.41s/it]

Skipping game 0042300213 after retries: RetryError[<Future at 0x79b50d546510 state=finished raised ReadTimeout>]


62it [05:26, 48.15s/it]

Skipping game 0042300233 after retries: RetryError[<Future at 0x79b50d735490 state=finished raised ReadTimeout>]


63it [07:00, 61.99s/it]

Skipping game 0042300222 after retries: RetryError[<Future at 0x79b50d628d50 state=finished raised ReadTimeout>]


94it [10:46, 15.97s/it]

Skipping game 0042300115 after retries: RetryError[<Future at 0x79b50d65b3d0 state=finished raised ReadTimeout>]


95it [12:20, 30.60s/it]

In [None]:
# Display the collected player-on rows whose TEAM_ID equals 1610612744.
all_player_on_details[all_player_on_details['TEAM_ID'] == 1610612744]

In [None]:
# Keep the game_stats rows whose TEAM_ID equals 1610612744, renumber them
# from 0, and display the first of them.
game = game_stats.loc[game_stats['TEAM_ID'] == 1610612744].reset_index(drop=True)
game.loc[0]

In [None]:
# Displays every PLAYER1_TEAM_ID divided by 10; nothing is stored.
# NOTE(review): the purpose of the division is not evident from this notebook;
# it looks like leftover exploration, confirm before keeping.
all_substitution_events.PLAYER1_TEAM_ID/10

In [None]:

def plot_on_off_rating_difference(players_on_details, player_off_details, team_id, rating='NET_RATING'):
  """
  Plot, for each player of one team, the on-court rating minus the off-court rating.

  Args:
      players_on_details: DataFrame of player on-court details. Must contain
          TEAM_ID, VS_PLAYER_ID, VS_PLAYER_NAME and the ``rating`` column.
      player_off_details: DataFrame of player off-court details with the
          same columns.
      team_id: ID of the team to plot. Either an integer or a numeric string
          is accepted.
      rating: Name of the rating column to compare (default is 'NET_RATING').

  Raises:
      ValueError: If no player of ``team_id`` appears in both tables, since
          there would be nothing to plot.
  """

  # Compare team IDs numerically so that a string ID such as "1610612741"
  # still matches an integer TEAM_ID column.
  wanted_team = int(team_id)
  on_team_ids = pd.to_numeric(players_on_details['TEAM_ID'], errors='coerce')
  off_team_ids = pd.to_numeric(player_off_details['TEAM_ID'], errors='coerce')
  team_on_rows = players_on_details.loc[on_team_ids == wanted_team]
  team_off_rows = player_off_details.loc[off_team_ids == wanted_team]

  # Pair each player's on-court row with the matching off-court row.
  player_on_off_details = pd.merge(team_on_rows, team_off_rows, on='VS_PLAYER_ID', suffixes=('_on', '_off'))
  if player_on_off_details.empty:
    raise ValueError(f"No on/off player data found for team {team_id}")

  # Calculate the difference in rating
  player_on_off_details['RATING_DIFF'] = player_on_off_details[f'{rating}_on'] - player_on_off_details[f'{rating}_off']

  # Create the plot
  plt.figure(figsize=(12, 8))
  sns.barplot(x='VS_PLAYER_NAME_on', y='RATING_DIFF', data=player_on_off_details)
  plt.xticks(rotation=90)
  plt.xlabel('Player Name')
  plt.ylabel('On/Off Rating Difference')
  # The title previously ended in a dangling "in "; name the team explicitly.
  plt.title(f'Difference in On/Off {rating.upper()} for Each Player in Team {team_id}')
  plt.tight_layout()
  plt.show()

In [None]:
# Use the combined all-team tables: player_on_details / player_off_details are
# leftovers of the collection loop and only hold the last team fetched.
# team_id is converted to int because it is stored as a string, while an
# earlier cell filters the TEAM_ID column with an integer.
plot_on_off_rating_difference(all_player_on_details, all_player_off_details, team_id=int(team_id), rating='NET_RATING')

In [None]:
def _substitution_team_label(team_events, team_id):
    """Return a display name for a team, taken from its substitution rows."""
    # NOTE(review): PLAYER1_TEAM_NAME may not exist in the play-by-play data,
    # so fall back to other name-like columns and finally to the ID itself.
    for column in ('PLAYER1_TEAM_NAME', 'PLAYER1_TEAM_NICKNAME', 'PLAYER1_TEAM_ABBREVIATION'):
        if column in team_events.columns:
            names = team_events[column].dropna()
            if not names.empty:
                return str(names.iloc[0])
    return str(team_id)


def _first_rating_by_player(details):
    """Map each VS_PLAYER_ID to the first NET_RATING listed for that player."""
    unique_players = details.drop_duplicates('VS_PLAYER_ID')
    return dict(zip(unique_players['VS_PLAYER_ID'], unique_players['NET_RATING']))


def _team_rating_timeline(team_events, base_rating, on_ratings, off_ratings):
    """Return (labels, ratings, skipped) for one team's substitution rows."""
    has_period = 'PERIOD' in team_events.columns
    labels = []
    ratings = []
    skipped = 0
    current_rating = base_rating
    for _, row in team_events.iterrows():
        incoming_player_rating = on_ratings.get(row['PLAYER1_ID'])
        outgoing_player_rating = off_ratings.get(row['PLAYER2_ID'])
        if incoming_player_rating is None or outgoing_player_rating is None:
            # No rating is known for one of the two players; leave the
            # running rating untouched rather than failing the whole plot.
            skipped += 1
            continue
        current_rating = current_rating + incoming_player_rating - outgoing_player_rating
        clock = row['PCTIMESTRING']
        # The clock string alone repeats from period to period, so prefix
        # the period when that column is available.
        labels.append(f"P{row['PERIOD']} {clock}" if has_period else clock)
        ratings.append(current_rating)
    return labels, ratings, skipped


def plot_team_rating_over_substitutions(substitution_events, player_on_details, player_off_details, team_on_off_details, game_id):
    """
    Plot each team's running rating over one game, updated at every substitution.

    Each team starts from the first NET_RATING of its row in
    ``team_on_off_details``. For every substitution of that team, the
    NET_RATING of PLAYER1_ID from ``player_on_details`` is added and the
    NET_RATING of PLAYER2_ID from ``player_off_details`` is subtracted.

    NOTE(review): this keeps the original reading of PLAYER1_ID as the player
    coming in and PLAYER2_ID as the player going out; confirm that against the
    play-by-play data.

    Args:
        substitution_events: DataFrame of substitution events with GAME_ID,
            PLAYER1_TEAM_ID, PLAYER1_ID, PLAYER2_ID and PCTIMESTRING columns.
        player_on_details: DataFrame of player on-court details with TEAM_ID,
            VS_PLAYER_ID and NET_RATING columns.
        player_off_details: DataFrame of player off-court details with the
            same columns.
        team_on_off_details: DataFrame of team details with TEAM_ID and
            NET_RATING columns.
        game_id: ID of the game to plot.

    Raises:
        ValueError: If no substitution events exist for ``game_id``, or if a
            team in the game has no row in ``team_on_off_details``.
    """
    game_events = substitution_events.loc[substitution_events['GAME_ID'] == game_id]
    # Identical rows can only come from the same game having been collected
    # twice, so drop them to avoid applying a substitution more than once.
    game_events = game_events.drop_duplicates()

    team_ids = game_events['PLAYER1_TEAM_ID'].dropna().unique()
    if len(team_ids) == 0:
        raise ValueError(f"No substitution events found for game {game_id}")

    timelines = []
    for team_id in team_ids:
        # Filter into fresh names on every pass; overwriting the arguments
        # would leave nothing to filter for the second team.
        team_events = game_events.loc[game_events['PLAYER1_TEAM_ID'] == team_id]
        team_totals = team_on_off_details.loc[team_on_off_details['TEAM_ID'] == team_id]
        if team_totals.empty:
            raise ValueError(f"No team on/off details found for team {team_id}")
        base_rating = team_totals['NET_RATING'].values[0]

        on_ratings = _first_rating_by_player(player_on_details.loc[player_on_details['TEAM_ID'] == team_id])
        off_ratings = _first_rating_by_player(player_off_details.loc[player_off_details['TEAM_ID'] == team_id])

        labels, ratings, skipped = _team_rating_timeline(team_events, base_rating, on_ratings, off_ratings)
        if skipped:
            print(f"Team {team_id}: skipped {skipped} substitutions without player ratings")
        timelines.append((_substitution_team_label(team_events, team_id), labels, ratings))

    # One subplot per team found in the game; squeeze=False keeps `axes`
    # two-dimensional even when there is a single team.
    fig, axes = plt.subplots(len(timelines), squeeze=False)
    fig.suptitle('Team Rating Over Time (Substitutions)')
    for ax, (team_name, labels, ratings) in zip(axes[:, 0], timelines):
        # Plot against the substitution order; repeated clock labels would
        # otherwise share one position on a categorical axis.
        positions = list(range(len(ratings)))
        ax.plot(positions, ratings, marker='o')
        ax.set_xticks(positions)
        ax.set_xticklabels(labels, rotation=45)
        ax.set_xlabel('Time')
        ax.set_ylabel(f'{team_name} Rating')
        ax.set_title(f'{team_name} Rating Over Time (Substitutions)')
    plt.tight_layout()
    plt.show()