# Prerequisites

In [1]:
!pip install --no-cache-dir --force-reinstall numpy==1.26.4 pandas==2.2.2

Collecting numpy==1.26.4
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m62.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pandas==2.2.2
  Downloading pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting python-dateutil>=2.8.2 (from pandas==2.2.2)
  Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting pytz>=2020.1 (from pandas==2.2.2)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas==2.2.2)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting six>=1.5 (from python-dateutil>=2.8.2->pandas==2.2.2)
  Downloading six-1.17.0-py2.py3-none-any.whl.metadata (1.7 kB)
Downloading num

In [1]:
!pip install nba_api

Collecting nba_api
  Downloading nba_api-1.9.0-py3-none-any.whl.metadata (5.8 kB)
Downloading nba_api-1.9.0-py3-none-any.whl (284 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.9/284.9 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nba_api
Successfully installed nba_api-1.9.0


In [39]:
import pandas as pd
import numpy as np
from collections import defaultdict
from nba_api.stats.endpoints import PlayByPlayV2, GameRotation

# Game Configuration

In [40]:
# Identifier of the game to analyse; it is passed to both the PlayByPlayV2 and
# GameRotation endpoints when the data is loaded.
game_id = '0022300001'

In [41]:
# Play-by-play: the endpoint returns several frames, the first one is used here
pbp_frames = PlayByPlayV2(game_id=game_id).get_data_frames()
pbp = pbp_frames[0]

# Rotation (substitution stint) data, one frame per team, each tagged with
# the side it belongs to
rotation = GameRotation(game_id=game_id)
home_df = rotation.home_team.get_data_frame().assign(TEAM_SIDE='home')
away_df = rotation.away_team.get_data_frame().assign(TEAM_SIDE='away')

# Single frame holding the stints of both teams
rotation_df = pd.concat((home_df, away_df), ignore_index=True)

# Build Timeline from Sub Events

In [42]:
# Turn every rotation row (one stint of one player) into two timeline
# entries: the moment the player checks in and the moment he checks out.
events = []
for _, stint in rotation_df.iterrows():
    who = {
        'player': f"{stint['PLAYER_FIRST']} {stint['PLAYER_LAST']}",
        'team': stint['TEAM_SIDE'],
    }
    events.extend(
        {'time': stint[time_col], **who, 'action': action}
        for action, time_col in (('in', 'IN_TIME_REAL'), ('out', 'OUT_TIME_REAL'))
    )

# Chronological order; the sort is stable, so ties keep their insertion order
events.sort(key=lambda ev: ev['time'])

# Build Lineup vs Lineup Segments

In [43]:
# Replay the substitution events in time order and record every stretch of
# game time during which both teams have exactly five players on the floor.
SEGMENT_COLUMNS = ['start_time', 'end_time', 'home_lineup', 'away_lineup', 'duration']

lineup_segments = []
current_lineups = {'home': set(), 'away': set()}
prev_time = 0

for event in events:
    current_time = event['time']

    both_sides_full = all(len(lineup) == 5 for lineup in current_lineups.values())
    # Only emit a segment when time has actually advanced. Several
    # substitutions sharing one timestamp would otherwise produce zero-length
    # segments for intermediate lineups that never played together.
    if both_sides_full and current_time > prev_time:
        lineup_segments.append({
            'start_time': prev_time,
            'end_time': current_time,
            'home_lineup': tuple(sorted(current_lineups['home'])),
            'away_lineup': tuple(sorted(current_lineups['away'])),
            'duration': current_time - prev_time
        })

    # Apply the substitution after closing the segment it terminates
    team = event['team']
    player = event['player']
    if event['action'] == 'in':
        current_lineups[team].add(player)
    else:
        current_lineups[team].discard(player)

    prev_time = current_time

# Passing the column list keeps the frame well-formed (and the rescaling below
# valid) even when no complete 5-v-5 segment was found.
lineup_df = pd.DataFrame(lineup_segments, columns=SEGMENT_COLUMNS)

# NOTE(review): the rotation timestamps appear to be in tenths of a second —
# confirm against the GameRotation documentation. Dividing by 10 yields the
# seconds used by the clock helpers further down.
time_columns = ['start_time', 'end_time', 'duration']
lineup_df[time_columns] = lineup_df[time_columns] / 10

## Add Period and Clock Format

In [44]:
def seconds_to_pctimestring(seconds):
    """Format a number of seconds as an ``M:SS`` clock string.

    Fractional seconds are truncated, e.g. ``701.5`` becomes ``"11:41"``.
    """
    m, s = divmod(int(seconds), 60)
    return f"{m}:{s:02}"


def get_period_and_clock(seconds, period_seconds=720, overtime_seconds=300,
                         regulation_periods=4):
    """Convert elapsed game time into ``(period, remaining-clock string)``.

    Parameters
    ----------
    seconds : int or float
        Seconds elapsed since the start of the game.
    period_seconds : int, default 720
        Length of a regulation period.
    overtime_seconds : int, default 300
        Length of an overtime period. Previously overtime was treated as
        another 720-second period, giving wrong periods and clocks.
    regulation_periods : int, default 4
        Number of regulation periods before overtime starts.

    Returns
    -------
    tuple of (int, str)
        1-based period number and the time left in that period as ``M:SS``.
        A time that falls exactly on a period boundary is reported as the
        start of the following period.
    """
    regulation_seconds = period_seconds * regulation_periods
    if seconds < regulation_seconds:
        period_index, elapsed_in_period = divmod(seconds, period_seconds)
        period = int(period_index) + 1
        period_length = period_seconds
    else:
        overtime_index, elapsed_in_period = divmod(seconds - regulation_seconds, overtime_seconds)
        period = regulation_periods + int(overtime_index) + 1
        period_length = overtime_seconds
    return period, seconds_to_pctimestring(period_length - elapsed_in_period)

def _period_clock_series(elapsed_seconds):
    """Wrap the (period, clock) pair in a Series so apply() expands it into two columns."""
    return pd.Series(get_period_and_clock(elapsed_seconds))


# Derive period number and clock string for both ends of every segment
for time_col, target_cols in (
    ('start_time', ['period', 'start_pctimestring']),
    ('end_time', ['end_period', 'end_pctimestring']),
):
    lineup_df[target_cols] = lineup_df[time_col].apply(_period_clock_series)

# Match Lineups to PBP Events

In [45]:
def pctimestring_to_seconds(t):
    """Parse an ``M:SS`` clock string into a whole number of seconds."""
    m, s = map(int, t.split(":"))
    return m * 60 + s


def convert_to_game_time(period, pctimestring, period_seconds=720,
                         overtime_seconds=300, regulation_periods=4):
    """Convert a period number and remaining-clock string to elapsed game seconds.

    Parameters
    ----------
    period : int
        1-based period number; values above ``regulation_periods`` are overtime.
    pctimestring : str
        Time left in the period, formatted ``M:SS``.
    period_seconds : int, default 720
        Length of a regulation period.
    overtime_seconds : int, default 300
        Length of an overtime period. Previously overtime was treated as
        another 720-second period, which misplaced every overtime event.
    regulation_periods : int, default 4
        Number of regulation periods before overtime starts.

    Returns
    -------
    int
        Seconds elapsed since the start of the game.
    """
    remaining = pctimestring_to_seconds(pctimestring)
    if period <= regulation_periods:
        return (period - 1) * period_seconds + (period_seconds - remaining)
    overtime_index = period - regulation_periods - 1
    return (
        regulation_periods * period_seconds
        + overtime_index * overtime_seconds
        + (overtime_seconds - remaining)
    )

def _elapsed_seconds_for_event(pbp_row):
    """Elapsed game seconds for one play-by-play row, from its period and clock."""
    return convert_to_game_time(pbp_row['PERIOD'], pbp_row['PCTIMESTRING'])


pbp['GAME_CLOCK_SEC'] = pbp.apply(_elapsed_seconds_for_event, axis=1)

def find_lineup(row, lineup_df):
    """Look up the home/away lineups on the floor for one play-by-play event.

    Parameters
    ----------
    row : mapping
        Must provide ``'GAME_CLOCK_SEC'`` (elapsed seconds) and
        ``'EVENTMSGTYPE'``.
    lineup_df : pandas.DataFrame
        Segments with ``start_time``, ``end_time``, ``home_lineup`` and
        ``away_lineup`` columns.

    Returns
    -------
    pandas.Series
        ``[home_lineup, away_lineup]`` of the first matching segment, or
        ``[None, None]`` when no segment contains the event.

    Notes
    -----
    Event type 8 is matched against ``[start, end)``, so it belongs to the
    segment that begins at its timestamp. Every other event is matched
    against ``(start, end]``, so it belongs to the segment that ends there.
    """
    no_match = pd.Series([None, None])
    if lineup_df.empty:
        return no_match

    t = row['GAME_CLOCK_SEC']
    starts = lineup_df['start_time']
    ends = lineup_df['end_time']

    # One vectorised comparison instead of a Python-level scan over every
    # segment for every play-by-play row.
    if row['EVENTMSGTYPE'] == 8:
        in_segment = (starts <= t) & (t < ends)
    else:
        in_segment = (starts < t) & (t <= ends)

    hits = lineup_df.loc[in_segment, ['home_lineup', 'away_lineup']]
    if hits.empty:
        return no_match

    first = hits.iloc[0]
    return pd.Series([first['home_lineup'], first['away_lineup']])

# Resolve the on-court lineups for every play-by-play row; each returned
# two-element Series expands into the two new columns.
matched_lineups = pbp.apply(find_lineup, axis=1, args=(lineup_df,))
pbp[['HOME_LINEUP', 'AWAY_LINEUP']] = matched_lineups

# Build Final Event Timeline

In [46]:
# Event type codes 1 through 13 are kept
important_event_types = list(range(1, 14))


def _text_or_blank(value):
    """Return ``str(value)``, or an empty string when the value is missing."""
    return str(value) if pd.notna(value) else ''


def _timeline_entry(event):
    """Build the timeline record for a single play-by-play row."""
    return {
        'EVENTNUM': event['EVENTNUM'],
        'PERIOD': event['PERIOD'],
        'TIME': event['PCTIMESTRING'],
        'EVENT_TYPE': event['EVENTMSGTYPE'],
        'SCORE': event['SCORE'],
        'HOME_DESCRIPTION': _text_or_blank(event['HOMEDESCRIPTION']),
        'AWAY_DESCRIPTION': _text_or_blank(event['VISITORDESCRIPTION']),
        'HOME_LINEUP': event['HOME_LINEUP'],
        'AWAY_LINEUP': event['AWAY_LINEUP'],
        'PLAYER1_NAME': event['PLAYER1_NAME'],
        'PLAYER2_NAME': event['PLAYER2_NAME'],
        'PLAYER3_NAME': event['PLAYER3_NAME'],
    }


# Only rows of a kept event type that were matched to a lineup (a tuple) survive
lineup_timeline = [
    _timeline_entry(event)
    for _, event in pbp.iterrows()
    if event['EVENTMSGTYPE'] in important_event_types
    and isinstance(event['HOME_LINEUP'], tuple)
]

lineup_event_df = pd.DataFrame(lineup_timeline)
lineup_event_df.head()

Unnamed: 0,EVENTNUM,PERIOD,TIME,EVENT_TYPE,SCORE,HOME_DESCRIPTION,AWAY_DESCRIPTION,HOME_LINEUP,AWAY_LINEUP,PLAYER1_NAME,PLAYER2_NAME,PLAYER3_NAME
0,7,1,11:41,1,0 - 2,Turner 2' Cutting Dunk Shot (2 PTS) (Haliburto...,,"(Bennedict Mathurin, Bruce Brown, Myles Turner...","(Darius Garland, Donovan Mitchell, Evan Mobley...",Myles Turner,Tyrese Haliburton,
1,9,1,11:23,2,,,MISS Mobley 8' Turnaround Jump Shot,"(Bennedict Mathurin, Bruce Brown, Myles Turner...","(Darius Garland, Donovan Mitchell, Evan Mobley...",Evan Mobley,,
2,10,1,11:20,4,,Turner REBOUND (Off:0 Def:1),,"(Bennedict Mathurin, Bruce Brown, Myles Turner...","(Darius Garland, Donovan Mitchell, Evan Mobley...",Myles Turner,,
3,11,1,11:15,2,,MISS Brown 2' Running Layup,,"(Bennedict Mathurin, Bruce Brown, Myles Turner...","(Darius Garland, Donovan Mitchell, Evan Mobley...",Bruce Brown,,
4,12,1,11:12,4,,Brown REBOUND (Off:1 Def:0),,"(Bennedict Mathurin, Bruce Brown, Myles Turner...","(Darius Garland, Donovan Mitchell, Evan Mobley...",Bruce Brown,,


# Lineup vs Lineup Stats & Player Contributions

## Step 1: Aggregate Events into Lineup Segments

Each segment represents a time window with the same 5v5 lineups. We'll:
 - Track team stats: points, 3pt/2pt/FT made, rebounds, assists, TOs, etc.
 - Track player stats: for each player involved in that lineup

In [48]:
# Group the event timeline into stretches ("segments") during which both
# lineups stay unchanged, tallying team and player stats from the
# play-by-play description text.
lineup_segments = []
segment = None
prev_home = None
prev_away = None
# Side whose most recent missed attempt has not yet been followed by a
# player rebound; used to classify that rebound as offensive or defensive.
last_shot_team = None

# Descriptions containing any of these substrings (case-sensitive) are not tallied
SKIP_KEYWORDS = ['SUB', 'Jump Ball', 'Delay', 'Offensive', 'Timeout', 'Rebound']
# Substrings identifying a two-point field goal when the description has no 'MISS'
TWO_POINT_KEYWORDS = ['Fadeaway', 'Dunk', 'Layup', 'Jump Shot', 'Hook Shot']


def _credit(segment, team_side, players_on_court, stat, player, value=1):
    """Add ``value`` to ``stat`` for the team and, if on the floor, the player.

    The team total is always incremented. The player total is incremented
    only when ``player`` is one of ``players_on_court``, so missing names
    (None/NaN) and names that do not match the lineup are ignored.
    """
    segment['team_stats'][team_side][stat] += value
    if player in players_on_court:
        segment['player_stats'][player][stat] += value


for _, row in lineup_event_df.iterrows():
    home_lineup = tuple(sorted(row['HOME_LINEUP']))
    away_lineup = tuple(sorted(row['AWAY_LINEUP']))

    # Start new segment if lineups changed
    if segment is None or home_lineup != prev_home or away_lineup != prev_away:
        if segment:
            # The outgoing segment is closed at the first event of the new one
            segment['end_event'] = row['EVENTNUM']
            segment['end_time'] = row['TIME']
            lineup_segments.append(segment)

        segment = {
            'home_lineup': home_lineup,
            'away_lineup': away_lineup,
            'start_event': row['EVENTNUM'],
            'start_time': row['TIME'],
            'period': row['PERIOD'],
            'team_stats': {'home': defaultdict(int), 'away': defaultdict(int)},
            'player_stats': defaultdict(lambda: defaultdict(int)),
            'events': []
        }

        prev_home = home_lineup
        prev_away = away_lineup

    segment['events'].append({
        'EVENTNUM': row['EVENTNUM'],
        'TIME': row['TIME'],
        'HOME_DESCRIPTION': row['HOME_DESCRIPTION'],
        'AWAY_DESCRIPTION': row['AWAY_DESCRIPTION']
    })

    primary = row['PLAYER1_NAME']
    secondary = row['PLAYER2_NAME']
    # Blocks used to be credited to PLAYER2_NAME, the assist slot, which is
    # empty on a missed shot, so no player ever received a block. The blocker
    # is expected in PLAYER3_NAME (confirm against the PlayByPlayV2 column
    # documentation); PLAYER2_NAME is kept as a fallback when it is missing.
    blocker = row['PLAYER3_NAME'] if isinstance(row['PLAYER3_NAME'], str) else secondary

    # Process event descriptions
    for team_side, desc in [('home', row['HOME_DESCRIPTION']), ('away', row['AWAY_DESCRIPTION'])]:
        if not desc or any(skip in desc for skip in SKIP_KEYWORDS):
            continue

        players_on_court = set(home_lineup if team_side == 'home' else away_lineup)
        tally = (segment, team_side, players_on_court)
        missed = 'MISS' in desc

        # Scoring
        if '3PT' in desc and not missed:
            _credit(*tally, '3pt_made', primary)
            _credit(*tally, 'points', primary, 3)
        elif 'Free Throw' in desc and not missed:
            _credit(*tally, 'ft_made', primary)
            _credit(*tally, 'points', primary, 1)
        elif not missed and any(kw in desc for kw in TWO_POINT_KEYWORDS):
            _credit(*tally, '2pt_made', primary)
            _credit(*tally, 'points', primary, 2)
        elif missed:
            last_shot_team = team_side

        # Other actions
        if 'AST' in desc:
            _credit(*tally, 'assists', secondary)
        if 'Turnover' in desc:
            _credit(*tally, 'turnovers', primary)
        if 'STL' in desc:
            _credit(*tally, 'steals', secondary)
        if 'BLK' in desc:
            _credit(*tally, 'blocks', blocker)
        if '.FOUL' in desc:
            _credit(*tally, 'fouls', primary)

        # Rebounds: offensive when grabbed by the side that missed, else defensive
        if 'REBOUND' in desc:
            if last_shot_team:
                if team_side == last_shot_team:
                    _credit(*tally, 'off_rebounds', primary)
                else:
                    _credit(*tally, 'def_rebounds', primary)
            last_shot_team = None

# Final flush: close the last open segment at the last processed event
if segment:
    segment['end_event'] = row['EVENTNUM']
    segment['end_time'] = row['TIME']
    lineup_segments.append(segment)

## Step 2: Build Lineup vs Lineup DataFrame


In [49]:
# Segment fields copied as-is into every output row
_SEGMENT_FIELDS = (
    'period', 'start_event', 'end_event',
    'start_time', 'end_time',
    'home_lineup', 'away_lineup',
)

# One row per segment: the identifying fields followed by each team's stat
# totals, prefixed with the side they belong to (e.g. 'home_points').
lineup_vs_lineup_data = [
    {
        **{field: seg[field] for field in _SEGMENT_FIELDS},
        **{
            f'{team}_{stat}': val
            for team in ['home', 'away']
            for stat, val in seg['team_stats'][team].items()
        },
    }
    for seg in lineup_segments
]

lineup_vs_lineup_df = pd.DataFrame(lineup_vs_lineup_data)

## Step 3: Player Stats per Segment

In [50]:
# Segment fields repeated on every player row
_PLAYER_ROW_SEGMENT_FIELDS = (
    'period', 'start_event', 'end_event',
    'start_time', 'end_time',
    'home_lineup', 'away_lineup',
)

# One row per (segment, player) pair for players with at least one tallied
# stat in that segment: player name, segment fields, then the stat totals.
player_stats_data = [
    {
        'player': player,
        **{field: seg[field] for field in _PLAYER_ROW_SEGMENT_FIELDS},
        **stats,
    }
    for seg in lineup_segments
    for player, stats in seg['player_stats'].items()
]

player_stats_df = pd.DataFrame(player_stats_data)