# Efficiency of Missed Second Rebounds

For poor free throw shooters, is it a better strategy to *intentionally* miss the second free throw and aim for the offensive rebound and new possession? Analysis of play-by-play data using [nba_api](https://github.com/swar/nba_api).

In [1]:
import numpy as np
import pandas as pd

In [254]:
#Get a list of team IDs
from nba_api.stats.static import teams

nba_teams = teams.get_teams()

# Select the dictionary for the Pacers, which contains their team ID
team_ids = [team['id'] for team in nba_teams]
print(f'team IDs found: {len(team_ids)}')

team IDs found: 30


In [785]:
# Query for the regular season games of the Cavs (team no. 3 alphabetically)
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.library.parameters import Season
from nba_api.stats.library.parameters import SeasonType

gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_ids[2], 
                            season_nullable=Season.default,
                            season_type_nullable=SeasonType.regular)  

games_dict = gamefinder.get_normalized_dict()
games = games_dict['LeagueGameFinderResults']
game_IDs = [game['GAME_ID'] for game in games]

In [875]:
games

[{'SEASON_ID': '22022',
  'TEAM_ID': 1610612739,
  'TEAM_ABBREVIATION': 'CLE',
  'TEAM_NAME': 'Cleveland Cavaliers',
  'GAME_ID': '0022200645',
  'GAME_DATE': '2023-01-14',
  'MATCHUP': 'CLE @ MIN',
  'WL': 'L',
  'MIN': 241,
  'PTS': 102,
  'FGM': 37,
  'FGA': 76,
  'FG_PCT': 0.487,
  'FG3M': 8,
  'FG3A': 28,
  'FG3_PCT': 0.286,
  'FTM': 20,
  'FTA': 23,
  'FT_PCT': 0.87,
  'OREB': 8,
  'DREB': 32,
  'REB': 40,
  'AST': 24,
  'STL': 5,
  'BLK': 2,
  'TOV': 18,
  'PF': 17,
  'PLUS_MINUS': -8.0},
 {'SEASON_ID': '22022',
  'TEAM_ID': 1610612739,
  'TEAM_ABBREVIATION': 'CLE',
  'TEAM_NAME': 'Cleveland Cavaliers',
  'GAME_ID': '0022200631',
  'GAME_DATE': '2023-01-12',
  'MATCHUP': 'CLE @ POR',
  'WL': 'W',
  'MIN': 240,
  'PTS': 119,
  'FGM': 44,
  'FGA': 89,
  'FG_PCT': 0.494,
  'FG3M': 7,
  'FG3A': 22,
  'FG3_PCT': 0.318,
  'FTM': 24,
  'FTA': 31,
  'FT_PCT': 0.774,
  'OREB': 12,
  'DREB': 32,
  'REB': 44,
  'AST': 28,
  'STL': 6,
  'BLK': 1,
  'TOV': 11,
  'PF': 19,
  'PLUS_MINUS': 6.0

In [896]:
game_homeTeam = [game['MATCHUP'][-3:] if len(game['MATCHUP']) == 9 else game['MATCHUP'][0:3] for game in games]
game_homeTeam[18]

'CLE'

In [877]:
testString = 'CLE @ MIN'
testString[-3:]

'MIN'

In [791]:
game_IDs[18]

'0022200381'

In [2]:
game_id = "0022200552"

In [3]:
# Query for the play by play of that most recent regular season game
from nba_api.stats.endpoints import playbyplayv2
df = playbyplayv2.PlayByPlayV2(game_id).get_data_frames()[0]


#Convert the game clock time to seconds and re-order
df['PCTIME_SECONDS'] = df['PCTIMESTRING'].map(lambda x: int(x.split(":")[0])*60 + int(x.split(":")[1]))
df = df.sort_values(['PERIOD','PCTIME_SECONDS','EVENTNUM'], ascending=[True,False,True])
df = df.reset_index(drop=True)

#Update the SCORE column to fill in blanks
df.at[0,"SCORE"] = "0 - 0"
df["SCORE"] = df["SCORE"].fillna(method="ffill")

In [792]:
# Query for the play by play of that most recent regular season game
from nba_api.stats.endpoints import playbyplayv2
df_viol = playbyplayv2.PlayByPlayV2("0022200381").get_data_frames()[0]


#Convert the game clock time to seconds and re-order
df_viol['PCTIME_SECONDS'] = df_viol['PCTIMESTRING'].map(lambda x: int(x.split(":")[0])*60 + int(x.split(":")[1]))
df_viol = df_viol.sort_values(['PERIOD','PCTIME_SECONDS','EVENTNUM'], ascending=[True,False,True])
df_viol = df_viol.reset_index(drop=True)

#Update the SCORE column to fill in blanks
df_viol.at[0,"SCORE"] = "0 - 0"
df_viol["SCORE"] = df["SCORE"].fillna(method="ffill")

In [794]:
df_viol[df_viol['EVENTMSGTYPE'] == 5]

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
8,22200381,15,5,1,1,7:42 PM,10:57,Allen Bad Pass Turnover (P1.T1),,Huerter STEAL (1 STL),...,SAC,0,0,,,,,,1,657
17,22200381,28,5,1,1,7:46 PM,9:42,Allen Bad Pass Turnover (P2.T2),,Sabonis STEAL (1 STL),...,SAC,0,0,,,,,,1,582
26,22200381,40,5,4,1,7:48 PM,8:30,,,Mitchell Traveling Turnover (P1.T1),...,,1,0,,,,,,1,510
32,22200381,47,5,11,1,7:49 PM,7:30,,,Kings Turnover: Shot Clock (T#2),...,,1,0,,,,,,1,450
39,22200381,56,5,37,1,7:50 PM,6:42,,,Huerter Offensive Foul Turnover (P1.T3),...,,1,0,,,,,,1,402
54,22200381,80,5,11,1,7:55 PM,4:56,,,Kings Turnover: Shot Clock (T#4),...,,1,0,,,,,,1,296
55,22200381,81,5,39,1,7:56 PM,4:48,Garland Step Out of Bounds Turnover (P1.T3),,,...,,1,0,,,,,,1,288
69,22200381,100,5,1,1,7:59 PM,3:47,Allen Bad Pass Turnover (P3.T4),,Huerter STEAL (2 STL),...,SAC,0,0,,,,,,1,227
85,22200381,125,5,4,1,8:03 PM,2:26,,,Barnes Traveling Turnover (P1.T5),...,,1,0,,,,,,1,146
92,22200381,135,5,2,1,8:05 PM,1:44,Okoro Lost Ball Turnover (P1.T5),,Dellavedova STEAL (1 STL),...,SAC,0,0,,,,,,1,104


In [789]:
df_viol.iloc[204:209]

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
204,22200645,298,6,1,2,9:05 PM,1:23,McDaniels P.FOUL (P3.T4) (K.Scott),,,...,CLE,1,0,,,,,,1,83
205,22200645,300,7,5,2,9:06 PM,1:16,Nowell Violation:Kicked Ball (T.Maddox),,,...,,1,0,,,,,,0,76
206,22200645,301,8,0,2,9:06 PM,1:16,SUB: Anderson FOR McDaniels,,,...,MIN,0,0,,,,,,0,76
207,22200645,303,1,52,2,9:06 PM,1:07,,,Allen 1' Alley Oop Dunk (11 PTS) (Mitchell 2 AST),...,CLE,0,0,,,,,,1,67
208,22200645,305,1,9,2,9:07 PM,0:45,Reid 1' Driving Dunk (6 PTS) (Rivers 1 AST),,,...,MIN,0,0,,,,,,1,45


In [258]:
#Find missed last free throws
missed_FTs = df.index[df['EVENTMSGACTIONTYPE'].isin([10,12,15]) & df['SCORE'].isna()].tolist()

for freethrow in missed_FTs:
    if df.loc[freethrow]['PLAYER1_ID'] == df.loc[freethrow+1]['PLAYER1_ID'] and df.loc[freethrow+1]['EVENTMSGTYPE'] == 4:
        print(f'self-offensive rebound - EVENTNUM: {df.loc[freethrow]["EVENTNUM"]}')

self-offensive rebound - EVENTNUM: 701


In [594]:
#Find offensive rebounds
missedShots = df.index[df['EVENTMSGTYPE'] == 2].tolist()

OREBs = []
for shot in missedShots:
    if df.loc[shot]['PLAYER1_TEAM_ID'] == df.loc[shot+1]['PLAYER1_TEAM_ID'] and df.loc[shot+1]['EVENTMSGTYPE'] == 4:
        print(f'offensive rebound - LOC: {shot}')
        OREBs.append(shot+1)

offensive rebound - LOC: 71
offensive rebound - LOC: 103
offensive rebound - LOC: 114
offensive rebound - LOC: 176
offensive rebound - LOC: 275
offensive rebound - LOC: 290
offensive rebound - LOC: 302
offensive rebound - LOC: 313
offensive rebound - LOC: 353
offensive rebound - LOC: 388
offensive rebound - LOC: 415
offensive rebound - LOC: 422
offensive rebound - LOC: 436
offensive rebound - LOC: 466
offensive rebound - LOC: 470
offensive rebound - LOC: 500
offensive rebound - LOC: 515
offensive rebound - LOC: 525


In [263]:
missedShots

[3,
 5,
 11,
 21,
 24,
 26,
 28,
 30,
 59,
 64,
 71,
 80,
 82,
 84,
 86,
 89,
 99,
 103,
 111,
 114,
 120,
 136,
 138,
 149,
 151,
 155,
 157,
 161,
 166,
 168,
 171,
 176,
 181,
 183,
 185,
 188,
 195,
 208,
 222,
 228,
 230,
 236,
 240,
 245,
 247,
 249,
 251,
 264,
 267,
 275,
 284,
 290,
 297,
 302,
 313,
 321,
 335,
 340,
 353,
 365,
 367,
 376,
 378,
 386,
 388,
 390,
 398,
 411,
 413,
 415,
 417,
 422,
 428,
 436,
 439,
 452,
 460,
 466,
 470,
 495,
 500,
 503,
 505,
 508,
 510,
 512,
 515,
 518,
 522,
 525,
 528,
 543]

In [268]:
#Find offensive rebounds for cleaned DF
missedShots2 = df2.index[df2['EVENTMSGTYPE'] == 2].tolist()

for shot in missedShots2:
    if (df2.iloc[shot]['PLAYER1_TEAM_ID'] == df2.iloc[shot+1]['PLAYER1_TEAM_ID']):
        print(f'offensive rebound - LOC: {shot}')

offensive rebound - LOC: 69
offensive rebound - LOC: 98
offensive rebound - LOC: 107
offensive rebound - LOC: 164
offensive rebound - LOC: 168
offensive rebound - LOC: 255
offensive rebound - LOC: 262
offensive rebound - LOC: 277
offensive rebound - LOC: 289
offensive rebound - LOC: 300
offensive rebound - LOC: 340
offensive rebound - LOC: 364
offensive rebound - LOC: 373
offensive rebound - LOC: 400
offensive rebound - LOC: 407
offensive rebound - LOC: 421
offensive rebound - LOC: 424
offensive rebound - LOC: 449
offensive rebound - LOC: 453
offensive rebound - LOC: 482
offensive rebound - LOC: 496
offensive rebound - LOC: 506


In [270]:
#Find offensive rebounds for cleaned DF
missedShots2 = df2.index[df2['EVENTMSGTYPE'] == 2].tolist()

ORBs_2 = []
for shot in missedShots2:
    if (df2.iloc[shot]['PLAYER1_TEAM_ID'] == df2.iloc[shot+1]['PLAYER1_TEAM_ID']) and (df2.iloc[shot+1]['EVENTMSGTYPE'] == 4):
        print(f'offensive rebound - LOC: {shot}')
        ORBs_2.append(shot+1)

offensive rebound - LOC: 69
offensive rebound - LOC: 98
offensive rebound - LOC: 107
offensive rebound - LOC: 168
offensive rebound - LOC: 262
offensive rebound - LOC: 277
offensive rebound - LOC: 289
offensive rebound - LOC: 300
offensive rebound - LOC: 340
offensive rebound - LOC: 373
offensive rebound - LOC: 400
offensive rebound - LOC: 407
offensive rebound - LOC: 421
offensive rebound - LOC: 449
offensive rebound - LOC: 453
offensive rebound - LOC: 482
offensive rebound - LOC: 496
offensive rebound - LOC: 506


In [281]:
play_loc = 70 + 1


# Go through df for first relevant event:
# EVENTMSGTYPE ==  1 - Made shot
#              ==  2 - Missed shot
#              ==  4 - Rebound
#              ==  6 - Foul
#              == 13 - Period End

while not df2.iloc[play_loc]['EVENTMSGTYPE'] in [1,2,4,6,13]:
    play_loc += 1
    
print(f'LOC: {play_loc}, EVENTMSGTYPE: {df2.iloc[play_loc]["EVENTMSGTYPE"]}')

LOC: 71, EVENTMSGTYPE: 6


In [456]:
def eventScore(start,end,df):
    
    start_score = df.iloc[start]['SCORE'].split(" - ")
    end_score = df.iloc[end]['SCORE'].split(" - ")
    
    diff_score = [int(x) - int(y) for x, y in zip(end_score, start_score)]
    
    return max(diff_score)

### Random Functions

In [583]:
def extraRows(df_slice, df):
    
    # For a slice of a dataframe, shows the next two rows after each original row in the slice
    # Messy utility that gives more context to individual plays matching a criteria
    
    nextRows = df_slice.index.to_list()
    nextRows_new = [row + 1 for row in nextRows] + nextRows
    nextRows_new += [row + 2 for row in nextRows]
    nextRows_new.sort()
    
    return df.iloc[nextRows_new]

In [543]:
# Example of using the extraRows function
extraRows(df[df['EVENTMSGTYPE'] == 6],df)

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
7,0022200552,12,6,2,1,7:12 PM,11:05,,,Williams S.FOUL (P1.T1) (T.Brothers),...,CLE,1,0,,,,,,1,665
8,0022200552,14,3,11,1,7:13 PM,11:05,Mitchell Free Throw 1 of 2 (1 PTS),,,...,,0,0,,,,,,1,665
9,0022200552,15,3,12,1,7:13 PM,11:05,MISS Mitchell Free Throw 2 of 2,,,...,,0,0,,,,,,1,665
13,0022200552,22,6,2,1,7:14 PM,10:38,,,DeRozan S.FOUL (P1.T2) (S.Mehta),...,CLE,1,0,,,,,,1,638
14,0022200552,24,3,10,1,7:14 PM,10:38,MISS Allen Free Throw 1 of 1,,,...,,0,0,,,,,,1,638
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
516,0022200552,767,9,1,5,9:59 PM,0:41,CAVALIERS Timeout: Regular (Full 9 Short 0),,,...,,0,0,,,,,,0,41
517,0022200552,769,18,6,5,9:59 PM,0:41,,Instant Replay1st OT (9:59 PM EST),,...,,1,0,,,,,,1,41
521,0022200552,774,6,1,5,10:01 PM,0:31,,,Jones Jr. P.FOUL (P4.PN) (J.DeRosa),...,CLE,1,0,,,,,,1,31
522,0022200552,776,3,11,5,10:01 PM,0:31,Mitchell Free Throw 1 of 2 (70 PTS),,,...,,0,0,,,,,,1,31


### Turnovers and Period Ends
Where `EVENTMSGTYPE == 5` or `EVENTMSGTYPE == 13`

In [506]:
def quickPoss(play_loc, df):
    
    possession = {"start_loc":   play_loc,
                  "end_loc":     play_loc,
                  "score":       0,
                  "play_end":    False,
                  "description": ""}
    
    if df.iloc[play_loc]['EVENTMSGACTIONTYPE'] == 5:
        possession["play_end"] = True
        possession["description"] = "Turnover"
        
    elif df.iloc[play_loc]['EVENTMSGACTIONTYPE'] == 13:
        possession["play_end"] = True
        possession["description"] = "Quarter end"
        
    return possession

### Fouls
Where `EVENTMSGTYPE == 6`

In [618]:
# Function for EVENTMSGTYPE == 6 (foul)
def checkFT(play_loc, df):
    
    # Initialise the possession info to be returned
    start_loc = play_loc
    possession = {"start_loc":   start_loc,
                  "end_loc":     start_loc,
                  "score":       0,
                  "play_end":    False,
                  "description": "Nothing yet..."}
    
    # Check if it's an offensive foul, i.e. a turnover
    if df.iloc[play_loc]['EVENTMSGACTIONTYPE'] == 4:
        possession["description"] = "Offensive foul turnover"
        possession["play_end"] = True
        possession["end_loc"] = play_loc + 1 #Offensive foul turnovers are described in two events
        return possession
    
    # For non-shooting fouls, check if FTs are taken
    play_loc += 1
    while df.iloc[play_loc]['PCTIME_SECONDS'] == df.iloc[play_loc - 1]['PCTIME_SECONDS']:

        if (df.iloc[play_loc]['EVENTMSGACTIONTYPE'] in [10,12,15]):

            # Check if the last FT was missed and which team got the rebound
            if (df.iloc[play_loc+1]['EVENTMSGTYPE'] == 4):
                possession = checkRebound(play_loc, possession, df)

            # Otherwise it was a made FT
            else:
                possession["play_end"] = True
                possession["description"] = "Made last FT"

            # Add the score from the made FTs (if any)
            diff_score = eventScore(start_loc,play_loc,df)
            possession["score"] += diff_score

            possession["end_loc"] = play_loc
            return possession

        else:
            play_loc += 1

    # If no last FTs are found with the same time code, it was a non-shooting foul
    possession["description"] = "Non-Shooting foul"
    possession["end_loc"] = play_loc + 1
    return possession

In [619]:
# Testing the checkFT function

poss = checkFT(211,df)

print(f'{poss["description"]}\nScore: {poss["score"]}')
if poss["play_end"]:
    print('Possession ended')
df.iloc[poss['start_loc']:poss['end_loc']+1]

Made last FT
Score: 1
Possession ended


Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
211,22200552,308,6,2,2,8:06 PM,0:39,,,Caruso S.FOUL (P2.T4) (J.DeRosa),...,CLE,1,0,,,,,,1,39
212,22200552,310,3,10,2,8:06 PM,0:39,Mitchell Free Throw 1 of 1 (16 PTS),,,...,,0,0,,,,,,1,39


In [504]:
# Looking at offensive fouls

off_fouls = df[df['EVENTMSGACTIONTYPE'] == 4].index.to_list()
off_fouls += [foul + 1 for foul in off_fouls]
off_fouls.sort()

df.iloc[off_fouls]

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
110,22200552,178,5,4,2,7:43 PM,10:31,Lopez Traveling Turnover (P1.T4),,,...,,1,0,,,,,,1,631
111,22200552,179,8,0,2,7:44 PM,10:31,,,SUB: Dosunmu FOR Caruso,...,CHI,0,0,,,,,,0,631
156,22200552,243,6,4,2,7:55 PM,5:23,,,Vucevic OFF.Foul (P2) (S.Mehta),...,CLE,1,0,,,,,,1,323
157,22200552,245,5,37,2,7:55 PM,5:23,,,Vucevic Offensive Foul Turnover (P1.T2),...,,1,0,,,,,,1,323
205,22200552,313,6,4,2,8:07 PM,0:08,Mitchell OFF.Foul (P2) (T.Brothers),,,...,CHI,1,0,,,,,,1,8
206,22200552,315,5,37,2,8:07 PM,0:08,Mitchell Offensive Foul Turnover (P3.T8),,,...,,1,0,,,,,,1,8
292,22200552,447,6,4,3,8:47 PM,3:34,,,LaVine OFF.Foul (P3) (T.Brothers),...,CLE,1,0,,,,,,1,214
293,22200552,449,5,37,3,8:47 PM,3:34,,,LaVine Offensive Foul Turnover (P3.T5),...,,1,0,,,,,,1,214


In [601]:
missed_FTs

[9, 15, 97, 129, 146, 280, 426, 433, 457, 464, 488]

### Made Field Goals
Basically just checking for and-1s and adding the points from the shot

In [481]:
# Function for EVENTMSGTYPE == 1 (made FG)
def checkFG_made(play_loc,df):
    
    FG_score = eventScore(play_loc-1,play_loc,df)
    
    # Check if it's a shooting foul too, excluding when the next play is a foul from the same team
    if (df.iloc[play_loc + 1]['EVENTMSGTYPE'] == 6) and not (df.iloc[play_loc]['PLAYER1_TEAM_ID'] == df.iloc[play_loc + 1]['PLAYER1_TEAM_ID']):
        possession = checkFT(play_loc, df)
    
    # If just a made shot, return a standard possession dict with the score added
    else:
        possession = {"start_loc":   play_loc,
                      "end_loc":     play_loc,
                      "score":       0,
                      "play_end":    False,
                      "description": ""}
        
    possession['score'] += FG_score
    
    return possession    

In [612]:
# Testing the checkFG_made function
poss = checkFG_made(210,df)

print(f'{poss["description"]}\nScore: {poss["score"]}')
df.iloc[poss['start_loc']:poss['end_loc']+1]

211
212
last FT found
Made last FT
Score: 4


Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
210,22200552,307,1,80,2,8:06 PM,0:39,Mitchell 27' 3PT Step Back Jump Shot (15 PTS),,,...,,0,0,,,,,,1,39
211,22200552,308,6,2,2,8:06 PM,0:39,,,Caruso S.FOUL (P2.T4) (J.DeRosa),...,CLE,1,0,,,,,,1,39
212,22200552,310,3,10,2,8:06 PM,0:39,Mitchell Free Throw 1 of 1 (16 PTS),,,...,,0,0,,,,,,1,39
213,22200552,311,1,1,2,8:07 PM,0:29,,,LaVine 28' 3PT Jump Shot (10 PTS) (DeRozan 2 AST),...,CHI,0,0,,,,,,1,29


In [621]:
df.iloc[205:230]

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
205,22200552,301,4,1,2,8:05 PM,1:21,,,Bulls Rebound,...,,0,0,,,,,,0,81
206,22200552,302,3,12,2,8:05 PM,1:21,,,DeRozan Free Throw 2 of 2 (18 PTS),...,,0,0,,,,,,1,81
207,22200552,303,1,75,2,8:05 PM,1:02,Mitchell 2' Driving Finger Roll Layup (12 PTS)...,,,...,CLE,0,0,,,,,,1,62
208,22200552,305,2,1,2,8:06 PM,0:46,,,MISS Vucevic 24' 3PT Jump Shot,...,,0,0,,,,,,1,46
209,22200552,306,4,0,2,8:06 PM,0:44,Mitchell REBOUND (Off:0 Def:1),,,...,,0,0,,,,,,1,44
210,22200552,307,1,80,2,8:06 PM,0:39,Mitchell 27' 3PT Step Back Jump Shot (15 PTS),,,...,,0,0,,,,,,1,39
211,22200552,308,6,2,2,8:06 PM,0:39,,,Caruso S.FOUL (P2.T4) (J.DeRosa),...,CLE,1,0,,,,,,1,39
212,22200552,310,3,10,2,8:06 PM,0:39,Mitchell Free Throw 1 of 1 (16 PTS),,,...,,0,0,,,,,,1,39
213,22200552,311,1,1,2,8:07 PM,0:29,,,LaVine 28' 3PT Jump Shot (10 PTS) (DeRozan 2 AST),...,CHI,0,0,,,,,,1,29
214,22200552,313,6,4,2,8:07 PM,0:08,Mitchell OFF.Foul (P2) (T.Brothers),,,...,CHI,1,0,,,,,,1,8


### Missed FGs

In [586]:
def checkRebound(play_loc, possession, df):

    if df.iloc[play_loc]['PLAYER1_TEAM_ID'] == df.iloc[play_loc+1]['PLAYER1_TEAM_ID']:
        possession["description"] = "Miss - OREB"
    else:
        possession["description"] = "Miss - DREB"
        possession["play_end"] = True
        
    return possession

In [623]:
# Testing the checkRebound function
play_loc = 208

poss = {"start_loc":   play_loc,
        "end_loc":     play_loc,
        "score":       0,
        "play_end":    False,
        "description": ""}


poss = checkRebound(play_loc,poss,df)

print(f'{poss["description"]}\nScore: {poss["score"]}')
df.iloc[poss['start_loc']:poss['end_loc']+1]

Miss - DREB
Score: 0


Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
208,22200552,305,2,1,2,8:06 PM,0:46,,,MISS Vucevic 24' 3PT Jump Shot,...,,0,0,,,,,,1,46


In [589]:
df.iloc[3:6]

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
3,22200552,8,2,101,1,7:12 PM,11:17,MISS Mitchell 6' Driving Floating Jump Shot,,,...,,0,0,,,,,,1,677
4,22200552,9,4,0,1,7:12 PM,11:15,,,Vucevic REBOUND (Off:0 Def:1),...,,0,0,,,,,,1,675
5,22200552,10,2,41,1,7:12 PM,11:10,,,MISS Dosunmu 1' Running Layup,...,,0,0,,,,,,1,670


In [779]:
extraRows(df[(df['EVENTMSGTYPE'] == 7)],df)

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
362,22200552,530,7,3,4,9:04 PM,11:19,,,Lane,...,,0,0,,,,,,0,679
363,22200552,531,7,3,4,9:05 PM,11:19,,,Lane,...,,0,0,,,,,,0,679
363,22200552,531,7,3,4,9:05 PM,11:19,,,Lane,...,,0,0,,,,,,0,679
364,22200552,532,3,15,4,9:05 PM,11:19,Mitchell Free Throw 3 of 3 (42 PTS),,,...,,0,0,,,,,,1,679
364,22200552,532,3,15,4,9:05 PM,11:19,Mitchell Free Throw 3 of 3 (42 PTS),,,...,,0,0,,,,,,1,679
365,22200552,533,2,79,4,9:05 PM,11:01,,,MISS DeRozan 17' Pullup Jump Shot,...,,0,0,,,,,,1,661
385,22200552,563,7,5,4,9:09 PM,8:45,Mitchell Violation:Kicked Ball (T.Brothers),,,...,,1,0,,,,,,0,525
386,22200552,564,2,80,4,9:10 PM,8:29,,,MISS LaVine 22' Step Back Jump Shot,...,,0,0,,,,,,1,509
387,22200552,565,4,0,4,9:10 PM,8:27,Love REBOUND (Off:0 Def:11),,,...,,0,0,,,,,,1,507


In [595]:
OREBs

[72,
 104,
 115,
 177,
 276,
 291,
 303,
 314,
 354,
 389,
 416,
 423,
 437,
 467,
 471,
 501,
 516,
 526]

### Option 1 - Possession Class

In [805]:
class Possession:
    def __init__(self, start=1, df=df, complete=False, description=""):
        self.df = df
        self.start = start
        self.end = start
        self.complete = complete
        self.score = 0
        self.description = description
        self.time_elapsed = 0
        
        self.team = self.df.iloc[self.start]['PLAYER1_TEAM_ID']
        
    def __repr__(self):
        return f'Possession: loc {self.start}-{self.end}; {self.description}'
    
    def view(self):
        return self.df.iloc[self.start : self.end + 1]
    
    
    def possession(self):
        while not self.complete:
            
            event_msg = self.df.iloc[self.end]['EVENTMSGTYPE']
            
            if event_msg == 1:
                self.madeFG()
            elif event_msg == 2:
                self.checkRebound()
            elif event_msg == 6:
                self.checkFoul()
            elif event_msg in [5,13]:
                self.endPossession()
            else:
                self.end += 1
        
        self.time_elapsed = self.df.iloc[self.start]['PCTIME_SECONDS'] - self.df.iloc[self.end]['PCTIME_SECONDS']
    
    
    def madeFG(self):
    
        # If it's a shooting foul, go through the FT loop
        if (self.df.iloc[self.end + 1]['EVENTMSGTYPE'] == 6) and (self.df.iloc[self.end]['PCTIME_SECONDS'] == self.df.iloc[self.end + 1]['PCTIME_SECONDS']):
            self.checkFoul()

        # If just a made shot, add the score and end the possession
        else:
            self.eventScore()
            self.complete = True
    
    
    def checkFoul(self):

        # Check if it's an offensive foul, i.e. a turnover
        if self.df.iloc[self.end]['EVENTMSGACTIONTYPE'] == 4:
            self.description = "Offensive foul turnover"
            self.complete = True
            self.end += 1 #Offensive foul turnovers are described in two events
            return

        # Check if FTs are taken
        self.end += 1
        while self.df.iloc[self.end]['PCTIME_SECONDS'] == self.df.iloc[self.end - 1]['PCTIME_SECONDS']:

            if self.df.iloc[self.end]['EVENTMSGACTIONTYPE'] in [10,12,15]:

                # Check if the last FT was missed and which team got the rebound
                if (self.df.iloc[self.end + 1]['EVENTMSGTYPE'] == 4):
                    self.checkRebound()

                # Otherwise it was a made FT
                else:
                    self.complete = True
                    self.description = "Made last FT"

                # Add the score from the made FTs (if any)
                self.eventScore()
                return

            else:
                self.end += 1

        # If no last FTs are found with the same time code, it was a non-shooting foul
        self.description = "Non-Shooting foul"
        self.end += 1
    
    
    def endPossession(self):
        if self.df.iloc[self.end]['EVENTMSGTYPE'] == 5:
            self.complete = True
            self.description = "Turnover"
        
        elif self.df.iloc[self.end]['EVENTMSGTYPE'] == 13:
            self.complete = True
            self.description = "Quarter end"
    
    
    def checkRebound(self):
        
        if self.df.iloc[self.end]['PLAYER1_TEAM_ID'] == self.df.iloc[self.end+1]['PLAYER1_TEAM_ID']:
            self.description = "Miss - OREB"
        else:
            self.description = "Miss - DREB"
            self.complete = True
    
    
    def eventScore(self):
    
        start_score = self.df.iloc[self.start]['SCORE'].split(" - ")
        end_score = self.df.iloc[self.end]['SCORE'].split(" - ")

        diff_score = [int(x) - int(y) for x, y in zip(end_score, start_score)]

        self.score += max(diff_score)
    

In [806]:
poss1 = Possession(209)

In [807]:
poss1.possession()

In [799]:
poss1.view()

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
209,22200552,306,4,0,2,8:06 PM,0:44,Mitchell REBOUND (Off:0 Def:1),,,...,,0,0,,,,,,1,44
210,22200552,307,1,80,2,8:06 PM,0:39,Mitchell 27' 3PT Step Back Jump Shot (15 PTS),,,...,,0,0,,,,,,1,39
211,22200552,308,6,2,2,8:06 PM,0:39,,,Caruso S.FOUL (P2.T4) (J.DeRosa),...,CLE,1,0,,,,,,1,39
212,22200552,310,3,10,2,8:06 PM,0:39,Mitchell Free Throw 1 of 1 (16 PTS),,,...,,0,0,,,,,,1,39


In [810]:
poss1.score

4

In [754]:
poss2 = Possession(213)

In [755]:
poss2.possession()

looping
made FG
should be done here


In [756]:
poss2.view()

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
213,22200552,311,1,1,2,8:07 PM,0:29,,,LaVine 28' 3PT Jump Shot (10 PTS) (DeRozan 2 AST),...,CHI,0,0,,,,,,1,29


In [757]:
poss3 = Possession(214)

In [758]:
poss3.possession()

looping
foul


In [759]:
poss3.view()

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER2_TEAM_ABBREVIATION,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS
214,22200552,313,6,4,2,8:07 PM,0:08,Mitchell OFF.Foul (P2) (T.Brothers),,,...,CHI,1,0,,,,,,1,8
215,22200552,315,5,37,2,8:07 PM,0:08,Mitchell Offensive Foul Turnover (P3.T8),,,...,,1,0,,,,,,1,8


### Option 2 - New df Column(s)

In [4]:
df['EVENTMSGTYPE_1'] = df['EVENTMSGTYPE'].shift(-1)
df['EVENTMSGACTIONTYPE_1'] = df['EVENTMSGACTIONTYPE'].shift(-1)
df['PCTIME_SECONDS_1'] = df['PCTIME_SECONDS'].shift(-1)
df['PLAYER1_TEAM_ID_1'] = df['PLAYER1_TEAM_ID'].shift(-1)
df['SCORE_1'] = df['SCORE'].shift(1)
df.at[0,'SCORE_1'] = "0 - 0"

In [5]:
df.head(5)

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS,EVENTMSGTYPE_1,EVENTMSGACTIONTYPE_1,PCTIME_SECONDS_1,PLAYER1_TEAM_ID_1,SCORE_1
0,22200552,2,12,0,1,7:11 PM,12:00,,Start of 1st Period (7:11 PM EST),,...,,,,0,720,10.0,0.0,720.0,1610613000.0,0 - 0
1,22200552,4,10,0,1,7:11 PM,12:00,Jump Ball Allen vs. Vucevic: Tip to Dosunmu,,,...,Chicago,Bulls,CHI,1,720,1.0,1.0,701.0,1610613000.0,0 - 0
2,22200552,7,1,1,1,7:12 PM,11:41,,,DeRozan 24' 3PT Jump Shot (3 PTS),...,,,,1,701,2.0,101.0,677.0,1610613000.0,0 - 0
3,22200552,8,2,101,1,7:12 PM,11:17,MISS Mitchell 6' Driving Floating Jump Shot,,,...,,,,1,677,4.0,0.0,675.0,1610613000.0,3 - 0
4,22200552,9,4,0,1,7:12 PM,11:15,,,Vucevic REBOUND (Off:0 Def:1),...,,,,1,675,2.0,41.0,670.0,1610613000.0,3 - 0


In [6]:
# df['new_column_name'] = df.apply(lambda x: my_function(x['value_1'], x['value_2']), axis=1)

def possEndFG(loc, df):
    # Check if it's a shooting foul: the next play is a foul with the same time code
    if (df.iloc[loc]['EVENTMSGTYPE_1'] == 6) and (df.iloc[loc]['PCTIME_SECONDS'] == df.iloc[loc]['PCTIME_SECONDS_1']):
        return False
    else:
        return True
    
def possEndRebound(loc, df):
    # Check for offensive rebound: the next play (i.e. the rebound after a miss) is by the same team
    if df.iloc[loc]['PLAYER1_TEAM_ID'] == df.iloc[loc]['PLAYER1_TEAM_ID_1']:
        return False
    else:
        return True

def possEndFT(loc, df):
    # Check for offensive rebound 
    if df.iloc[loc]['EVENTMSGACTIONTYPE'] in [10,12,15]:
        if (df.iloc[loc]['EVENTMSGTYPE_1'] == 4):
            return possEndRebound(loc, df)
        else:
            return True
    else:
        return False

possOutcomesDict = {
    '1': possEndFG,
    '2': possEndRebound,
    '3': possEndFT
}

def possEndCheck(loc, df):
    
    if df.iloc[loc]['EVENTMSGTYPE'] in [1,2,3]:
        #print(f"running {possOutcomesDict[str(df.iloc[loc]['EVENTMSGTYPE'])]}")
        return possOutcomesDict[str(df.iloc[loc]['EVENTMSGTYPE'])](loc, df)
    elif df.iloc[loc]['EVENTMSGTYPE'] in [5,13]:
        return True
    else:
        return False

In [7]:
possEndCheck(8, df)

False

In [1057]:
possEnd = []
for loc in range(len(df)):
    possEnd_row = possEndCheck(loc, df)
    possEnd.append(possEnd_row)

In [8]:
possEnd = [possEndCheck(loc, df) for loc in range(len(df))]

In [1058]:
df["POSSESSION_END"] = possEnd

In [923]:
teamInPoss = [df.loc[1]["PLAYER3_TEAM_ABBREVIATION"]]

In [924]:
teamInPoss

['CHI']

In [1059]:
currentTeam = [df.loc[1]["PLAYER3_TEAM_ABBREVIATION"]][0]
teamNames = list(filter(lambda x: x is not None, df["PLAYER1_TEAM_ABBREVIATION"].unique().tolist()))
switch = True

teamInPoss = []

for idx, loc in enumerate(range(len(df))):
    
    teamInPoss.append(currentTeam)

    if df.iloc[loc]["POSSESSION_END"]:
        currentTeam = teamNames[int(not switch)]
        switch = not switch

In [1060]:
df["POSSESSION_TEAM_ABBREVIATION"] = teamInPoss

In [941]:
df[(df["POSSESSION_END"]) & (df["POSSESSION_TEAM_ABBREVIATION"] == "CLE")]

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,PCTIME_SECONDS,EVENTMSGTYPE_1,EVENTMSGACTIONTYPE_1,PCTIME_SECONDS_1,PLAYER1_TEAM_ID_1,POSSESSION_END,POSSESSION_TEAM_ABBREVIATION
3,0022200552,8,2,101,1,7:12 PM,11:17,MISS Mitchell 6' Driving Floating Jump Shot,,,...,,,1,677,4.0,0.0,675.0,1.610613e+09,True,CLE
8,0022200552,14,3,11,1,7:13 PM,11:05,Mitchell Free Throw 1 of 2 (1 PTS),,,...,,,1,665,3.0,12.0,665.0,1.610613e+09,True,CLE
13,0022200552,20,1,98,1,7:14 PM,10:38,Allen 2' Cutting Layup Shot (2 PTS) (Love 1 AST),,,...,,,1,638,6.0,2.0,638.0,1.610613e+09,True,CLE
18,0022200552,27,5,1,1,7:15 PM,9:58,LeVert Bad Pass Turnover (P1.T1),,DeRozan STEAL (1 STL),...,,,1,598,1.0,41.0,594.0,1.610613e+09,True,CLE
21,0022200552,33,2,1,1,7:16 PM,9:33,MISS LeVert 26' 3PT Jump Shot,,,...,,,1,573,4.0,0.0,568.0,1.610613e+09,True,CLE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
524,0022200552,751,1,80,5,9:56 PM,1:49,Mitchell 27' 3PT Step Back Jump Shot (66 PTS) ...,,,...,,,1,109,2.0,1.0,100.0,1.610613e+09,True,CLE
528,0022200552,756,2,1,5,9:56 PM,1:12,MISS LeVert 24' 3PT Jump Shot,,,...,,,1,72,4.0,0.0,71.0,1.610613e+09,True,CLE
533,0022200552,764,1,79,5,9:58 PM,0:44,Mitchell 27' 3PT Pullup Jump Shot (69 PTS),,,...,,,1,44,6.0,2.0,41.0,1.610613e+09,True,CLE
541,0022200552,776,3,11,5,10:01 PM,0:31,Mitchell Free Throw 1 of 2 (70 PTS),,,...,,,1,31,3.0,12.0,31.0,1.610613e+09,True,CLE


In [1075]:
#Find offensive rebounds
missedShots = df.index[df['EVENTMSGTYPE'] == 2].tolist()

CLE_OREB = []
for shot in missedShots:
    if df.loc[shot]['PLAYER1_TEAM_ABBREVIATION'] == "CLE" and df.loc[shot+1]['PLAYER1_TEAM_ABBREVIATION'] == "CLE" and df.loc[shot+1]['EVENTMSGTYPE'] == 4:
        #print(f'offensive rebound - LOC: {shot}')
        CLE_OREB.append(shot+1)

In [1037]:
# df['new_column_name'] = df.apply(lambda x: my_function(x['value_1'], x['value_2']), axis=1)

def eventScore(before,after):

    start_score = str(before).split(" - ")
    end_score = str(after).split(" - ")

    diff_score = [int(x)-int(y) for x, y in zip(end_score, start_score)]

    return max(diff_score)

df['SCORE_CHANGE'] = df.apply(lambda row: eventScore(row['SCORE_1'],row['SCORE']), axis=1)

In [1012]:
eventScore(8, df)

1

In [1101]:
df[df['PERIOD'] == 4].tail(15)

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,EVENTMSGTYPE_1,EVENTMSGACTIONTYPE_1,PCTIME_SECONDS_1,PLAYER1_TEAM_ID_1,POSSESSION_END,POSSESSION_TEAM_ABBREVIATION,PLAYER1_TEAM_ID_0,SCORE_1,SCORE_CHANGE,SCORE_0
483,22200552,692,6,1,4,9:42 PM,0:04,,,Dosunmu P.FOUL (P5.PN) (T.Brothers),...,3.0,11.0,4.0,1610613000.0,False,CHI,1610613000.0,130 - 127,0,
484,22200552,694,3,11,4,9:42 PM,0:04,Mitchell Free Throw 1 of 2 (56 PTS),,,...,8.0,0.0,4.0,1610613000.0,False,CHI,1610613000.0,130 - 127,1,
485,22200552,695,8,0,4,9:42 PM,0:04,SUB: Lopez FOR Osman,,,...,8.0,0.0,4.0,1610613000.0,False,CHI,1610613000.0,130 - 128,0,
486,22200552,696,8,0,4,9:42 PM,0:04,,,SUB: Williams FOR Dosunmu,...,8.0,0.0,4.0,1610613000.0,False,CHI,1610613000.0,130 - 128,0,
487,22200552,697,8,0,4,9:42 PM,0:04,,,SUB: Drummond FOR White,...,3.0,12.0,4.0,1610613000.0,False,CHI,1610613000.0,130 - 128,0,
488,22200552,701,3,12,4,9:42 PM,0:04,MISS Mitchell Free Throw 2 of 2,,,...,4.0,0.0,3.0,1610613000.0,False,CHI,1610613000.0,130 - 128,0,
489,22200552,702,4,0,4,9:42 PM,0:03,Mitchell REBOUND (Off:3 Def:4),,,...,1.0,72.0,3.0,1610613000.0,False,CHI,1610613000.0,130 - 128,0,
490,22200552,703,1,72,4,9:44 PM,0:03,Mitchell 2' Putback Layup (58 PTS),,,...,9.0,1.0,3.0,,True,CHI,1610613000.0,130 - 128,2,
491,22200552,705,9,1,4,9:44 PM,0:03,,,Bulls Timeout: Regular (Reg.6 Short 0),...,8.0,0.0,3.0,1610613000.0,False,CLE,1610613000.0,130 - 130,0,
492,22200552,706,8,0,4,9:44 PM,0:03,SUB: Osman FOR Lopez,,,...,8.0,0.0,3.0,1610613000.0,False,CLE,,130 - 130,0,


In [1042]:
df.drop(['SCORE_0'],axis=1)

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PCTIME_SECONDS,EVENTMSGTYPE_1,EVENTMSGACTIONTYPE_1,PCTIME_SECONDS_1,PLAYER1_TEAM_ID_1,POSSESSION_END,POSSESSION_TEAM_ABBREVIATION,PLAYER1_TEAM_ID_0,SCORE_1,SCORE_CHANGE
0,0022200552,2,12,0,1,7:11 PM,12:00,,Start of 1st Period (7:11 PM EST),,...,720,10.0,0.0,720.0,1.610613e+09,False,CHI,,0 - 0,0
1,0022200552,4,10,0,1,7:11 PM,12:00,Jump Ball Allen vs. Vucevic: Tip to Dosunmu,,,...,720,1.0,1.0,701.0,1.610613e+09,False,CHI,,0 - 0,0
2,0022200552,7,1,1,1,7:12 PM,11:41,,,DeRozan 24' 3PT Jump Shot (3 PTS),...,701,2.0,101.0,677.0,1.610613e+09,True,CHI,1.610613e+09,0 - 0,3
3,0022200552,8,2,101,1,7:12 PM,11:17,MISS Mitchell 6' Driving Floating Jump Shot,,,...,677,4.0,0.0,675.0,1.610613e+09,True,CLE,1.610613e+09,3 - 0,0
4,0022200552,9,4,0,1,7:12 PM,11:15,,,Vucevic REBOUND (Off:0 Def:1),...,675,2.0,41.0,670.0,1.610613e+09,False,CHI,1.610613e+09,3 - 0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
541,0022200552,776,3,11,5,10:01 PM,0:31,Mitchell Free Throw 1 of 2 (70 PTS),,,...,31,3.0,12.0,31.0,1.610613e+09,False,CLE,1.610613e+09,134 - 143,1
542,0022200552,777,3,12,5,10:02 PM,0:31,Mitchell Free Throw 2 of 2 (71 PTS),,,...,31,2.0,79.0,25.0,1.610613e+09,,CLE,1.610613e+09,134 - 144,1
543,0022200552,778,2,79,5,10:02 PM,0:25,,,MISS LaVine 36' 3PT Pullup Jump Shot,...,25,4.0,0.0,22.0,1.610613e+09,True,CLE,1.610613e+09,134 - 145,0
544,0022200552,779,4,0,5,10:02 PM,0:22,Allen REBOUND (Off:6 Def:2),,,...,22,13.0,0.0,0.0,,False,CHI,1.610613e+09,134 - 145,0


In [1061]:
possessionChanges = df[df['POSSESSION_END'] == True].index.tolist()

In [1062]:
possessionChanges

[2,
 3,
 5,
 9,
 11,
 15,
 17,
 18,
 19,
 21,
 23,
 24,
 26,
 28,
 30,
 34,
 35,
 36,
 37,
 38,
 42,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 58,
 59,
 62,
 63,
 64,
 66,
 69,
 70,
 79,
 80,
 82,
 84,
 86,
 89,
 91,
 92,
 93,
 94,
 97,
 99,
 101,
 108,
 109,
 110,
 111,
 116,
 117,
 119,
 120,
 122,
 126,
 129,
 131,
 132,
 136,
 138,
 140,
 146,
 149,
 151,
 153,
 154,
 155,
 157,
 159,
 160,
 161,
 164,
 166,
 168,
 170,
 171,
 175,
 180,
 181,
 183,
 185,
 187,
 188,
 190,
 195,
 201,
 202,
 206,
 207,
 208,
 212,
 213,
 215,
 218,
 222,
 224,
 228,
 230,
 235,
 236,
 238,
 239,
 240,
 244,
 245,
 247,
 249,
 251,
 253,
 254,
 255,
 256,
 258,
 259,
 263,
 264,
 266,
 267,
 269,
 273,
 274,
 280,
 282,
 283,
 284,
 289,
 296,
 297,
 301,
 304,
 306,
 311,
 312,
 317,
 318,
 319,
 321,
 323,
 331,
 334,
 335,
 339,
 340,
 342,
 343,
 344,
 349,
 350,
 351,
 355,
 364,
 365,
 367,
 372,
 373,
 374,
 375,
 376,
 378,
 382,
 383,
 386,
 390,
 395,
 397,
 398,
 400,
 405,
 406,
 410,
 411,
 4

In [1080]:
possScore = []

for i in range(len(possessionChanges)-1):
    possScore.append(df.iloc[possessionChanges[i] + 1:possessionChanges[i+1] + 1]["SCORE_CHANGE"].sum())

In [1081]:
possessionChanges = np.array(possessionChanges)

In [1082]:
possessionChanges

array([  2,   3,   5,   9,  11,  15,  17,  18,  19,  21,  23,  24,  26,
        28,  30,  34,  35,  36,  37,  38,  42,  46,  47,  48,  49,  50,
        51,  52,  58,  59,  62,  63,  64,  66,  69,  70,  79,  80,  82,
        84,  86,  89,  91,  92,  93,  94,  97,  99, 101, 108, 109, 110,
       111, 116, 117, 119, 120, 122, 126, 129, 131, 132, 136, 138, 140,
       146, 149, 151, 153, 154, 155, 157, 159, 160, 161, 164, 166, 168,
       170, 171, 175, 180, 181, 183, 185, 187, 188, 190, 195, 201, 202,
       206, 207, 208, 212, 213, 215, 218, 222, 224, 228, 230, 235, 236,
       238, 239, 240, 244, 245, 247, 249, 251, 253, 254, 255, 256, 258,
       259, 263, 264, 266, 267, 269, 273, 274, 280, 282, 283, 284, 289,
       296, 297, 301, 304, 306, 311, 312, 317, 318, 319, 321, 323, 331,
       334, 335, 339, 340, 342, 343, 344, 349, 350, 351, 355, 364, 365,
       367, 372, 373, 374, 375, 376, 378, 382, 383, 386, 390, 395, 397,
       398, 400, 405, 406, 410, 411, 413, 417, 421, 426, 428, 43

In [1083]:
possessionChanges[possessionChanges > 72].min()

79

In [1077]:
CLE_OREB

[72, 104, 276, 303, 314, 389, 416, 423, 437, 471, 501, 516]

In [1086]:
CLE_OREB_bounds = [[reb, possessionChanges[possessionChanges > reb].min()+1] for reb in CLE_OREB]

In [1087]:
points = 0
for reb in CLE_OREB_bounds:
    points += df.iloc[reb[0]:reb[1]]["SCORE_CHANGE"].sum()

In [1089]:
points / len(CLE_OREB)

1.5833333333333333

## Testing `nba_api` Endpoints

In [None]:
# Query for the regular season games of the Cavs (team no. 3 alphabetically)
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.library.parameters import Season
from nba_api.stats.library.parameters import SeasonType

gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_ids[2], 
                            season_nullable=Season.default,
                            season_type_nullable=SeasonType.regular)  

games_dict = gamefinder.get_normalized_dict()
games = games_dict['LeagueGameFinderResults']
game_IDs = [game['GAME_ID'] for game in games]

In [1218]:
from nba_api.stats.endpoints import leagueleaders
from nba_api.stats.library.parameters import Season

leagueLead = leagueleaders.LeagueLeaders(league_id='00',
                                         per_mode48='PerGame',
                                         scope='S',
                                         season=Season.default,
                                         season_type_all_star='Regular Season',
                                         stat_category_abbreviation='PTS')

leaders = leagueLead.league_leaders.get_data_frame()

In [1217]:
leaders.iloc[:30].to_csv('league_leaders.csv',index=False,mode='a')

In [None]:
base_url = 'https://stats.nba.com/stats/{endpoint}'
endpoint = 'leagueleaders'
url = 'https://stats.nba.com/stats/leagueleaders?ActiveFlag=&LeagueID=00&PerMode=PerGame&Scope=S&Season=2022-23&SeasonType=Regular+Season&StatCategory=PTS'


In [1112]:
from nba_api.stats.endpoints import videoevents
from nba_api.stats.library.parameters import Season

game_id = "0022200552" # CHI @ CLE 02/01/23
game_event_id = "703" # D. Mitchell missed FT putback

gameLog = videoevents.VideoEvents(game_id=game_id,game_event_id=game_event_id)

In [1118]:
from nba_api.stats.endpoints import videoevents

game_id = "0022200552" # CHI @ CLE 02/01/23
game_event_id = "703" # D. Mitchell missed FT putback

vidEvent = videoevents.VideoEvents(game_id=game_id,game_event_id=game_event_id)

In [1130]:
vidEvent = vidEvent.get_normalized_dict()
vidEventDict = {
    'uuid': vidEvent['Meta']['videoUrls'][0]['uuid'],
    'game_id': vidEvent['playlist'][0]['gi'],
    'game_event_id': vidEvent['playlist'][0]['ei'],
    'year': vidEvent['playlist'][0]['y'],
    'month': vidEvent['playlist'][0]['m'],
    'day': vidEvent['playlist'][0]['d']
}

{'Meta': {'videoUrls': [{'uuid': '4549dfbf-fde2-4dcc-8065-afade5ada267',
    'dur': None,
    'stt': None,
    'stp': None,
    'sth': None,
    'stw': None,
    'mtt': None,
    'mtp': None,
    'mth': None,
    'mtw': None,
    'ltt': None,
    'ltp': None,
    'lth': None,
    'ltw': None}]},
 'playlist': [{'gi': '0022200552',
   'ei': 703,
   'y': 2023,
   'm': '01',
   'd': '02',
   'gc': '2023-01-02/CHICLE',
   'p': 4,
   'dsc': "Mitchell 2' Putback Layup (58 PTS)",
   'ha': 'CLE',
   'va': 'CHI',
   'hpb': 128,
   'hpa': 130,
   'vpb': 130,
   'vpa': 130,
   'pta': 0}]}

In [1127]:
vid_UUID = vidEvent.get_normalized_dict()['Meta']['videoUrls'][0]['uuid']

In [1126]:
#vidEndpoint = f'https://videos.nba.com/nba/pbp/media/{vid_year}/{vid_month}/{vid_day}/{game_id}/{game_event_id}/{vid_UUID}_{vid_resolution}.mp4'

vidEndpoint = 'https://videos.nba.com/nba/pbp/media/{}/{}/{}/{}/{}/{}_{}.mp4'



In [1128]:
vidEndpoint.format('2023','01','02','0022200552','703',vid_UUID,'1280x720')

'https://videos.nba.com/nba/pbp/media/2023/01/02/0022200552/703/4549dfbf-fde2-4dcc-8065-afade5ada267_1280x720.mp4'

In [9]:
from nba_api.stats.endpoints import leagueleaders
from nba_api.stats.library.parameters import Season


def get_data(cat, per_mode):

  leagueLead = leagueleaders.LeagueLeaders(league_id='00',
                                         per_mode48=per_mode,
                                         scope='S',
                                         season=Season.default,
                                         season_type_all_star='Regular Season',
                                         stat_category_abbreviation=cat)
  
  # Export just the top 30 to csv
  leaders = leagueLead.league_leaders.get_data_frame()
  csv_name = 'NBA_Leaders_' + cat + per_mode + '.csv'
  leaders.iloc[:30].to_csv(csv_name,index=False)


# Extract and export the info for each of the listed categories
categories = ['PTS', 'REB', 'AST', 'FG3_PCT']

for cat in categories:
  if cat == 'PTS':
    get_data(cat, 'PerGame')
    get_data(cat, 'Totals')
  else:
    per_mode = 'PerGame'
    get_data(cat, per_mode)