In [1]:
import pandas as pd
from nba_api.stats.endpoints import playbyplayv2
from nba_api.stats.library.eventmsgtype import EventMsgType
from nba_api.stats.library.playbyplayregex import re_block, re_field_goal_made, re_field_goal_missed

In [2]:
# ID of the game whose play-by-play feed is analysed in this notebook.
# NOTE(review): the saved cell outputs further down show GAME_ID 0021500009,
# not this ID - re-run all cells to refresh them, or confirm which game is intended.
GAME_ID = "0021500295"

# Build the play-by-play endpoint object for that game.
pbp = playbyplayv2.PlayByPlayV2(game_id=GAME_ID)



In [3]:
# The play-by-play events are the first frame returned by the endpoint.
all_events: pd.DataFrame = pbp.get_data_frames()[0]

# Remove everything besides shots (made or missed field goals).
shot_event_types = [
    EventMsgType.FIELD_GOAL_MADE.value,
    EventMsgType.FIELD_GOAL_MISSED.value,
]

# .copy() makes `df` an independent frame rather than a slice of `all_events`,
# so later cells can add and fill columns without SettingWithCopyWarning.
# Keeping the unfiltered frame under its own name also makes this cell safe to re-run.
df: pd.DataFrame = all_events[all_events["EVENTMSGTYPE"].isin(shot_event_types)].copy()

In [4]:
# Based on https://github.com/swar/nba_api/blob/master/docs/examples/PlayByPlay.ipynb
# Rewritten with vectorization to avoid slow loops

# Add the block-tracking columns with their "not blocked" defaults:
#   blocked    - False for every shot until a block is detected
#   blocked_by - missing (pd.NA) until a blocker's name is filled in
# assign() returns a new frame instead of writing into `df` in place, so this
# does not warn when `df` is a filtered slice and re-running simply resets both columns.
df = df.assign(blocked=False, blocked_by=pd.NA)
df.head()

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,blocked,blocked_by
2,21500009,2,1,66,1,7:45 PM,11:43,Valanciunas 13' Jump Bank Shot (2 PTS) (DeRoza...,,,...,0,0,,,,,,1,False,
3,21500009,3,2,78,1,7:45 PM,11:28,,,MISS George 12' Floating Jump Shot,...,0,0,,,,,,1,False,
5,21500009,5,2,41,1,7:45 PM,11:22,MISS Lowry 2' Running Layup,,G. Hill BLOCK (1 BLK),...,5,201588,George Hill,1610613000.0,Indiana,Pacers,IND,1,False,
7,21500009,7,1,1,1,7:45 PM,11:10,,,Ellis 18' Jump Shot (2 PTS) (Mahinmi 1 AST),...,0,0,,,,,,1,False,
8,21500009,8,1,5,1,7:46 PM,10:45,Valanciunas 3' Layup (4 PTS) (Carroll 1 AST),,,...,0,0,,,,,,1,False,


In [5]:
# Flag blocked shots and record who blocked them.
# A block shows up as a "<player> BLOCK (n BLK)" entry in one team's description
# column, next to the missed shot in the other team's column, so both columns are checked.
# HOME is processed first and VISITOR second, so a VISITOR match wins if both
# columns ever match on the same row.
for description_col in ("HOMEDESCRIPTION", "VISITORDESCRIPTION"):
    descriptions = df[description_col]

    # na=False treats rows with no description as "not a block", so no separate
    # notnull() guard is needed.
    is_block = descriptions.str.match(re_block, na=False)

    df.loc[is_block, "blocked"] = True
    # Extract the blocker's name only from the rows that matched; the result is
    # aligned back onto `df` by index, so no placeholder value is ever written.
    df.loc[is_block, "blocked_by"] = descriptions[is_block].str.extract(re_block)["player"]
df

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,blocked,blocked_by
2,0021500009,2,1,66,1,7:45 PM,11:43,Valanciunas 13' Jump Bank Shot (2 PTS) (DeRoza...,,,...,0,0,,,,,,1,False,
3,0021500009,3,2,78,1,7:45 PM,11:28,,,MISS George 12' Floating Jump Shot,...,0,0,,,,,,1,False,
5,0021500009,5,2,41,1,7:45 PM,11:22,MISS Lowry 2' Running Layup,,G. Hill BLOCK (1 BLK),...,5,201588,George Hill,1.610613e+09,Indiana,Pacers,IND,1,True,G. Hill
7,0021500009,7,1,1,1,7:45 PM,11:10,,,Ellis 18' Jump Shot (2 PTS) (Mahinmi 1 AST),...,0,0,,,,,,1,False,
8,0021500009,8,1,5,1,7:46 PM,10:45,Valanciunas 3' Layup (4 PTS) (Carroll 1 AST),,,...,0,0,,,,,,1,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
479,0021500009,577,1,79,4,10:08 PM,0:50,DeRozan 16' Pullup Jump Shot (23 PTS),,,...,0,0,,,,,,1,False,
480,0021500009,578,2,101,4,10:08 PM,0:39,,,MISS Ellis 7' Driving Floating Jump Shot,...,0,0,,,,,,1,False,
486,0021500009,588,2,1,4,10:11 PM,0:28,,,MISS Miles 21' Jump Shot,...,0,0,,,,,,1,False,
493,0021500009,598,2,1,4,10:13 PM,0:20,,,MISS George 25' 3PT Jump Shot,...,0,0,,,,,,1,False,


In [6]:
# Get shot type using regex on description
# Replace spaces with underscores and capitalize shot type
# We can't do (nonnull()) & (EVENTMSGTYPE == 1) because the other description may be a block,
# so each description column is tested against both the "made" and "missed" patterns.
df["shot_type"] = pd.NA

# Order matters only if several patterns match the same row: later assignments
# overwrite earlier ones (HOME made, HOME missed, VISITOR made, VISITOR missed).
for description_col in ("HOMEDESCRIPTION", "VISITORDESCRIPTION"):
    descriptions = df[description_col]
    for shot_regex in (re_field_goal_made, re_field_goal_missed):
        # na=False treats rows with no description as non-matching.
        is_shot = descriptions.str.match(shot_regex, na=False)

        # Normalise e.g. "Floating Jump Shot " -> "FLOATING_JUMP_SHOT".
        shot_type = (
            descriptions[is_shot]
            .str.extract(shot_regex)["field_goal_type"]
            .str.rstrip()
            .str.replace(" ", "_")
            .str.upper()
        )
        df.loc[is_shot, "shot_type"] = shot_type

# Distinct shot types found in this game.
df["shot_type"].unique()

array(['JUMP_BANK_SHOT', 'FLOATING_JUMP_SHOT', 'RUNNING_LAYUP',
       'JUMP_SHOT', 'LAYUP', 'PULLUP_JUMP_SHOT',
       'STEP_BACK_BANK_JUMP_SHOT', '3PT_JUMP_SHOT',
       'TURNAROUND_JUMP_SHOT', 'TIP_LAYUP_SHOT', 'DUNK',
       '3PT_PULLUP_JUMP_SHOT', 'RUNNING_DUNK',
       'DRIVING_FINGER_ROLL_LAYUP', 'PUTBACK_LAYUP',
       'STEP_BACK_JUMP_SHOT', 'HOOK_SHOT', '3PT_STEP_BACK_JUMP_SHOT',
       '3PT_RUNNING_JUMP_SHOT', 'CUTTING_LAYUP_SHOT', 'DRIVING_LAYUP',
       'FADEAWAY_JUMPER', 'TURNAROUND_HOOK_SHOT',
       'DRIVING_FLOATING_JUMP_SHOT', '3PT_PULLUP_BANK_SHOT',
       'DRIVING_FLOATING_BANK_JUMP_SHOT', 'HOOK_BANK_SHOT',
       'DRIVING_REVERSE_LAYUP', 'RUNNING_JUMP_SHOT',
       'CUTTING_FINGER_ROLL_LAYUP_SHOT', 'RUNNING_FINGER_ROLL_LAYUP',
       'CUTTING_DUNK_SHOT', 'TURNAROUND_FADEAWAY_SHOT'], dtype=object)