In [1]:
import pandas as pd
from nba_api.stats.endpoints import playbyplayv2
from nba_api.stats.library.eventmsgtype import EventMsgType
from nba_api.stats.library.playbyplayregex import re_block, re_field_goal_made, re_field_goal_missed

In [2]:
# Build the play-by-play endpoint object for the single game analysed below.
game_id = "0021500295"
pbp = playbyplayv2.PlayByPlayV2(game_id=game_id)



In [3]:
# The first frame returned by the endpoint is the one holding the
# EVENTMSGTYPE and description columns used throughout this notebook.
all_events: pd.DataFrame = pbp.get_data_frames()[0]

# Keep only field-goal attempts (made or missed); drop every other event type.
shot_event_types = [
    EventMsgType.FIELD_GOAL_MADE.value,
    EventMsgType.FIELD_GOAL_MISSED.value,
]

# .copy() makes `df` an independent frame. Later cells add and overwrite
# columns on it in place; without the copy, pandas treats those writes as
# assignments to a slice of the original frame (SettingWithCopyWarning).
df: pd.DataFrame = all_events[all_events["EVENTMSGTYPE"].isin(shot_event_types)].copy()

In [4]:
# Adapted from the nba_api example notebook:
# https://github.com/swar/nba_api/blob/master/docs/examples/PlayByPlay.ipynb
# This version relies on vectorized pandas operations instead of slow loops.

# Start every shot as "not blocked" with no blocker recorded; the rows that
# turn out to be blocks are filled in afterwards.
block_column_defaults = {"blocked": False, "blocked_by": pd.NA}
for column_name, default_value in block_column_defaults.items():
    df[column_name] = default_value
df.head()

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,blocked,blocked_by
4,21500295,4,1,1,1,8:43 PM,11:34,,,Lopez 11' Jump Shot (2 PTS) (Calderon 1 AST),...,0,0,,,,,,1,False,
5,21500295,5,1,1,1,8:43 PM,11:13,Antetokounmpo 15' Jump Shot (2 PTS),,,...,0,0,,,,,,1,False,
9,21500295,11,2,1,1,8:44 PM,10:49,MISS Monroe 6' Jump Shot,,,...,0,0,,,,,,1,False,
11,21500295,13,2,1,1,8:44 PM,10:41,,,MISS Anthony 24' 3PT Jump Shot,...,0,0,,,,,,1,False,
13,21500295,15,2,5,1,8:44 PM,10:34,MISS Antetokounmpo 2' Layup,,Lopez BLOCK (1 BLK),...,5,201577,Robin Lopez,1610613000.0,New York,Knicks,NYK,1,False,


In [5]:
# A blocked shot is recorded with the block text ("<player> BLOCK (n BLK)") in
# one of the two team description columns. Run the block regex once per column
# and read the blocker's name straight from its "player" group; rows whose
# description is missing or does not match come back as NaN.
home_blocker = df["HOMEDESCRIPTION"].str.extract(re_block)["player"]
visitor_blocker = df["VISITORDESCRIPTION"].str.extract(re_block)["player"]

# Prefer the visitor-side match and fall back to the home-side one. Both
# columns matching on the same row is not expected; the visitor value wins
# if it ever happens.
# NOTE(review): assumes the "player" group is always captured when re_block
# matches - confirm against the nba_api pattern definition.
blocker = visitor_blocker.fillna(home_blocker)
is_blocked = blocker.notna()

# Only rows with a detected block are written, so every other row keeps the
# values it already had in "blocked" and "blocked_by".
df.loc[is_blocked, "blocked"] = True
df.loc[is_blocked, "blocked_by"] = blocker[is_blocked]
df

Unnamed: 0,GAME_ID,EVENTNUM,EVENTMSGTYPE,EVENTMSGACTIONTYPE,PERIOD,WCTIMESTRING,PCTIMESTRING,HOMEDESCRIPTION,NEUTRALDESCRIPTION,VISITORDESCRIPTION,...,PERSON3TYPE,PLAYER3_ID,PLAYER3_NAME,PLAYER3_TEAM_ID,PLAYER3_TEAM_CITY,PLAYER3_TEAM_NICKNAME,PLAYER3_TEAM_ABBREVIATION,VIDEO_AVAILABLE_FLAG,blocked,blocked_by
4,0021500295,4,1,1,1,8:43 PM,11:34,,,Lopez 11' Jump Shot (2 PTS) (Calderon 1 AST),...,0,0,,,,,,1,False,
5,0021500295,5,1,1,1,8:43 PM,11:13,Antetokounmpo 15' Jump Shot (2 PTS),,,...,0,0,,,,,,1,False,
9,0021500295,11,2,1,1,8:44 PM,10:49,MISS Monroe 6' Jump Shot,,,...,0,0,,,,,,1,False,
11,0021500295,13,2,1,1,8:44 PM,10:41,,,MISS Anthony 24' 3PT Jump Shot,...,0,0,,,,,,1,False,
13,0021500295,15,2,5,1,8:44 PM,10:34,MISS Antetokounmpo 2' Layup,,Lopez BLOCK (1 BLK),...,5,201577,Robin Lopez,1.610613e+09,New York,Knicks,NYK,1,True,Lopez
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
442,0021500295,516,2,1,4,10:50 PM,1:37,,,MISS Grant 26' 3PT Jump Shot,...,0,0,,,,,,1,False,
444,0021500295,518,1,42,4,10:50 PM,1:15,Antetokounmpo 1' Driving Layup (16 PTS) (Middl...,,,...,0,0,,,,,,1,False,
445,0021500295,519,2,1,4,10:50 PM,1:07,Antetokounmpo BLOCK (2 BLK),,MISS Grant 3' Jump Shot,...,4,203507,Giannis Antetokounmpo,1.610613e+09,Milwaukee,Bucks,MIL,1,True,Antetokounmpo
455,0021500295,530,2,5,4,10:52 PM,0:39,Monroe BLOCK (1 BLK),,MISS Grant 3' Layup,...,4,202328,Greg Monroe,1.610613e+09,Milwaukee,Bucks,MIL,1,True,Monroe


In [6]:
# Derive the shot type from the description text with the made/missed
# field-goal regexes, then normalise it: strip trailing whitespace, replace
# spaces with underscores and upper-case it (e.g. "3PT Jump Shot" ->
# "3PT_JUMP_SHOT").
#
# Both description columns must be tried against both patterns. Checking
# "description is not null" together with EVENTMSGTYPE is not enough to know
# which column describes the shot, because the other team's column may hold
# a block description for the same event.
df["shot_type"] = pd.NA
for description_column in ("HOMEDESCRIPTION", "VISITORDESCRIPTION"):
    for shot_pattern in (re_field_goal_made, re_field_goal_missed):
        # One regex pass per (column, pattern); non-matching rows yield NaN.
        shot_type = (
            df[description_column]
            .str.extract(shot_pattern)["field_goal_type"]
            .str.rstrip()
            .str.replace(" ", "_", regex=False)
            .str.upper()
        )
        # Write only the rows where this pattern found a shot type, so
        # values set by an earlier (column, pattern) pass are preserved
        # unless this pass also matches the same row.
        has_shot_type = shot_type.notna()
        df.loc[has_shot_type, "shot_type"] = shot_type[has_shot_type]

df["shot_type"].unique()

array(['JUMP_SHOT', '3PT_JUMP_SHOT', 'LAYUP', 'RUNNING_LAYUP',
       'REVERSE_LAYUP', 'TIP_LAYUP_SHOT', 'TURNAROUND_JUMP_SHOT',
       'HOOK_SHOT', 'FLOATING_JUMP_SHOT', 'DUNK', 'FINGER_ROLL_LAYUP',
       'DRIVING_LAYUP', 'STEP_BACK_JUMP_SHOT', 'CUTTING_LAYUP_SHOT',
       'RUNNING_JUMP_SHOT', 'RUNNING_DUNK', 'PUTBACK_LAYUP',
       'PULLUP_JUMP_SHOT', 'RUNNING_REVERSE_LAYUP',
       'RUNNING_FINGER_ROLL_LAYUP', 'REVERSE_DUNK', 'FADEAWAY_JUMPER',
       'DRIVING_REVERSE_LAYUP'], dtype=object)