# Actions Data Exploration
In this notebook we'll explore the different `actionType`s that compose the data obtained from the live `PlayByPlay` endpoint.

In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Identifiers of the game explored in this notebook. Both values are
# interpolated into the on-disk directory names (`rs<SEASON_ID>`, `g<GAME_ID>`).
# NOTE(review): these look like NBA stats season/game identifiers — confirm.
SEASON_ID = "22024"
GAME_ID = "0022400630"

In [3]:
# Directory layout: <data>/rs<SEASON_ID>/g<GAME_ID>/playbyplay
# `~` is expanded to the current user's home directory.
data_path = Path("~/MBAI/data").expanduser()
season_path = data_path.joinpath(f"rs{SEASON_ID}")
game_path = season_path.joinpath(f"g{GAME_ID}")
pbp_path = game_path.joinpath("playbyplay")

In [4]:
# Load the play-by-play actions table for the selected game.
df = pd.read_parquet(pbp_path.joinpath("actions.parquet"))

In [5]:
# Show the loaded actions frame; the rendered output below elides the middle
# rows and columns (`...`) because the frame is too large to display in full.
df

  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()
  has_large_values = (abs_vals > 1e6).any()


Unnamed: 0,clock,timeActual,period,actionType,subType,personId,x,y,possession,scoreHome,...,jumpBallRecoveredPersonId,jumpBallWonPersonId,jumpBallLostPersonId,shotDistance,shotResult,blockPersonId,assistPersonId,officialId,foulDrawnPersonId,stealPersonId
0,0 days 00:12:00,2025-01-25 00:10:48.500000+00:00,1,period,start,,,,,0,...,,,,,,,,,,
1,0 days 00:11:57,2025-01-25 00:10:50.500000+00:00,1,jumpball,recovered,203995,,,1610612766,0,...,203995,1631109,1642270,,,,,,,
2,0 days 00:11:37,2025-01-25 00:11:10.200000+00:00,1,2pt,DUNK,1631109,8.882812,51.96875,1610612766,0,...,,,,3.250000,False,1642270,,,,
3,0 days 00:11:37,2025-01-25 00:11:10.200000+00:00,1,block,,1642270,,,1610612766,0,...,,,,,,,,,,
4,0 days 00:11:34,2025-01-25 00:11:13.200000+00:00,1,rebound,defensive,1642270,,,1610612757,0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0 days 00:00:10.700000,2025-01-25 02:30:23.300000+00:00,4,2pt,DUNK,1631109,94.062500,51.71875,1610612766,95,...,,,,0.930176,True,,1630182,,,
607,0 days 00:00:03.300000,2025-01-25 02:30:35.800000+00:00,4,2pt,DUNK,1641739,7.441406,52.68750,1610612757,95,...,,,,2.199219,True,,1629014,,,
608,0 days 00:00:00,2025-01-25 02:30:44.700000+00:00,4,2pt,Layup,1631109,94.437500,47.06250,1610612766,97,...,,,,1.469727,True,,1641733,,,
609,0 days 00:00:00,2025-01-25 02:30:53.700000+00:00,4,period,end,,,,1610612757,97,...,,,,,,,,,,


In [7]:
# Distinct action types present in this game's play-by-play data.
df.loc[:, 'actionType'].unique()

<StringArray>
[      'period',     'jumpball',          '2pt',        'block',
      'rebound',          '3pt',         'foul', 'substitution',
      'timeout',     'turnover',        'steal',    'freethrow',
    'violation',         'game']
Length: 14, dtype: string

In [8]:
# Map every actionType to the list of distinct subType values seen for it.
# Built from a generator so no loop variables are left in the notebook namespace.
action_subtypes = dict(
    (action_type, rows['subType'].unique().tolist())
    for action_type, rows in df.groupby('actionType')
)

action_subtypes

{'2pt': ['DUNK', 'Jump Shot', 'Layup', 'Hook'],
 '3pt': ['Jump Shot'],
 'block': [''],
 'foul': ['personal', 'offensive', 'technical'],
 'freethrow': ['1 of 2', '2 of 2', '1 of 1'],
 'game': ['end'],
 'jumpball': ['recovered'],
 'period': ['start', 'end'],
 'rebound': ['defensive', 'offensive'],
 'steal': [''],
 'substitution': ['out', 'in'],
 'timeout': ['full'],
 'turnover': ['out-of-bounds',
  'traveling',
  'lost ball',
  'bad pass',
  'offensive foul',
  'shot clock'],
 'violation': ['kicked ball',
  'defensive goaltending',
  'lane',
  'delay-of-game']}

In [9]:
# Group once by action type; the cells below pull individual groups out of this.
df_by_action = df.groupby(by='actionType')

In [10]:
# 'period' rows only, keeping just the columns that hold at least one value.
period_df = (
    df_by_action
    .get_group('period')
    .dropna(axis='columns', how='all')
)
period_df

Unnamed: 0,clock,timeActual,period,actionType,subType,possession,scoreHome,scoreAway
0,0 days 00:12:00,2025-01-25 00:10:48.500000+00:00,1,period,start,,0,0
122,0 days 00:00:00,2025-01-25 00:31:45.400000+00:00,1,period,end,1610612766.0,30,22
129,0 days 00:12:00,2025-01-25 00:35:10.900000+00:00,2,period,start,1610612757.0,30,22
272,0 days 00:00:00,2025-01-25 01:03:32.200000+00:00,2,period,end,1610612757.0,54,50
279,0 days 00:12:00,2025-01-25 01:18:44.200000+00:00,3,period,start,1610612757.0,54,50
436,0 days 00:00:00,2025-01-25 01:51:09+00:00,3,period,end,1610612757.0,75,73
447,0 days 00:12:00,2025-01-25 01:53:43.100000+00:00,4,period,start,1610612766.0,75,73
609,0 days 00:00:00,2025-01-25 02:30:53.700000+00:00,4,period,end,1610612757.0,97,102


In [11]:
# 'game' rows only, keeping just the columns that hold at least one value.
game_df = (
    df_by_action
    .get_group('game')
    .dropna(axis='columns', how='all')
)
game_df

Unnamed: 0,clock,timeActual,period,actionType,subType,scoreHome,scoreAway
610,0 days,2025-01-25 02:30:57.900000+00:00,4,game,end,97,102


In [12]:
# 'jumpball' rows only, keeping just the columns that hold at least one value.
jumpball_df = (
    df_by_action
    .get_group('jumpball')
    .dropna(axis='columns', how='all')
)
jumpball_df

Unnamed: 0,clock,timeActual,period,actionType,subType,personId,possession,scoreHome,scoreAway,teamId,descriptor,jumpBallRecoveredPersonId,jumpBallWonPersonId,jumpBallLostPersonId
1,0 days 00:11:57,2025-01-25 00:10:50.500000+00:00,1,jumpball,recovered,203995,1610612766,0,0,1610612766,startperiod,203995,1631109,1642270


In [13]:
# 'substitution' rows only, keeping just the columns that hold at least one value.
substitution_df = (
    df_by_action
    .get_group('substitution')
    .dropna(axis='columns', how='all')
)
substitution_df

Unnamed: 0,clock,timeActual,period,actionType,subType,personId,possession,scoreHome,scoreAway,teamId
39,0 days 00:07:11,2025-01-25 00:15:59.700000+00:00,1,substitution,out,1628998,1610612757,10,12,1610612766
40,0 days 00:07:11,2025-01-25 00:15:59.700000+00:00,1,substitution,out,1642270,1610612757,10,12,1610612757
41,0 days 00:07:11,2025-01-25 00:15:59.700000+00:00,1,substitution,out,1629014,1610612757,10,12,1610612757
42,0 days 00:07:11,2025-01-25 00:15:59.700000+00:00,1,substitution,in,1629006,1610612757,10,12,1610612766
43,0 days 00:07:11,2025-01-25 00:15:59.700000+00:00,1,substitution,in,1630703,1610612757,10,12,1610612757
...,...,...,...,...,...,...,...,...,...,...
597,0 days 00:00:17.600000,2025-01-25 02:28:15.800000+00:00,4,substitution,in,1631217,1610612757,93,99,1610612766
600,0 days 00:00:17.600000,2025-01-25 02:29:25.600000+00:00,4,substitution,out,1629006,1610612766,93,100,1610612766
601,0 days 00:00:17.600000,2025-01-25 02:29:25.600000+00:00,4,substitution,out,1631217,1610612766,93,100,1610612766
602,0 days 00:00:17.600000,2025-01-25 02:29:25.600000+00:00,4,substitution,in,1642354,1610612766,93,100,1610612766


In [14]:
# 'timeout' rows only, keeping just the columns that hold at least one value.
timeout_df = (
    df_by_action
    .get_group('timeout')
    .dropna(axis='columns', how='all')
)
timeout_df

Unnamed: 0,clock,timeActual,period,actionType,subType,possession,scoreHome,scoreAway,teamId
51,0 days 00:06:59,2025-01-25 00:16:48+00:00,1,timeout,full,1610612766,10,12,1610612766
70,0 days 00:04:39,2025-01-25 00:22:19.900000+00:00,1,timeout,full,1610612757,24,12,1610612757
151,0 days 00:09:35,2025-01-25 00:38:06.700000+00:00,2,timeout,full,1610612757,37,24,1610612757
206,0 days 00:05:53,2025-01-25 00:48:02.600000+00:00,2,timeout,full,1610612766,42,36,1610612766
296,0 days 00:10:20,2025-01-25 01:22:19.700000+00:00,3,timeout,full,1610612757,54,58,1610612766
406,0 days 00:02:49,2025-01-25 01:42:57.200000+00:00,3,timeout,full,1610612766,70,66,1610612757
509,0 days 00:06:40,2025-01-25 02:03:24.300000+00:00,4,timeout,full,1610612766,81,88,1610612766
532,0 days 00:04:33,2025-01-25 02:09:43.700000+00:00,4,timeout,full,1610612757,83,90,1610612757
539,0 days 00:03:31,2025-01-25 02:14:32.700000+00:00,4,timeout,full,1610612757,86,92,1610612757
543,0 days 00:03:08,2025-01-25 02:16:44.700000+00:00,4,timeout,full,1610612766,86,92,1610612766


In [15]:
# 'foul' rows only, keeping just the columns that hold at least one value.
foul_df = (
    df_by_action
    .get_group('foul')
    .dropna(axis='columns', how='all')
)
foul_df

Unnamed: 0,clock,timeActual,period,actionType,subType,personId,possession,scoreHome,scoreAway,teamId,descriptor,officialId,foulDrawnPersonId
38,0 days 00:07:11,2025-01-25 00:15:44.600000+00:00,1,foul,personal,1641733,1610612757,10,12,1610612766,,1626301,203924.0
50,0 days 00:06:59,2025-01-25 00:16:38+00:00,1,foul,personal,1631133,1610612766,10,12,1610612757,,1628487,1631109.0
63,0 days 00:05:12,2025-01-25 00:21:17.200000+00:00,1,foul,personal,1630166,1610612766,19,12,1610612757,,1628487,203995.0
91,0 days 00:02:41,2025-01-25 00:27:31.900000+00:00,1,foul,personal,1629006,1610612757,24,17,1610612766,,204059,1630703.0
107,0 days 00:01:16,2025-01-25 00:29:19.800000+00:00,1,foul,personal,1629610,1610612757,28,22,1610612766,,1628487,1630703.0
110,0 days 00:00:57.100000,2025-01-25 00:29:53.700000+00:00,1,foul,personal,1630625,1610612766,28,22,1610612757,shooting,204059,1631209.0
130,0 days 00:11:47,2025-01-25 00:35:12.400000+00:00,2,foul,personal,1631209,1610612757,30,22,1610612766,shooting,1628487,1629014.0
135,0 days 00:11:27,2025-01-25 00:35:42.900000+00:00,2,foul,offensive,1630166,1610612757,30,24,1610612757,charge,204059,1631209.0
158,0 days 00:09:18,2025-01-25 00:41:19.100000+00:00,2,foul,personal,203995,1610612757,37,24,1610612766,shooting,204059,1630166.0
177,0 days 00:07:37,2025-01-25 00:44:20.400000+00:00,2,foul,personal,1630182,1610612757,37,31,1610612766,shooting,204059,1630166.0


In [16]:
# 'violation' rows only, keeping just the columns that hold at least one value.
violation_df = (
    df_by_action
    .get_group('violation')
    .dropna(axis='columns', how='all')
)
violation_df

Unnamed: 0,clock,timeActual,period,actionType,subType,personId,possession,scoreHome,scoreAway,teamId,officialId
142,0 days 00:10:40,2025-01-25 00:36:37.600000+00:00,2,violation,kicked ball,1630166.0,1610612766,32,24,1610612757,1626301.0
271,0 days 00:00:19.300000,2025-01-25 01:02:56.600000+00:00,2,violation,defensive goaltending,1641739.0,1610612766,54,50,1610612757,1626301.0
348,0 days 00:06:09,2025-01-25 01:33:30.900000+00:00,3,violation,lane,1631133.0,1610612766,62,63,1610612757,
579,0 days 00:00:30.500000,2025-01-25 02:24:29.400000+00:00,4,violation,delay-of-game,,1610612766,90,96,1610612757,204059.0


In [17]:
# 'turnover' rows only, keeping just the columns that hold at least one value.
turnover_df = (
    df_by_action
    .get_group('turnover')
    .dropna(axis='columns', how='all')
)
turnover_df

Unnamed: 0,clock,timeActual,period,actionType,subType,personId,possession,scoreHome,scoreAway,teamId,descriptor,officialId,stealPersonId
81,0 days 00:03:37,2025-01-25 00:26:11+00:00,1,turnover,out-of-bounds,1629006.0,1610612766,24,17,1610612766,bad pass,,
84,0 days 00:03:20,2025-01-25 00:26:42.600000+00:00,1,turnover,traveling,1630625.0,1610612757,24,17,1610612757,,204059.0,
89,0 days 00:02:47,2025-01-25 00:27:25.900000+00:00,1,turnover,lost ball,1631217.0,1610612766,24,17,1610612766,,,1631133.0
113,0 days 00:00:33.900000,2025-01-25 00:31:02.400000+00:00,1,turnover,bad pass,1631133.0,1610612757,30,22,1610612757,,,1631217.0
136,0 days 00:11:27,2025-01-25 00:35:42.900000+00:00,2,turnover,offensive foul,1630166.0,1610612757,30,24,1610612757,,204059.0,
143,0 days 00:10:27,2025-01-25 00:37:07.700000+00:00,2,turnover,bad pass,203995.0,1610612766,32,24,1610612766,,,1630166.0
145,0 days 00:10:12,2025-01-25 00:37:22+00:00,2,turnover,lost ball,1629014.0,1610612757,32,24,1610612757,,,1629006.0
162,0 days 00:08:54,2025-01-25 00:42:26.900000+00:00,2,turnover,traveling,203995.0,1610612766,37,25,1610612766,,204059.0,
166,0 days 00:08:17,2025-01-25 00:43:14.500000+00:00,2,turnover,bad pass,1629610.0,1610612766,37,28,1610612766,,,1629014.0
181,0 days 00:07:06,2025-01-25 00:45:35.900000+00:00,2,turnover,bad pass,1630166.0,1610612757,39,33,1610612757,,,1631109.0


In [18]:
# 'steal' rows only, keeping just the columns that hold at least one value.
steal_df = (
    df_by_action
    .get_group('steal')
    .dropna(axis='columns', how='all')
)
steal_df

Unnamed: 0,clock,timeActual,period,actionType,subType,personId,possession,scoreHome,scoreAway,teamId
90,0 days 00:02:47,2025-01-25 00:27:25.900000+00:00,1,steal,,1631133,1610612757,24,17,1610612757
114,0 days 00:00:33.900000,2025-01-25 00:31:02.400000+00:00,1,steal,,1631217,1610612766,30,22,1610612766
144,0 days 00:10:27,2025-01-25 00:37:07.700000+00:00,2,steal,,1630166,1610612757,32,24,1610612757
146,0 days 00:10:12,2025-01-25 00:37:22+00:00,2,steal,,1629006,1610612766,32,24,1610612766
167,0 days 00:08:17,2025-01-25 00:43:14.500000+00:00,2,steal,,1629014,1610612757,37,28,1610612757
182,0 days 00:07:06,2025-01-25 00:45:35.900000+00:00,2,steal,,1631109,1610612766,39,33,1610612766
193,0 days 00:06:49,2025-01-25 00:46:42.500000+00:00,2,steal,,1631217,1610612766,42,33,1610612766
217,0 days 00:05:07,2025-01-25 00:51:52.200000+00:00,2,steal,,1630703,1610612757,42,36,1610612757
254,0 days 00:01:59,2025-01-25 00:58:33+00:00,2,steal,,1630166,1610612757,48,46,1610612757
288,0 days 00:10:37,2025-01-25 01:20:38.500000+00:00,3,steal,,1641739,1610612757,54,54,1610612757
