# Import all possessions of a season using PBP API

In [44]:
import dill
import numpy as np
import pandas as pd
import zstandard as zstd
from tqdm import tqdm

# Directory the per-season possession tables are written to (as parquet).
export_DIR = "../../data/rapm/"
# Directory holding the zstd-compressed, dill-pickled play-by-play game lists.
pbp_DIR = "../../data/pbpdata/"
# Directory holding the per-season team box-score parquet files.
box_DIR = "../../data/box/"

In [4]:
class Poss:
    """Flat record for one possession; every field defaults to 0.

    Fields, in the order they are stored on the instance:

    * ``gid``, ``offt``, ``deft`` -- game id and the offensive / defensive
      team ids.
    * ``off1``..``off5`` and ``def1``..``def5`` -- the five player slots of
      each side.
    * ``pts`` -- points attached to the possession.
    * ``home``, ``away``, ``margin`` -- the two scores and ``home - away``
      as computed at construction time.
    * ``length``, ``secs`` -- timing values supplied by the caller.
    * ``tidh``, ``tida`` -- team ids labelled home / away.
    * ``description``, ``event_type`` -- free-form event metadata.

    The attribute order is significant: ``vars(instance)`` is what callers
    turn into table columns, so new attributes should not be inserted in
    the middle.
    """

    def __init__(
        self,
        gi=0, ot=0, dt=0,
        o1=0, o2=0, o3=0, o4=0, o5=0,
        d1=0, d2=0, d3=0, d4=0, d5=0,
        pts=0, home=0, away=0,
        length=0, secs=0,
        tidh=0, tida=0,
        des=0, evtyp=0,
    ):
        # Game and team identifiers.
        self.gid, self.offt, self.deft = gi, ot, dt
        # Line-ups: five offensive slots followed by five defensive slots.
        self.off1, self.off2, self.off3, self.off4, self.off5 = o1, o2, o3, o4, o5
        self.def1, self.def2, self.def3, self.def4, self.def5 = d1, d2, d3, d4, d5
        self.pts = pts

        # Score state; margin is derived once here and is not kept in sync
        # automatically if home/away are reassigned later.
        self.home, self.away = home, away
        self.margin = home - away
        # Timing information.
        self.length, self.secs = length, secs
        # Team ids labelled home / away.
        self.tidh, self.tida = tidh, tida
        # Event metadata.
        self.description, self.event_type = des, evtyp


# Names of the five offensive and five defensive player-slot attributes,
# in slot order (slot 1 first).
ovars = [f"off{slot}" for slot in range(1, 6)]
dvars = [f"def{slot}" for slot in range(1, 6)]

In [None]:
def _points_per_possession(home_scores, road_scores):
    """Return the points credited to each possession of one game.

    Both arguments are the running scores recorded at consecutive
    possession-ending events. A possession is credited with the larger of
    the two teams' score increases since the previous recorded possession;
    the first one is measured against a 0-0 start. Empty input yields an
    empty list.
    """
    points = []
    prev_home = prev_road = 0
    for home, road in zip(home_scores, road_scores):
        points.append(max(home - prev_home, road - prev_road))
        prev_home, prev_road = home, road
    return points


def _seconds_remaining_in_game(event):
    """Return the seconds left in regulation, or in the current extra period.

    For periods 1-4 the remaining full periods are added on top of
    ``event.seconds_remaining`` (12-minute periods are hard-coded); for
    later periods only the time left in that period is returned.
    """
    if event.period <= 4:
        return event.seconds_remaining + 12 * 60 * (4 - event.period)
    return event.seconds_remaining


def get_rapm_possessions(games_list):
    """Flatten the possession-ending events of many games into one table.

    Args:
        games_list: iterable of game objects. Each game must expose
            ``possessions.items`` (possessions with an ``events`` list) and
            ``boxscore.team_home`` (id of the home team). Each event is
            read for ``count_as_possession``, ``description``,
            ``get_offense_team_id()``, ``game_id``, ``current_players``,
            ``score``, ``seconds_since_previous_event``, ``period``,
            ``seconds_remaining`` and ``event_type``.

    Returns:
        A ``(data, poss_store)`` tuple. ``data`` is a DataFrame with one row
        per counted possession, columns in ``Poss`` attribute order, and
        ``pts`` filled with the points scored on that possession.
        ``poss_store`` is always an empty list; it is returned only so that
        existing two-value unpacking keeps working.

    Raises:
        ValueError: if a game's ``boxscore.team_home`` matches neither team
            found in the game's first event.
    """
    poss_list, points = [], []
    poss_store = []  # never populated; kept for the established return shape
    for game in tqdm(games_list):
        # The two team ids are read from the first event of the game. A game
        # without any event contributes no rows instead of raising.
        first_event = next(
            (ev for possession in game.possessions.items for ev in possession.events),
            None,
        )
        if first_event is None:
            continue
        tems = list(first_event.current_players.keys())
        tidh = game.boxscore.team_home

        # Resolve home/away once per game from the known home team, so the
        # tidh/tida columns agree with the home/away scores below instead of
        # depending on the key order of ``current_players``.
        home_id = next((te for te in tems if te == tidh), None)
        if home_id is None:
            raise ValueError(
                f"home team {tidh!r} is not one of the teams {tems!r} "
                f"of game {getattr(game, 'game_id', None)!r}"
            )
        away_id = next((te for te in tems if te != tidh), None)

        hmscr, rdscr = [], []
        for possession in game.possessions.items:
            for possession_event in possession.events:
                if not possession_event.count_as_possession:
                    continue
                if possession_event.description == "Game End":
                    continue
                off_id = possession_event.get_offense_team_id()
                if not off_id:
                    continue

                p = Poss()
                p.gid = possession_event.game_id

                # Line-ups: the team on offense fills off1..off5, the other
                # team fills def1..def5.
                for te in tems:
                    players = possession_event.current_players[te]
                    if te == off_id:
                        for i, var in enumerate(ovars):
                            setattr(p, var, players[i])
                        p.offt = te
                    else:
                        for i, var in enumerate(dvars):
                            setattr(p, var, players[i])
                        p.deft = te

                # Running score at this event, split into home and away.
                for te in tems:
                    if te == tidh:
                        p.home = possession_event.score[te]
                        hmscr.append(p.home)
                    else:
                        p.away = possession_event.score[te]
                        rdscr.append(p.away)
                p.margin = p.home - p.away

                p.length = possession_event.seconds_since_previous_event
                p.secs = _seconds_remaining_in_game(possession_event) - p.length
                p.tidh = home_id
                p.tida = away_id
                p.description = possession_event.description
                p.event_type = possession_event.event_type
                poss_list.append(p)

        # One entry per appended possession of this game, so ``points`` stays
        # aligned with ``poss_list`` across games.
        points.extend(_points_per_possession(hmscr, rdscr))

    data = pd.DataFrame([vars(p) for p in poss_list])
    data["pts"] = points
    return data, poss_store

In [54]:
# Export one possession table per season for the chosen league.
league = "NBA"
season_start = 2017
season_end = 2024  # exclusive: the last processed season is season_end - 1
seasons = np.arange(season_start, season_end, 1).astype(str)
for season in seasons:
    print(season)
    # NOTE: dill.load executes arbitrary code while unpickling; only point
    # this at files that were produced locally / are trusted.
    with zstd.open(pbp_DIR + league + "_PBPdata_" + season + ".pkl.zst","rb") as f:
        games_list = dill.load(f)
    box = pd.read_parquet(box_DIR + league + "_Box_T_Base_" + season + ".parquet")
    box = box[["GAME_ID","TEAM_ID"]].rename(columns={"GAME_ID":"game_id","TEAM_ID":"tidh"})
    # Build the game -> team lookup once per season instead of running a
    # DataFrame.query for every game. As before, the FIRST box row of each
    # game supplies the id stored as the home team.
    # NOTE(review): this presumes the first row per game is the home team -
    # confirm against how the box-score file is produced.
    home_team_by_game = box.drop_duplicates(subset="game_id", keep="first").set_index("game_id")["tidh"]
    for game in games_list:
        game.boxscore.team_home = home_team_by_game.loc[str(game.game_id)]
    data,poss_store = get_rapm_possessions(games_list)
    # data.iloc[:,:-2].to_csv(export_DIR+league +"_rapm_possessions_"+season+".csv",index=False)
    # The last two columns are dropped from the export.
    data.iloc[:,:-2].to_parquet(export_DIR+league +"_rapm_possessions_"+season+".parquet")

2018


100%|██████████| 1230/1230 [00:56<00:00, 21.81it/s]


2019


100%|██████████| 1059/1059 [00:59<00:00, 17.76it/s]


2020


100%|██████████| 1080/1080 [00:41<00:00, 25.88it/s]


2021


100%|██████████| 1228/1228 [01:00<00:00, 20.20it/s]


2022


100%|██████████| 1230/1230 [01:16<00:00, 16.11it/s]


2023


100%|██████████| 1230/1230 [01:25<00:00, 14.34it/s]
