In [None]:
import os, sys

# NOTE: "__file__" is a quoted string literal here, not the module attribute, so
# abspath() resolves it against the current working directory. The two dirname()
# calls therefore yield the parent of the working directory, which is appended
# to sys.path (presumably so that `nbafuns` can be imported from there - confirm).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath("__file__"))))
# NOTE(review): the cells below use pd, np, tqdm, pbp_season, pbp_games and
# get_players_pbp without importing them explicitly; presumably they all come
# from this star import - verify against nbafuns.
from nbafuns import *
from pbpstats.resources.enhanced_pbp import FieldGoal

# Directory prefix for the per-season output files. It is concatenated directly
# with the file name, so the trailing slash is required.
data_DIR = "../Shot_Charts/ShotLocationData/"

In [None]:
# Attribute names read from every FieldGoal event by get_loc_data(); the order
# here is also the initial column order of the resulting DataFrame.
# To list the non-callable, public attributes available on an event:
# vars1 = [attr for attr in dir(possession_event) if not callable(getattr(possession_event,attr)) and not attr.startswith("_")]
shot_variables = [
    # game / clock / player / team fields
    "game_id", "clock", "player1_id", "team_id",
    # distance and location fields
    "distance", "locX", "locY",
    # shot value and type
    "shot_value", "shot_type",
    # is_* flags
    "is_and1", "is_assisted", "is_blocked", "is_corner_3",
    "is_heave", "is_made", "is_putback",
    # second player, period, score and timing fields
    "player2_id", "period", "score_margin",
    "seconds_remaining", "seconds_since_previous_event",
]

In [None]:
def set_dtypes(df):
    """Coerce shot-data columns to their intended dtypes, in place.

    * every column whose name contains ``"is_"`` is cast to ``bool``;
    * every other column whose name contains ``"_id"`` is cast to ``int``;
    * a text ``clock`` column (``"MM:SS"`` or ``"MM:SS.f"``) is parsed into
      ``datetime.time`` objects.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to convert. It must have a ``clock`` column. The frame is
        modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenience.

    Raises
    ------
    KeyError
        If ``df`` has no ``clock`` column.
    ValueError
        If an ``_id`` column cannot be cast to ``int`` or a clock string does
        not match ``%M:%S.%f``.
    """
    for col in df.columns:
        if "is_" in col:
            df[col] = df[col].astype(bool)
        elif "_id" in col:
            df[col] = df[col].astype(int)

    clock = df["clock"]
    # Accept both the classic object dtype and pandas' dedicated string dtypes;
    # a plain `dtype == 'O'` test silently skips the latter.
    if pd.api.types.is_object_dtype(clock) or pd.api.types.is_string_dtype(clock):
        # Literal (non-regex) search avoids the invalid "\." escape sequence.
        # na=True leaves missing clocks untouched instead of breaking the mask.
        has_fraction = clock.str.contains(".", regex=False, na=True)
        # "%M:%S.%f" needs a fractional part, so pad whole-second clocks with ".0".
        clock = clock.where(has_fraction, clock + ".0")
        df["clock"] = pd.to_datetime(clock, format="%M:%S.%f").dt.time
    return df

In [None]:
def get_loc_data(games_list, player_dict):
    """Collect one row per field-goal event across the given games.

    Walks every possession of every game, keeps the events that are
    ``FieldGoal`` instances, and reads each name in ``shot_variables`` from the
    event. An attribute that cannot be read is recorded as ``0`` (best-effort).

    Parameters
    ----------
    games_list : iterable
        Game objects exposing ``possessions.items``; each possession exposes
        ``events``.
    player_dict : dict
        Mapping used to translate player ids into names via ``Series.map``
        (presumably id -> name; verify against ``get_players_pbp``).

    Returns
    -------
    pandas.DataFrame
        Columns follow ``shot_variables`` with ``player1_id`` renamed to
        ``player_id`` and ``player2_id`` to ``player_ast_id``, plus
        ``player_name`` and ``player_ast_name``. When no field goals are found
        the frame is empty but still has all of these columns.
    """
    shot_records = []
    for game in tqdm(games_list):
        for possession in game.possessions.items:
            for possession_event in possession.events:
                if not isinstance(possession_event, FieldGoal):
                    continue
                record = {}
                for var in shot_variables:
                    try:
                        record[var] = getattr(possession_event, var)
                    except Exception:
                        # Best-effort: an attribute that is missing or fails to
                        # compute falls back to 0. `Exception` (not a bare
                        # except) lets KeyboardInterrupt/SystemExit propagate.
                        record[var] = 0
                shot_records.append(record)
    # Explicit columns keep the schema stable even when no shots were collected;
    # otherwise the column lookups below raise KeyError on an empty frame.
    df = pd.DataFrame(shot_records, columns=shot_variables)
    df = df.rename(columns={"player1_id": "player_id", "player2_id": "player_ast_id"})
    df["player_name"] = df["player_id"].map(player_dict)
    df["player_ast_name"] = df["player_ast_id"].map(player_dict)
    return df

In [None]:
# Driver: for each season, fetch the play-by-play games, extract the shot rows
# and write them to one parquet file per season under data_DIR.
league = "NBA"
season_type = "Regular Season"
seasons = np.arange(2023, 2024, 1).astype(str)
for season in seasons:
    print(season)
    # Seasons after 2021 use the "data_nba" provider, earlier ones "stats_nba".
    data_provider = "data_nba" if int(season) > 2021 else "stats_nba"
    games_id = pbp_season(
        league=league,
        season_yr=season,
        season_type=season_type,
        data_provider=data_provider,
    )
    games_list = pbp_games(games_id, data_provider=data_provider)
    player_dict = get_players_pbp(league=league)
    data = set_dtypes(get_loc_data(games_list, player_dict))
    # Alternative output formats, kept for reference:
    # data.to_csv(data_DIR + f"{league}_Shot_Loc_" + season + ".csv", index=False)
    # data.to_pickle(data_DIR + f"{league}_Shot_Loc_" + season + ".pkl.zst")
    data.to_parquet(f"{data_DIR}{league}_Shot_Loc_{season}.parquet")