# Code to Compress and Save PBP Data for faster loading
## Uses `dill` for pickling and `zstandard` for compression

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import dill
import zstandard as zstd
from time import perf_counter

from pbpstats.client import Client

# Directory for figures (not referenced by the cells visible in this notebook).
fig_DIR = "../figs/pbp_related/"
# Directory where the zstd-compressed pickle of each season is written.
export_DIR = "../../data/pbpdata/"

# Root directory of the raw pbpstats files; the data provider name is appended
# to it to build the "dir" entry of the pbpstats client settings.
pbp_DIR = "../../pbpdata/"
# Alternative location of the raw files:
# pbp_DIR = "../../data/pbpdata/"

In [None]:
# Get the IDs of all final games of a season from the pbpstats files
def pbp_season(
    league="NBA",
    season_yr="2023",
    season_type="Regular Season",
    data_provider="data_nba",
):
    """Return the IDs of all final games of one season.

    A pbpstats client is configured to read the "Games" resource from files
    located under ``pbp_DIR + data_provider``; the season's final games are
    then scanned for their ``"game_id"`` entries.

    Parameters
    ----------
    league : str
        League passed to ``client.Season``.
    season_yr : str
        Season identifier passed to ``client.Season``.
    season_type : str
        Season type passed to ``client.Season``.
    data_provider : str
        pbpstats data provider; also the sub-directory of ``pbp_DIR`` that
        is read.

    Returns
    -------
    list
        The ``"game_id"`` of every entry in ``season.games.final_games``.
        The number of IDs is also printed.
    """
    client = Client(
        {
            "Games": {"source": "file", "data_provider": data_provider},
            "dir": pbp_DIR + data_provider,
        }
    )
    season = client.Season(league, season_yr, season_type)
    games_id = [game["game_id"] for game in season.games.final_games]
    print("Number of games: ", len(games_id))
    return games_id


# Load the pbpstats Game object (boxscore and possessions) for each game ID
def pbp_games(games_id, data_provider="data_nba"):
    """Load the pbpstats ``Game`` object for every game ID that can be read.

    The client reads the "Boxscore" and "Possessions" resources from files
    located under ``pbp_DIR + data_provider``. Loading is best effort: a game
    that raises while loading is skipped and only counted.

    Parameters
    ----------
    games_id : iterable
        Game IDs to load, e.g. the list returned by ``pbp_season``.
    data_provider : str
        pbpstats data provider; also the sub-directory of ``pbp_DIR`` that
        is read.

    Returns
    -------
    list
        The successfully loaded ``client.Game`` objects, in input order.
        The number of skipped games is printed.
    """
    settings = {
        "Boxscore": {"source": "file", "data_provider": data_provider},
        "Possessions": {"source": "file", "data_provider": data_provider},
        "dir": pbp_DIR + data_provider,
    }
    client = Client(settings)
    games_list = []
    bad_games_list = []
    for gameid in tqdm(games_id):
        try:
            game = client.Game(gameid)
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt/SystemExit still stop a long-running load
            # instead of being recorded as a bad game.
            bad_games_list.append(gameid)
        else:
            games_list.append(game)
    print("Number of bad games: ", len(bad_games_list))

    return games_list

In [None]:
# Export one zstd-compressed dill pickle of loaded games per NBA season.
league = "NBA"
season_start = 2021
season_end = 2022
# np.arange excludes season_end, so these values process the 2021 season only.
seasons = np.arange(season_start, season_end, 1).astype(str)
season_type = "Regular Season"
for season in seasons:
    print(season)
    # Seasons after 2015 are read from the "data_nba" files, earlier ones
    # from the "stats_nba" files.
    if int(season) > 2015:
        data_provider = "data_nba"
    else:
        data_provider = "stats_nba"
    games_id = pbp_season(
        league=league,
        season_yr=season,
        season_type=season_type,
        data_provider=data_provider,
    )
    games_list = pbp_games(games_id, data_provider=data_provider)
    # Time only the pickling + compression step.
    t1 = perf_counter()
    with zstd.open(export_DIR + league + "_PBPdata_" + season + ".pkl.zst", "wb") as f:
        dill.dump(games_list, f)
    t2 = perf_counter()
    # A bare expression inside a loop is never displayed by the notebook, so
    # the elapsed time has to be printed explicitly.
    print("Time to compress and save (s): ", round(t2 - t1))

In [None]:
# Export one zstd-compressed dill pickle of loaded games per WNBA season.
league = "WNBA"
season_start, season_end = 2015, 2023
season_type = "Regular Season"
# Season years as strings; np.arange excludes season_end itself.
seasons = np.arange(season_start, season_end).astype(str)
for season in seasons:
    print(season)

    # Every WNBA season is read from the "stats_nba" provider files.
    data_provider = "stats_nba"

    games_id = pbp_season(league, season, season_type, data_provider)
    games_list = pbp_games(games_id, data_provider)
    with zstd.open(f"{export_DIR}{league}_PBPdata_{season}.pkl.zst", "wb") as f:
        dill.dump(games_list, f)