In [5]:
from collections.abc import Callable
import os
from pybaseball import statcast
import pandas as pd

In [4]:
pd.Timestamp('2025-02-17', tz='America/New_York').strftime("%Y-%m-%d")

'2025-02-17'

In [17]:
class FetchStatcast:
    """Fetch Statcast data.

    Reads from and writes to caches during fetches by default, preventing needless
    API calls and speeding up transactions.
    """
    def __init__(self, local_file_cache: str):
        self.local_file_cache = local_file_cache

    def statcast(
            self,
            start: str,
            end: str,
            preprocess: Callable[[pd.DataFrame], pd.DataFrame],
            read_from_cache=True,
            write_to_cache=True
        ) -> pd.DataFrame:
        """Fetch pitch level data from statcast.

        Data is read from local_file_cache.

        Parameters
        ----------
        start:
            YYYY-MM-DD formatted time string.
        end:
            YYYY-MM-DD formatted time string.
        preprocess:
            A callable for transforming slices of statcast data.

            The fetch process works on one-day chunks of statcast data. This function is called on each
            chunk before aggragating all the data.
        read_from_cache:
            Whether to read from local_file_cache if available.
        write_to_cache:
            Whether to write fetched data to cache.
        """
        dates = pd.date_range(start, end)
        data = []
        for _start, _end in zip(dates, dates[1:]):
            data.append(self._fetch_data(_start, _end, read_from_cache, write_to_cache))
        return pd.concat(data)

    def _fetch_data(self, start: pd.Timestamp, end: pd.Timestamp, use_cache: bool, write_to_cache: bool):
        """Fetch a single slice of data through a cache file."""
        filename = self._file_name(start, end)
        if os.path.exists(filename) and use_cache:
            df = pd.read_parquet(filename)
        else:
            start_str = start.strftime("%Y-%m-%d")
            end_str = end.strftime("%Y-%m-%d")
            df = statcast(start_str, end_str)
            if write_to_cache:
                df.to_parquet(filename)
        return df

    def _file_name(self, start: pd.Timestamp, end: pd.Timestamp):
        start_str = start.strftime("%Y-%m-%d")
        end_str = end.strftime("%Y-%m-%d")
        name = f"statcast_{start_str}_{end_str}"
        return os.path.join(self.local_file_cache, name)


In [18]:
fetch_statcast = FetchStatcast('statcast_data')
fetch_statcast._file_name(pd.Timestamp('2025-02-17', tz='America/New_York'), pd.Timestamp('2025-02-17', tz='America/New_York'))

'statcast_data\\statcast_2025-02-17_2025-02-17'

In [19]:
df = fetch_statcast.statcast("2024-06-24", "2024-06-27")

Unnamed: 0,pitch_type,game_date,release_speed,release_pos_x,release_pos_z,player_name,batter,pitcher,events,description,...,n_thruorder_pitcher,n_priorpa_thisgame_player_at_bat,pitcher_days_since_prev_game,batter_days_since_prev_game,pitcher_days_until_next_game,batter_days_until_next_game,api_break_z_with_gravity,api_break_x_arm,api_break_x_batter_in,arm_angle
2228,FF,2024-06-25,90.4,-3.7,4.3,"Sewald, Paul",680777,623149,field_out,hit_into_play,...,1,3,4,3,4,2,1.52,0.81,0.81,11.7
2339,FF,2024-06-25,90.0,-3.72,4.28,"Sewald, Paul",680777,623149,,ball,...,1,3,4,3,4,2,1.78,0.89,0.89,14.5
2407,FF,2024-06-25,91.1,-3.61,4.39,"Sewald, Paul",680777,623149,,ball,...,1,3,4,3,4,2,1.55,0.97,0.97,16.1
2464,ST,2024-06-25,83.4,-3.57,4.39,"Sewald, Paul",621439,623149,double,hit_into_play,...,1,3,4,2,4,1,2.46,-1.05,-1.05,18.9
2550,FF,2024-06-25,91.8,-3.6,4.29,"Sewald, Paul",621439,623149,,ball,...,1,3,4,2,4,1,1.73,0.86,0.86,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3946,FC,2024-06-26,91.6,-1.1,6.2,"Pepiot, Ryan",668942,686752,,called_strike,...,1,0,5,1,7,2,1.63,-0.54,0.54,44.6
4096,FF,2024-06-26,94.4,-0.95,6.16,"Pepiot, Ryan",668942,686752,,ball,...,1,0,5,1,7,2,0.81,0.67,-0.67,48.0
4264,FC,2024-06-26,90.5,-1.09,6.21,"Pepiot, Ryan",641487,686752,field_out,hit_into_play,...,1,0,5,1,7,2,1.47,-0.4,0.4,44.0
4331,FF,2024-06-26,94.6,-0.84,6.21,"Pepiot, Ryan",641487,686752,,foul,...,1,0,5,1,7,2,0.8,0.76,-0.76,49.4
