# Import Packages

In [1]:
import requests
import numpy as np
import pandas as pd
import math

# Extract Gamefeed Data

In [2]:
class savant_gamefeed_scraper:
    """Fetch pitch rows from the Baseball Savant gamefeed endpoint and derive
    extra per-pitch columns (release point, handedness-normalized locations,
    spin-axis angle, spin efficiency, swing/whiff flags).
    """

    # Result codes that ``is_swing`` counts as a swing.
    # NOTE(review): an earlier comment here described D/E/T as
    # double/error/triple. MLB's published pitch codes appear to define
    # X = in play (out), D = in play (no out), E = in play (run), F = foul,
    # T = foul tip, S = swinging strike, W = swinging strike (blocked).
    # Confirm against the pitch-code metadata before relying on either reading.
    _SWING_CODES = ('X', 'F', 'S', 'D', 'E', 'T', 'W')

    def __init__(self):
        # Base URL of the gamefeed endpoint; the game id is sent as the
        # ``game_pk`` query parameter.
        self.base_url = "https://baseballsavant.mlb.com/gf"

    @staticmethod
    def _per_row(func, *columns):
        # Call ``func`` once per row with the aligned values of ``columns``.
        # Unlike ``DataFrame.apply(axis=1)`` this yields an empty list for an
        # empty frame and does not build a Series object for every row.
        return [func(*values) for values in zip(*columns)]

    def fix_for_lefties(self, p_throws, value):
        """Mirror a horizontal value for left-handed pitchers.

        Returns ``None`` when ``value`` is ``None``, ``value * -1`` when
        ``p_throws == 'L'``, and ``value`` unchanged otherwise.
        """
        if value is None:
            return None
        return value * -1 if p_throws == 'L' else value

    def fix_plate_z(self, plate_z, sz_bot, sz_top):
        """Rescale pitch height relative to the strike zone.

        The zone midpoint maps to 2.5, ``sz_top`` to 3.5 and ``sz_bot`` to
        1.5; the result is rounded to 2 decimals.  Returns ``None`` when any
        input is ``None`` or the arithmetic raises (for example
        ``sz_top == sz_bot`` with plain Python floats).
        """
        if plate_z is None or sz_bot is None or sz_top is None:
            return None
        try:
            zone_mid = (sz_bot + sz_top) / 2
            return round(((plate_z - zone_mid) / (sz_top - zone_mid)) + 2.5, 2)
        except Exception:
            # Deliberate best effort: an unusable row yields None, not a crash.
            return None

    def is_whiff(self, is_strike_swinging):
        """Return 1 when ``is_strike_swinging`` equals ``True``, else 0."""
        try:
            if is_strike_swinging is None:
                return 0
            return 1 if is_strike_swinging == True else 0
        except Exception:
            # e.g. an array-like whose truth value is ambiguous
            return 0

    def is_swing(self, result_code):
        """Return 1 when ``result_code`` is one of ``_SWING_CODES``, else 0.

        ``None`` and values that cannot be compared (or are simply not in the
        list, such as NaN) give 0.
        """
        try:
            if result_code is None:
                return 0
            return 1 if result_code in self._SWING_CODES else 0
        except Exception:
            return 0

    def fetch_game_feed(self, game_pk, timeout=30):
        """Download the gamefeed JSON for ``game_pk`` and return its pitches.

        Parameters
        ----------
        game_pk : identifier of the game, sent as the ``game_pk`` query
            parameter.
        timeout : seconds passed to ``requests.get`` so a stalled connection
            cannot block forever (new, optional).

        Returns
        -------
        pandas.DataFrame built from the ``team_home`` list followed by the
        ``team_away`` list of the response.  On any request or JSON-decoding
        failure the error is printed and an empty DataFrame is returned.
        """
        try:
            response = requests.get(
                self.base_url, params={"game_pk": game_pk}, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON error is not a RequestException subclass.
            print(f"Error fetching data: {e}")
            return pd.DataFrame([])

        # ``or []`` also covers keys that are present but null.
        plays = (data.get("team_home") or []) + (data.get("team_away") or [])
        return pd.DataFrame(plays)

    def backcalculate_release_position(self, df):
        """Add ``release_pos_x/y/z`` columns to ``df`` (in place) and return it.

        ``release_pos_y`` is ``60.5 - extension``.  The time offset from the
        (x0, y0, z0) point to that y is estimated as
        ``(release_pos_y - y0) / vy0``, i.e. with constant y-velocity (``ay``
        is not used), and x/z are extrapolated with constant acceleration.
        """
        df["release_pos_y"] = 60.5 - df["extension"]
        delta_t = (df["release_pos_y"] - df["y0"]) / df["vy0"]
        df["release_pos_x"] = df["x0"] + df["vx0"] * delta_t + 0.5 * df["ax"] * delta_t ** 2
        df["release_pos_z"] = df["z0"] + df["vz0"] * delta_t + 0.5 * df["az"] * delta_t ** 2
        return df

    def add_release_metrics_pandas(self, df):
        """Return a copy of ``df`` with ``phi`` and ``spin_eff`` columns added.

        Simplified routine: only the spin-axis angle ``phi`` (degrees, in
        [0, 360)) and the spin efficiency ``spin_eff`` are produced.  All
        intermediate quantities are kept in local variables, so neither the
        input frame nor the result carries scratch columns.

        Reads ``extension, vx0, vy0, vz0, ay, az, spin_rate`` and
        ``ax_flipped``; when ``ax_flipped`` is absent it is derived as
        ``ax * -1``, the same definition ``add_attributes`` uses.

        NOTE(review): the x-acceleration is sign-flipped while ``vx0`` is
        not; confirm that mix is intended.
        """
        # Gravitational acceleration (feet/sec^2)
        z_constant = 32.174

        ax = df["ax_flipped"] if "ax_flipped" in df.columns else df["ax"] * -1
        ay = df["ay"]
        az = df["az"]
        vx0 = df["vx0"]
        vy0 = df["vy0"]
        vz0 = df["vz0"]

        # y-coordinate of the release point
        yR = 60.5 - df["extension"]

        # Time at which y == yR, measured from the y = 50 reference point:
        # root of 50 + vy0*t + 0.5*ay*t**2 = yR
        tR = (-vy0 - np.sqrt(vy0**2 - 2 * ay * (50 - yR))) / ay

        # Velocity components at the release point
        vxR = vx0 + ax * tR
        vyR = vy0 + ay * tR
        vzR = vz0 + az * tR

        # Flight time from the release point to y = 17/12 ft:
        # root of yR + vyR*t + 0.5*ay*t**2 = 17/12
        # (17 inches is presumably the front edge of home plate - confirm)
        tf = (-vyR - np.sqrt(vyR**2 - 2 * ay * (yR - 17 / 12))) / ay

        # Average velocity components over that flight, and their magnitude
        vxbar = (2 * vxR + ax * tf) / 2
        vybar = (2 * vyR + ay * tf) / 2
        vzbar = (2 * vzR + az * tf) / 2
        vbar = np.sqrt(vxbar**2 + vybar**2 + vzbar**2)

        # Drag: acceleration (gravity removed) projected against the mean velocity
        adrag = -(ax * vxbar + ay * vybar + (az + z_constant) * vzbar) / vbar

        # Remaining (spin-induced) acceleration once drag and gravity are removed
        amagx = ax + adrag * vxbar / vbar
        amagy = ay + adrag * vybar / vbar
        amagz = az + adrag * vzbar / vbar + z_constant
        amag = np.sqrt(amagx**2 + amagy**2 + amagz**2)

        # Lift coefficient -> spin factor -> transverse spin.
        # The numeric constants are carried over unchanged; presumably they
        # come from a published baseball trajectory model - TODO confirm source.
        Cl = amag / (5.153E-03 * vbar**2)
        S = 0.4 * Cl / (1 - 2.32 * Cl)
        spinT = 78.92 * S * vbar

        # Spin-axis angle in degrees, wrapped into [0, 360).  Same arithmetic
        # as the former row-wise version, done on whole columns.
        phi = np.arctan2(amagz, amagx) * 180 / math.pi + 90
        phi = phi + np.where(amagz < 0, 360, 0)
        phi = phi % 360

        # Ratio of transverse spin to the measured total spin
        spin_eff = spinT / df["spin_rate"]

        return df.assign(phi=phi, spin_eff=spin_eff)

    def add_attributes(self, df):
        """Return a copy of ``df`` enriched with every derived pitch column.

        Added columns: ``release_pos_x/y/z``, ``release_pos_x_normalized``,
        ``plate_x_normalized``, ``plate_z_normalized``, ``ax_flipped``,
        ``ax_normalized``, ``phi``, ``spin_eff``, ``hand_split``,
        ``is_whiff`` and ``is_swing``.  The input frame is left untouched.
        """
        # Work on a copy so the caller's frame is never left half-mutated.
        df = self.backcalculate_release_position(df.copy())
        throws = df['p_throws']

        # Mirror horizontal quantities so lefties and righties are comparable
        df['release_pos_x_normalized'] = self._per_row(
            self.fix_for_lefties, throws, df['release_pos_x'])
        df['plate_x_normalized'] = self._per_row(
            self.fix_for_lefties, throws, df['px'])

        # Vertical location relative to the strike zone
        df['plate_z_normalized'] = self._per_row(
            self.fix_plate_z, df['pz'], df['sz_bot'], df['sz_top'])

        # Sign-flipped horizontal acceleration, then its handedness-mirrored form
        df['ax_flipped'] = df['ax'] * -1
        df['ax_normalized'] = self._per_row(
            self.fix_for_lefties, throws, df['ax_flipped'])

        # Spin-axis angle and spin efficiency
        df = self.add_release_metrics_pandas(df)

        # 'SHH' when pitcher hand equals batter side, otherwise 'OHH'
        df['hand_split'] = np.where(df['p_throws'] == df['stand'], 'SHH', 'OHH')

        # Swing and whiff indicators (1/0)
        df['is_whiff'] = self._per_row(self.is_whiff, df['is_strike_swinging'])
        df['is_swing'] = self._per_row(self.is_swing, df['result_code'])

        return df

    def construct_game(self, game_pk):
        """Fetch the gamefeed for ``game_pk`` and return the enriched frame.

        When nothing could be fetched (request error or a feed without
        pitches) the empty DataFrame is returned as-is instead of failing on
        missing columns.
        """
        df = self.fetch_game_feed(game_pk)
        if df.empty:
            return df
        return self.add_attributes(df)

In [3]:
# Game to analyze: game_pk is the game ID handed to the scraper
game_pk = 745298

# Build the scraper, then fetch and process the pitch data for that game
scraper = savant_gamefeed_scraper()
game_data = scraper.construct_game(game_pk)

# Preview the first 5 rows of the resulting dataframe
game_data.head(5)

Unnamed: 0,play_id,inning,ab_number,cap_index,outs,batter,stand,batter_name,pitcher,p_throws,...,release_pos_x_normalized,plate_x_normalized,plate_z_normalized,ax_flipped,ax_normalized,phi,spin_eff,hand_split,is_whiff,is_swing
0,4e032026-4d49-4a67-89ba-e767d144d2ba,1,1,14,1,671739,L,Michael Harris II,657277,R,...,-1.548776,0.465797,1.26,16.970637,16.970637,95.66603,0.877641,OHH,0,0
1,d4076d40-8827-444f-bba3-c0598c977f3a,1,1,14,1,671739,L,Michael Harris II,657277,R,...,-1.657051,-0.378419,1.22,18.772858,18.772858,86.78993,0.945273,OHH,0,1
2,8eadd017-7fcd-4292-8a3d-08e7bf7af3e5,1,1,14,1,671739,L,Michael Harris II,657277,R,...,-1.67526,-0.703136,1.52,7.034036,7.034036,45.170284,0.522193,OHH,0,0
3,ea09c647-117f-4720-a748-7ee4862389f1,1,1,14,1,671739,L,Michael Harris II,657277,R,...,-1.46544,-1.08769,3.61,10.378986,10.378986,135.924842,0.576088,OHH,1,1
4,d798ec29-f0db-435a-b00f-9dfac0ae2e75,1,2,0,2,663586,R,Austin Riley,657277,R,...,-1.444003,0.600604,2.53,14.96926,14.96926,85.11217,0.679863,SHH,0,0
