# Import Packages

In [42]:
import requests
import numpy as np
import pandas as pd
import math

# Extract Gamefeed Data

In [43]:
class savant_gamefeed_scraper:
    """Download a Baseball Savant gamefeed and derive per-pitch metrics.

    Typical use is ``savant_gamefeed_scraper().construct_game(game_pk)``,
    which fetches the feed and returns one DataFrame row per pitch with the
    raw feed columns plus the derived columns added by ``add_attributes``.

    The DataFrame-transforming methods add columns to the frame they are
    given (in place) and also return it.
    """

    # Gravitational acceleration (feet/sec^2).
    _GRAVITY = 32.174
    # Feet-per-second per mile-per-hour (5280 / 3600 ~= 1.467).
    # Assumes start_speed is reported in mph -- TODO confirm.
    _FPS_PER_MPH = 1.467
    # Divides an acceleration by vbar**2 to obtain the Cd / Cl coefficients.
    # NOTE(review): presumably an air-density / ball-geometry constant from the
    # trajectory model this code follows -- confirm its source and units.
    _AERO_K = 5.153E-03
    # Constants of the Cl -> spin-factor -> spin-rate conversion, carried over
    # unchanged. NOTE(review): empirical values; confirm against the model.
    _SPIN_FACTOR_NUM = 0.4
    _SPIN_FACTOR_DEN = 2.32
    _SPIN_RATE_K = 78.92
    # y-coordinate that "extension" is subtracted from to get the release point.
    _RUBBER_Y = 60.5
    # y-coordinate (feet) at which the pitch is treated as reaching the plate.
    _PLATE_Y = 17 / 12
    # Result codes counted as a swing by is_swing().
    # NOTE(review): the original comment glossed these as X=in play, F=foul,
    # S=swinging strike, D=double, E=error, T=triple, W=swinging strike
    # (blocked). MLB's pitch-code table appears to define D/E as
    # "in play, no out"/"in play, run(s)" and T as "foul tip" -- confirm.
    _SWING_CODES = frozenset({'X', 'F', 'S', 'D', 'E', 'T', 'W'})

    def __init__(self):
        # Base URL of the gamefeed endpoint; fetch_game_feed appends ?game_pk=...
        self.base_url = "https://baseballsavant.mlb.com/gf"

    @staticmethod
    def _map_rows(func, *columns):
        # Apply func element-wise across parallel columns and return a list.
        # Avoids df.apply(axis=1), which builds a Series object for every row.
        return [func(*values) for values in zip(*columns)]

    def fix_for_lefties(self, p_throws, value):
        """Mirror a horizontal value for left-handed pitchers.

        Returns ``None`` when ``value`` is ``None``, the negated value when
        ``p_throws == 'L'``, and ``value`` unchanged otherwise.
        """
        if value is None:
            return None
        return -value if p_throws == 'L' else value

    def fix_plate_z(self, plate_z, sz_bot, sz_top):
        """Rescale the pitch height relative to the batter's strike zone.

        The result is rounded to 2 decimals and scaled so that the middle of
        the zone maps to 2.5, ``sz_top`` to 3.5 and ``sz_bot`` to 1.5.
        Returns ``None`` if any input is ``None`` or the arithmetic fails
        (e.g. non-numeric input or a zero-height zone with plain floats).
        """
        if plate_z is None or sz_bot is None or sz_top is None:
            return None
        try:
            zone_mid = (sz_bot + sz_top) / 2
            return round((plate_z - zone_mid) / (sz_top - zone_mid) + 2.5, 2)
        except (TypeError, ValueError, ArithmeticError):
            return None

    def is_whiff(self, is_strike_swinging):
        """Return 1 if ``is_strike_swinging`` equals True, else 0."""
        try:
            # "== True" (not "is True") so numpy booleans are accepted too.
            return 1 if is_strike_swinging == True else 0  # noqa: E712
        except (TypeError, ValueError):
            return 0

    def is_swing(self, result_code):
        """Return 1 if ``result_code`` is one of the swing codes, else 0."""
        try:
            return 1 if result_code in self._SWING_CODES else 0
        except TypeError:
            # Unhashable values cannot be a result code.
            return 0

    def fetch_game_feed(self, game_pk, timeout=30):
        """Fetch the gamefeed for one game and return it as a DataFrame.

        Args:
            game_pk: Game identifier sent as the ``game_pk`` query parameter.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the caller indefinitely.

        Returns:
            A DataFrame built from the concatenation of the ``team_home`` and
            ``team_away`` lists of the JSON response. An empty DataFrame is
            returned (after printing the error) when the request fails or the
            body is not a JSON object.
        """
        api_url = f"{self.base_url}?game_pk={game_pk}"
        try:
            response = requests.get(api_url, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException.
            print(f"Error fetching data: {e}")
            return pd.DataFrame([])

        if not isinstance(data, dict):
            print(f"Error fetching data: unexpected payload type {type(data).__name__}")
            return pd.DataFrame([])

        # "or []" also covers keys that are present but null in the JSON.
        plays = (data.get("team_home") or []) + (data.get("team_away") or [])
        return pd.DataFrame(plays)

    def backcalculate_release_position(self, df):
        """Add ``release_pos_y``, ``release_pos_x`` and ``release_pos_z``.

        ``release_pos_y`` is 60.5 minus ``extension``. The x/z coordinates are
        obtained by moving the (x0, z0) point along a constant-acceleration
        path for the time ``(release_pos_y - y0) / vy0`` (which ignores ``ay``).
        Reads: extension, x0, y0, z0, vx0, vy0, vz0, ax, az.
        """
        df["release_pos_y"] = self._RUBBER_Y - df["extension"]
        delta_t = (df["release_pos_y"] - df["y0"]) / df["vy0"]
        df["release_pos_x"] = df["x0"] + df["vx0"] * delta_t + 0.5 * df["ax"] * delta_t ** 2
        df["release_pos_z"] = df["z0"] + df["vz0"] * delta_t + 0.5 * df["az"] * delta_t ** 2
        return df

    def add_release_metrics_pandas(self, df):
        """Add release-point kinematics, movement, drag/Magnus and spin columns.

        Reads: extension, vx0, vy0, vz0, ax, ay, az, px, pz, start_speed,
        spin_rate. ``ax_flipped`` and ``release_pos_x``/``release_pos_z`` are
        used if present and derived here otherwise, so the method also works
        when called directly on a raw feed DataFrame.

        NOTE(review): the x-direction terms combine ``ax_flipped`` (negated
        ``ax``) with ``vx0`` and ``px`` in their original sign -- confirm this
        mixed sign convention is intended.
        """
        g = self._GRAVITY
        plate_y = self._PLATE_Y

        # Derive prerequisites normally prepared by add_attributes.
        if "ax_flipped" not in df.columns:
            df["ax_flipped"] = df["ax"] * -1
        if "release_pos_x" not in df.columns or "release_pos_z" not in df.columns:
            df = self.backcalculate_release_position(df)

        # Release point y-coordinate.
        df["yR"] = self._RUBBER_Y - df["extension"]

        # Time offset of the release point relative to the y = 50 ft point:
        # root of yR = 50 + vy0*t + 0.5*ay*t^2.
        df["tR"] = (-df["vy0"] - np.sqrt(df["vy0"]**2 - 2 * df["ay"] * (50 - df["yR"]))) / df["ay"]

        # Velocity components at the release point.
        df["vxR"] = df["vx0"] + df["ax_flipped"] * df["tR"]
        df["vyR"] = df["vy0"] + df["ay"] * df["tR"]
        df["vzR"] = df["vz0"] + df["az"] * df["tR"]

        # Difference between the reported start_speed and the computed release speed.
        df["dv0"] = df["start_speed"] - (df["vxR"]**2 + df["vyR"]**2 + df["vzR"]**2)**0.5 / self._FPS_PER_MPH

        # Flight time from the release point (yR) to y = 17/12 ft.
        df["tf"] = (-df["vyR"] - np.sqrt(df["vyR"]**2 - 2 * df["ay"] * (df["yR"] - plate_y))) / df["ay"]

        # Horizontal deviation from the straight line along the release velocity.
        df["x_mvt"] = df["px"] - df["release_pos_x"] - (df["vxR"] / df["vyR"]) * (plate_y - df["yR"])

        # Vertical deviation from the straight line, with the gravity drop added back.
        df["z_mvt"] = df["pz"] - df["release_pos_z"] - (df["vzR"] / df["vyR"]) * (plate_y - df["yR"]) + 0.5 * g * df["tf"]**2

        # Average velocity components over the flight and their magnitude.
        df["vxbar"] = (2 * df["vxR"] + df["ax_flipped"] * df["tf"]) / 2
        df["vybar"] = (2 * df["vyR"] + df["ay"] * df["tf"]) / 2
        df["vzbar"] = (2 * df["vzR"] + df["az"] * df["tf"]) / 2
        df["vbar"] = np.sqrt(df["vxbar"]**2 + df["vybar"]**2 + df["vzbar"]**2)

        # Drag acceleration: the gravity-free acceleration projected onto -vbar.
        df["adrag"] = -(df["ax_flipped"] * df["vxbar"] + df["ay"] * df["vybar"] +
                        (df["az"] + g) * df["vzbar"]) / df["vbar"]

        # Drag coefficient.
        df["Cd"] = df["adrag"] / (self._AERO_K * df["vbar"]**2)

        # Magnus acceleration: what remains after removing drag and gravity.
        df["amagx"] = df["ax_flipped"] + df["adrag"] * df["vxbar"] / df["vbar"]
        df["amagy"] = df["ay"] + df["adrag"] * df["vybar"] / df["vbar"]
        df["amagz"] = df["az"] + df["adrag"] * df["vzbar"] / df["vbar"] + g
        df["amag"] = np.sqrt(df["amagx"]**2 + df["amagy"]**2 + df["amagz"]**2)

        # Magnus-induced movement over the flight time, feet -> inches.
        df["Mx"] = 0.5 * df["amagx"] * df["tf"]**2 * 12
        df["Mz"] = 0.5 * df["amagz"] * df["tf"]**2 * 12

        # Lift coefficient, spin factor and transverse spin rate.
        df["Cl"] = df["amag"] / (self._AERO_K * df["vbar"]**2)
        df["S"] = self._SPIN_FACTOR_NUM * df["Cl"] / (1 - self._SPIN_FACTOR_DEN * df["Cl"])
        df["spinT"] = self._SPIN_RATE_K * df["S"] * df["vbar"]

        # Transverse spin components: spinT along (vbar x amag) / (|amag| * |vbar|).
        df["spinTX"] = df["spinT"] * (df["vybar"] * df["amagz"] - df["vzbar"] * df["amagy"]) / (df["amag"] * df["vbar"])
        df["spinTY"] = df["spinT"] * (df["vzbar"] * df["amagx"] - df["vxbar"] * df["amagz"]) / (df["amag"] * df["vbar"])
        df["spinTZ"] = df["spinT"] * (df["vxbar"] * df["amagy"] - df["vybar"] * df["amagx"]) / (df["amag"] * df["vbar"])

        # Sanity column: magnitude of the components minus spinT.
        df["spin_check"] = np.sqrt(df["spinTX"]**2 + df["spinTY"]**2 + df["spinTZ"]**2) - df["spinT"]

        # Tilt angle in degrees, wrapped into [0, 360). The modulo already maps
        # negative angles into range, so no per-row "+360" adjustment is needed.
        df["phi"] = (np.arctan2(df["amagz"], df["amagx"]) * 180 / np.pi + 90) % 360

        # Ratio of the computed transverse spin to the reported spin_rate.
        df["spin_eff"] = df["spinT"] / df["spin_rate"]

        return df

    def add_attributes(self, df):
        """Add every derived column to a raw gamefeed DataFrame.

        Adds, in order: release position, handedness-normalized horizontal
        values, zone-normalized pitch height, ``ax_flipped``/``ax_normalized``,
        the columns of ``add_release_metrics_pandas``, ``hand_split``
        ('SHH' when ``p_throws == stand`` else 'OHH'), ``is_whiff`` and
        ``is_swing``.

        An empty DataFrame (e.g. after a failed fetch) is returned unchanged
        instead of raising ``KeyError`` on the missing feed columns.
        """
        if df.empty:
            return df

        df = self.backcalculate_release_position(df)

        # Mirror horizontal values so lefties and righties are comparable.
        df['release_pos_x_normalized'] = self._map_rows(
            self.fix_for_lefties, df['p_throws'], df['release_pos_x'])
        df['plate_x_normalized'] = self._map_rows(
            self.fix_for_lefties, df['p_throws'], df['px'])

        # Pitch height relative to each batter's strike zone.
        df['plate_z_normalized'] = self._map_rows(
            self.fix_plate_z, df['pz'], df['sz_bot'], df['sz_top'])

        # Negated horizontal acceleration, then mirrored again for lefties.
        df['ax_flipped'] = df['ax'] * -1
        df['ax_normalized'] = self._map_rows(
            self.fix_for_lefties, df['p_throws'], df['ax_flipped'])

        df = self.add_release_metrics_pandas(df)

        # Same-handed vs. opposite-handed matchup flag.
        df['hand_split'] = np.where(df['p_throws'] == df['stand'], 'SHH', 'OHH')

        # Whiff / swing indicators (0 or 1).
        df['is_whiff'] = self._map_rows(self.is_whiff, df['is_strike_swinging'])
        df['is_swing'] = self._map_rows(self.is_swing, df['result_code'])

        return df

    def construct_game(self, game_pk):
        """Fetch one game and return its pitches with all derived columns.

        Returns an empty DataFrame when the feed could not be fetched.
        """
        return self.add_attributes(self.fetch_game_feed(game_pk))

In [44]:
# Create an instance of the scraper (the constructor only stores the base URL)
scraper = savant_gamefeed_scraper()

# Fetch and process one game; game_pk is the game identifier sent to the API.
# construct_game performs an HTTP request and then adds the derived columns.
game_pk = 745298
game_data = scraper.construct_game(game_pk)

# Display the first 5 records of the dataframe
# (rendered by the notebook because it is the last expression in the cell)
game_data.head()

Unnamed: 0,play_id,inning,ab_number,cap_index,outs,batter,stand,batter_name,pitcher,p_throws,pitcher_name,team_batting,team_fielding,team_batting_id,team_fielding_id,result,des,events,contextMetrics,strikes,balls,pre_strikes,pre_balls,call,call_name,pitch_type,pitch_name,description,result_code,pitch_call,is_strike_swinging,balls_and_strikes,start_speed,end_speed,sz_top,sz_bot,extension,plateTime,zone,spin_rate,px,pz,x0,y0,z0,ax,ay,az,vx0,vy0,vz0,pfxX,pfxZ,pfxZWithGravity,pfxZWithGravityNice,pfxZDirection,pfxXWithGravity,pfxXNoAbs,pfxXDirection,breakX,breakZ,inducedBreakZ,inducedBreakZDec,inducedBreakZForcedSign,ivbZDirection,isSword,is_bip_out,pitch_number,player_total_pitches,player_total_pitches_pitch_types,game_total_pitches,rowId,game_pk,player_name,batSpeed,hit_speed_round,hit_speed,hit_distance,xba,hit_angle,is_barrel,hc_x,hc_x_ft,hc_y,hc_y_ft,runnerOn1B,runnerOn2B,runnerOn3B,release_pos_y,release_pos_x,release_pos_z,release_pos_x_normalized,plate_x_normalized,plate_z_normalized,ax_flipped,ax_normalized,yR,tR,vxR,vyR,vzR,dv0,tf,x_mvt,z_mvt,vxbar,vybar,vzbar,vbar,adrag,Cd,amagx,amagy,amagz,amag,Mx,Mz,Cl,S,spinT,spinTX,spinTY,spinTZ,spin_check,phi,spin_eff,hand_split,is_whiff,is_swing
0,4e032026-4d49-4a67-89ba-e767d144d2ba,1,1,14,1,671739,L,Michael Harris II,657277,R,Logan Webb,ATL,SF,144,137,Strikeout,Michael Harris II strikes out swinging.,Strikeout,"{'averagePitchSpeedPlayer': 92.4, 'maxPitchSpeedPlayer': 95.6, 'pitchSpeedPlayerRank': 16}",0,0,0,0,B,Ball,SI,Sinker,Ball,B,ball,False,0,94.5,87.6,3.482918,1.74905,6.822918,0.39647,14,1940,0.465797,1.539027,-1.32886,50.0047,5.140372,-16.970637,26.794261,-28.481283,8.005938,-137.476378,-4.607287,-8.751946,1.906628,-27.11513,27,↓,15,-15,→,14,27,3,3.2,3,↑,False,N,1,1,1,4,4-745298,745298,SF,,,,,,,,,,,,,,,53.677082,-1.548776,5.253284,-1.548776,0.465797,1.26,16.970637,16.970637,53.677082,-0.026678,7.553201,-138.191186,-3.847473,0.123094,0.39316,-0.841857,0.227407,10.889291,-132.923967,-9.446327,133.703368,25.516813,0.277001,19.04882,1.426194,1.889919,19.1954,17.666823,1.752805,0.208378,0.161358,1702.624297,-157.720295,-133.026987,1690.076219,0.0,95.66603,0.877641,OHH,0,0
1,d4076d40-8827-444f-bba3-c0598c977f3a,1,1,14,1,671739,L,Michael Harris II,657277,R,Logan Webb,ATL,SF,144,137,Strikeout,Michael Harris II strikes out swinging.,Strikeout,"{'averagePitchSpeedPlayer': 92.4, 'maxPitchSpeedPlayer': 95.6, 'pitchSpeedPlayerRank': 10}",0,1,0,1,S,Strike,SI,Sinker,Foul,F,foul,False,10,94.8,87.8,3.49,1.61,6.835183,0.395295,13,2094,-0.378419,1.345085,-1.479733,50.006877,5.094384,-18.772858,27.89847,-31.390121,6.442969,-138.044261,-4.523227,-9.629892,0.405262,-29.472966,29,↓,16,-16,→,16,30,1,0.5,1,↑,False,N,2,2,2,5,5-745298,745298,,73.2,,,,,,,,,,,,,,53.664817,-1.657051,5.203221,-1.657051,-0.378419,1.22,18.772858,18.772858,53.664817,-0.026477,5.945915,-138.782937,-3.692101,0.076546,0.391912,-0.959849,0.002725,9.624568,-133.316066,-9.843182,134.024976,26.460363,0.285867,20.673023,1.578066,-1.159446,20.76556,19.051628,-1.06851,0.224343,0.187138,1979.400807,120.982894,-136.78847,1970.959058,-2.273737e-13,86.78993,0.945273,OHH,0,1
2,8eadd017-7fcd-4292-8a3d-08e7bf7af3e5,1,1,14,1,671739,L,Michael Harris II,657277,R,Logan Webb,ATL,SF,144,137,Strikeout,Michael Harris II strikes out swinging.,Strikeout,"{'averagePitchSpeedPlayer': 87.3, 'maxPitchSpeedPlayer': 90.7}",1,1,1,1,S,Strike,CH,Changeup,Called Strike,C,called_strike,False,11,88.0,82.2,3.482918,1.632148,6.959855,0.424151,7,1552,-0.703136,1.648348,-1.573382,50.001643,5.068128,-7.034036,22.568236,-38.581788,3.598488,-128.352873,-1.160272,-4.149242,-3.78337,-41.115387,41,↓,7,-7,→,7,41,-6,-6.0,-6,↓,False,N,3,3,1,6,6-745298,745298,,,,,,,,,,,,,,,,53.540145,-1.67526,5.085454,-1.67526,-0.703136,1.52,7.034036,7.034036,53.540145,-0.027515,3.404948,-128.973833,-0.098702,0.052618,0.41954,-0.403951,-0.565687,4.880476,-124.239698,-8.191997,124.605098,21.805278,0.27254,7.888096,0.826901,-7.841348,11.153141,8.330468,-8.281099,0.139401,0.082414,810.443437,572.071274,-15.36619,573.861397,0.0,45.170284,0.522193,OHH,0,0
3,ea09c647-117f-4720-a748-7ee4862389f1,1,1,14,1,671739,L,Michael Harris II,657277,R,Logan Webb,ATL,SF,144,137,Strikeout,Michael Harris II strikes out swinging.,Strikeout,"{'averagePitchSpeedPlayer': 92.4, 'maxPitchSpeedPlayer': 94.6, 'pitchSpeedPlayerRank': 3}",2,1,2,1,S,Strike,FF,4-Seam Fastball,Swinging Strike,S,swinging_strike,True,12,94.6,87.4,3.49,1.61,6.86084,0.395408,11,2196,-1.08769,3.595474,-1.390066,50.005684,5.209566,-10.378986,27.22729,-19.660174,2.723797,-137.89625,-0.822769,-5.324776,6.422799,-19.320496,19,↓,9,-9,→,9,18,12,12.0,12,↑,False,N,4,4,1,7,7-745298,745298,,68.5,,,,,,,,,,,,,,53.63916,-1.46544,5.22442,-1.46544,-1.08769,3.61,10.378986,10.378986,53.63916,-0.026322,2.450599,-138.612931,-0.305271,0.097663,0.391829,-0.545515,0.955904,4.483994,-133.278707,-4.156986,133.418891,27.23976,0.296967,11.29447,0.016152,11.665105,16.236995,10.40425,10.745672,0.177015,0.120148,1265.088173,-907.880096,-57.964213,879.114308,2.273737e-13,135.924842,0.576088,OHH,1,1
4,d798ec29-f0db-435a-b00f-9dfac0ae2e75,1,2,0,2,663586,R,Austin Riley,657277,R,Logan Webb,ATL,SF,144,137,Groundout,"Austin Riley grounds out, shortstop Tyler Fitzgerald to first baseman Mark Canha.",Groundout,"{'averagePitchSpeedPlayer': 92.4, 'maxPitchSpeedPlayer': 95.6, 'pitchSpeedPlayerRank': 4}",0,0,0,0,S,Strike,SI,Sinker,Called Strike,C,called_strike,False,0,95.2,87.9,3.367112,1.515043,6.941002,0.393956,6,2011,0.600604,2.470072,-1.239631,50.006734,5.183676,-14.96926,28.160529,-32.222042,7.77763,-138.522278,-1.58273,-7.627738,-0.019301,-29.986546,30,↓,13,-13,→,12,30,0,0.4,0,,False,Y,1,5,3,8,8-745298,745298,,,,,,,,,,,,,,,,53.558998,-1.444003,5.213669,-1.444003,0.600604,2.53,14.96926,14.96926,53.558998,-0.025626,7.39403,-139.243916,-0.757012,0.147411,0.389835,-0.724217,-0.01536,10.311799,-133.754938,-7.037649,134.336314,26.887084,0.289132,17.033141,1.389806,-1.456611,17.15171,15.531279,-1.328177,0.184442,0.128959,1367.204817,121.41134,-62.217645,1360.381293,-2.273737e-13,85.11217,0.679863,SHH,0,0
