# Relief Pitcher Summary Statistics and Rest Day Analysis

This notebook loads the filtered relief pitcher dataset, calculates rest days between appearances, and creates game-level summary statistics for analysis.

In [7]:
# Import necessary packages for data analysis
import pandas as pd
import numpy as np
import polars as pl
from datetime import datetime, timedelta

In [8]:
# Load the relief pitcher dataset with polars
statcast_relievers = pl.read_parquet('data/statcast_relievers.parquet')

statcast_relievers.shape

(1207931, 33)

In [13]:
# Check all column names to identify the correct pitcher name column
statcast_relievers.columns

['pitcher_id',
 'pitcher_name',
 'date',
 'game_id',
 'velocity',
 'spin_rate',
 'pitch_type_abbr',
 'pitch_type',
 'pitch_result',
 'ab_result',
 'pitch_call',
 'strike_zone',
 'ball_count',
 'strike_count',
 'outs',
 'inning',
 'top_bottom',
 'home_team',
 'away_team',
 'ab_number',
 'pitch_num',
 'batted_ball_type',
 'runner_1b',
 'runner_2b',
 'runner_3b',
 'away_score',
 'home_score',
 'defense_score',
 'offense_score',
 'game_type',
 'batter_side',
 'pitcher_hand',
 'year',
 'rest_days']

In [14]:
# Let's check one of the original parquet files to see the correct column names
test_data = pl.read_parquet('data/statcast_2021.parquet')
print("All columns with 'name' in them:")
[col for col in test_data.columns if 'name' in col.lower()]

All columns with 'name' in them:


['player_name', 'pitch_name']

In [9]:
# Preview data for sample pitcher Jhoan Duran
jhoan_duran = statcast_relievers.filter(pl.col('pitcher_name') == 'Duran, Jhoan')

jhoan_duran

pitcher_id,pitcher_name,date,game_id,velocity,spin_rate,pitch_type_abbr,pitch_type,pitch_result,ab_result,pitch_call,strike_zone,ball_count,strike_count,outs,inning,top_bottom,home_team,away_team,ab_number,pitch_num,batted_ball_type,runner_1b,runner_2b,runner_3b,away_score,home_score,defense_score,offense_score,game_type,batter_side,pitcher_hand,year
i64,str,datetime[ns],i64,f64,i64,str,str,str,str,str,i64,i64,i64,i64,i64,str,str,str,i64,i64,str,i64,i64,i64,i64,i64,i64,i64,str,str,str,i32
661395,"""Duran, Jhoan""",2022-04-08 00:00:00,661750,88.5,2584,"""SL""","""Slider""","""ball""",,"""B""",14,0,0,0,5,"""Top""","""MIN""","""SEA""",37,1,,608385,664034,,2,1,1,2,"""R""","""R""","""R""",2022
661395,"""Duran, Jhoan""",2022-04-08 00:00:00,661750,93.6,1650,"""FS""","""Split-Finger""","""blocked_ball""",,"""B""",14,0,0,0,5,"""Top""","""MIN""","""SEA""",36,1,,664034,,,2,1,1,2,"""R""","""L""","""R""",2022
661395,"""Duran, Jhoan""",2022-04-08 00:00:00,661750,95.7,1824,"""FS""","""Split-Finger""","""swinging_strike""",,"""S""",14,0,0,2,5,"""Top""","""MIN""","""SEA""",39,1,,,608385,664034,2,1,1,2,"""R""","""L""","""R""",2022
661395,"""Duran, Jhoan""",2022-04-08 00:00:00,661750,96.8,1754,"""FS""","""Split-Finger""","""called_strike""",,"""S""",5,0,0,0,5,"""Top""","""MIN""","""SEA""",35,1,,,,,2,1,1,2,"""R""","""R""","""R""",2022
661395,"""Duran, Jhoan""",2022-04-08 00:00:00,661750,96.5,1865,"""SI""","""Sinker""","""ball""",,"""B""",14,0,0,2,6,"""Top""","""MIN""","""SEA""",47,1,,,,,2,1,1,2,"""R""","""L""","""R""",2022
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
661395,"""Duran, Jhoan""",2024-09-26 00:00:00,745853,86.1,2565,"""KC""","""Knuckle Curve""","""foul""",,"""S""",12,2,2,0,9,"""Top""","""MIN""","""MIA""",71,5,,,,,4,4,4,4,"""R""","""R""","""R""",2024
661395,"""Duran, Jhoan""",2024-09-26 00:00:00,745853,84.9,2616,"""KC""","""Knuckle Curve""","""ball""",,"""B""",11,2,2,0,9,"""Top""","""MIN""","""MIA""",71,6,,,,,4,4,4,4,"""R""","""R""","""R""",2024
661395,"""Duran, Jhoan""",2024-09-26 00:00:00,745853,100.5,1999,"""FF""","""4-Seam Fastball""","""foul""",,"""S""",3,3,2,1,9,"""Top""","""MIN""","""MIA""",72,6,,,,,4,4,4,4,"""R""","""L""","""R""",2024
661395,"""Duran, Jhoan""",2024-09-26 00:00:00,745853,99.8,2091,"""FF""","""4-Seam Fastball""","""hit_into_play""","""double""","""X""",5,3,2,1,9,"""Top""","""MIN""","""MIA""",72,7,"""fly_ball""",,,,4,4,4,4,"""R""","""L""","""R""",2024


In [11]:
# Calculate rest days: difference in days between current appearance and previous appearance
# Group by pitcher_id and calculate the difference in dates
statcast_relievers = statcast_relievers.with_columns(
    pl.col('date')
    .diff()
    .over('pitcher_id')
    .dt.total_days()
    .cast(pl.Int32)
    .alias('rest_days')
)

# Preview the result
statcast_relievers.select(['pitcher_name', 'pitcher_id', 'date', 'game_id', 'rest_days']).head(20)

pitcher_name,pitcher_id,date,game_id,rest_days
str,i64,datetime[ns],i64,i32
"""Pujols, Albert""",405395,2022-05-15 00:00:00,661984,
"""Pujols, Albert""",405395,2022-05-15 00:00:00,661984,0
"""Pujols, Albert""",405395,2022-05-15 00:00:00,661984,0
"""Pujols, Albert""",405395,2022-05-15 00:00:00,661984,0
"""Pujols, Albert""",405395,2022-05-15 00:00:00,661984,0
…,…,…,…,…
"""Pujols, Albert""",405395,2022-05-15 00:00:00,661984,0
"""Pujols, Albert""",405395,2022-05-15 00:00:00,661984,0
"""Pujols, Albert""",405395,2022-05-15 00:00:00,661984,0
"""Pujols, Albert""",405395,2022-05-15 00:00:00,661984,0


In [12]:
# First, ensure date column is in datetime format and sort by pitcher and date
statcast_relievers = statcast_relievers.with_columns(
    pl.col('date').cast(pl.Date)
).sort(['pitcher_id', 'date', 'game_id', 'pitch_num'])

# Check the data types
statcast_relievers.schema

Schema([('pitcher_id', Int64),
        ('pitcher_name', String),
        ('date', Date),
        ('game_id', Int64),
        ('velocity', Float64),
        ('spin_rate', Int64),
        ('pitch_type_abbr', String),
        ('pitch_type', String),
        ('pitch_result', String),
        ('ab_result', String),
        ('pitch_call', String),
        ('strike_zone', Int64),
        ('ball_count', Int64),
        ('strike_count', Int64),
        ('outs', Int64),
        ('inning', Int64),
        ('top_bottom', String),
        ('home_team', String),
        ('away_team', String),
        ('ab_number', Int64),
        ('pitch_num', Int64),
        ('batted_ball_type', String),
        ('runner_1b', Int64),
        ('runner_2b', Int64),
        ('runner_3b', Int64),
        ('away_score', Int64),
        ('home_score', Int64),
        ('defense_score', Int64),
        ('offense_score', Int64),
        ('game_type', String),
        ('batter_side', String),
        ('pitcher_hand', String)

## Step 1: Calculate Rest Days Between Appearances

For each pitcher, we need to calculate the number of days between consecutive appearances. This will be our primary independent variable.