In [1]:
import pandas as pd
import numpy as np
import pybaseball as pyb
import matplotlib.pyplot as plt
import seaborn as sns
from math import sqrt, atan2, degrees, pi, atan
from scipy.stats import percentileofscore

In [2]:
# Query Statcast through pybaseball for 2024-03-28 through 2024-04-30.
statcast_data = pyb.statcast(
    start_dt='2024-03-28',
    end_dt='2024-04-30',
)

This is a large query, it may take a moment to complete


100%|███████████████████████████████████████████| 34/34 [00:07<00:00,  4.56it/s]


In [3]:
# Keep only the rows whose pitch_type is 'FF'. The explicit .copy() makes
# `fastballs` an independent frame, so dropping rows from it or adding
# columns to it later does not raise SettingWithCopyWarning.
fastballs = statcast_data[statcast_data['pitch_type'].isin(['FF'])].copy()

In [4]:
# Discard pitches with no vy0 value. Reassigning the result (instead of
# inplace=True) avoids mutating a slice in place, which is what raises
# SettingWithCopyWarning, and keeps this cell safe to re-run.
fastballs = fastballs.dropna(subset=['vy0'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


In [5]:
# Discard pitches with no release_extension value. Reassigning the result
# (instead of inplace=True) avoids in-place mutation of a slice and keeps
# this cell safe to re-run.
fastballs = fastballs.dropna(subset=['release_extension'])

In [6]:
def calculate_vaa(row):
    """Return the vertical approach angle (degrees) for one pitch.

    The y- and z-velocities given at y = 50 ft are propagated under constant
    acceleration to y = 17/12 ft, and the angle of the resulting velocity
    vector relative to horizontal is returned.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row or dict)
        Must provide 'vy0', 'vz0', 'ay' and 'az'.

    Returns
    -------
    float
        Angle in degrees; negative when the ball is travelling downward
        (vz < 0), zero when level, positive when rising.

    Raises
    ------
    ValueError
        If vy0**2 - 2*ay*(50 - yf) is negative (no real solution).
    ZeroDivisionError
        If ay is zero.
    """
    yf = 17 / 12  # Reference y-position: 17 inches expressed in feet
    ay = row['ay']  # Acceleration in y-dimension for the current pitch
    vy0 = row['vy0']  # Velocity in y-dimension at y=50 feet for the current pitch
    vz0 = row['vz0']  # Velocity in z-dimension at y=50 feet for the current pitch
    az = row['az']  # Acceleration in z-dimension for the current pitch

    # v^2 = v0^2 + 2*a*(y - y0); the negative root is taken because the ball
    # travels in the -y direction.
    vy_f = -sqrt(vy0**2 - (2 * ay * (50 - yf)))
    t = (vy_f - vy0) / ay  # time taken to go from y=50 to y=yf
    vz_f = vz0 + (az * t)

    # Measure the angle against the direction of travel (-y). Using
    # atan2(vz_f, -vy_f) keeps the result in (-90, 90) for every sign of
    # vz_f; the earlier 180-degree shift of atan2(vz_f, vy_f) was only valid
    # for vz_f < 0 and was off by 360 degrees otherwise.
    vaa_deg = degrees(atan2(vz_f, -vy_f))

    return vaa_deg
# Add the per-pitch VAA column. .assign builds a new frame instead of
# writing into a possibly-sliced one, which avoids SettingWithCopyWarning.
fastballs = fastballs.assign(VAA=fastballs.apply(calculate_vaa, axis=1))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fastballs['VAA'] = fastballs.apply(calculate_vaa, axis=1)


In [7]:
# Columns carried forward into the release-angle analysis.
ff_columns = [
    'pitcher', 'stand', 'release_pos_z', 'p_throws', 'player_name',
    'pfx_z', 'release_spin_rate', 'spin_axis', 'VAA', 'release_extension',
    'zone', 'plate_z', 'plate_x', 'release_speed', 'release_pos_x',
    'vx0', 'vy0', 'vz0', 'ax', 'ay', 'az',
]
filtered_ff = fastballs.loc[:, ff_columns]

In [8]:
def calculate_VRA(vy0, ay, release_extension, vz0, az):
    """Return the vertical release angle in degrees.

    The y- and z-velocities given at the y = 50 ft reference are
    extrapolated back to the release point (y = 60.5 - release_extension)
    under constant acceleration, and the angle of the velocity vector at
    release is returned. Works on scalars and element-wise on NumPy arrays
    or pandas Series.

    Parameters
    ----------
    vy0, vz0 : y- and z-velocity at y = 50 ft.
    ay, az : y- and z-acceleration (treated as constant).
    release_extension : subtracted from 60.5 to get the release y-position.

    Returns
    -------
    Angle in degrees; negative when the ball leaves the hand travelling
    downward (vz < 0 at release).
    """
    # Signed displacement from the y = 50 ft reference to the release point.
    # Release is up-range of the reference, so this is positive.
    dy_release = 60.5 - release_extension - 50

    # v^2 = v0^2 + 2*a*(y - y0). The displacement enters with a plus sign:
    # the ball was moving faster at release than at y = 50 ft because drag
    # had not yet acted on it. Subtracting the term instead would produce a
    # release speed lower than the speed at y = 50 ft.
    vy_s = -np.sqrt(vy0**2 + 2 * ay * dy_release)

    # Time of release relative to the y = 50 ft crossing (negative: earlier).
    t_s = (vy_s - vy0) / ay
    vz_s = vz0 + az * t_s

    VRA = -np.arctan(vz_s / vy_s) * (180 / np.pi)
    return VRA

def calculate_HRA(vy0, ay, release_extension, vx0, ax):
    """Return the horizontal release angle in degrees.

    The y- and x-velocities given at the y = 50 ft reference are
    extrapolated back to the release point (y = 60.5 - release_extension)
    under constant acceleration, and the horizontal angle of the velocity
    vector at release is returned. Works on scalars and element-wise on
    NumPy arrays or pandas Series.

    Parameters
    ----------
    vy0, vx0 : y- and x-velocity at y = 50 ft.
    ay, ax : y- and x-acceleration (treated as constant).
    release_extension : subtracted from 60.5 to get the release y-position.

    Returns
    -------
    Angle in degrees, computed as -atan(vx / vy) at release.
    """
    # Signed displacement from the y = 50 ft reference to the release point.
    # Release is up-range of the reference, so this is positive.
    dy_release = 60.5 - release_extension - 50

    # v^2 = v0^2 + 2*a*(y - y0). The displacement enters with a plus sign:
    # the ball was moving faster at release than at y = 50 ft because drag
    # had not yet acted on it.
    vy_s = -np.sqrt(vy0**2 + 2 * ay * dy_release)

    # Time of release relative to the y = 50 ft crossing (negative: earlier).
    t_s = (vy_s - vy0) / ay
    vx_s = vx0 + ax * t_s

    HRA = -np.arctan(vx_s / vy_s) * (180 / np.pi)
    return HRA

# calculate_VRA / calculate_HRA are built from NumPy ufuncs, so they can be
# applied to whole columns at once; this gives the same per-pitch values as
# a row-wise DataFrame.apply(axis=1) without the Python-level loop.
filtered_ff['VRA'] = calculate_VRA(
    filtered_ff['vy0'], filtered_ff['ay'], filtered_ff['release_extension'],
    filtered_ff['vz0'], filtered_ff['az'],
)
filtered_ff['HRA'] = calculate_HRA(
    filtered_ff['vy0'], filtered_ff['ay'], filtered_ff['release_extension'],
    filtered_ff['vx0'], filtered_ff['ax'],
)

In [9]:
# Force both release-angle columns to a numeric dtype; anything that cannot
# be parsed becomes NaN.
for angle_col in ('VRA', 'HRA'):
    filtered_ff[angle_col] = pd.to_numeric(filtered_ff[angle_col], errors='coerce')

In [10]:
# Per-pitcher means of the release angles and release positions, with
# 'pitcher' restored as an ordinary column.
grouped_pitchers = (
    filtered_ff
    .groupby('pitcher')[['VRA', 'HRA', 'release_pos_z', 'release_pos_x']]
    .mean()
    .reset_index()
)


In [11]:
# Number of rows per pitcher in filtered_ff.
pitch_count = filtered_ff['pitcher'].value_counts()

# Keep only pitchers with at least 75 rows.
qualified_pitchers = pitch_count[pitch_count >= 75].index
filtered_ff_filtered = filtered_ff.loc[filtered_ff['pitcher'].isin(qualified_pitchers)]

In [12]:
# Per-pitcher standard deviation of release angles and release positions,
# labelled with the first player_name seen for each pitcher id.
by_pitcher = filtered_ff_filtered.groupby('pitcher')
pitcher_std = (
    by_pitcher[['VRA', 'HRA', 'release_pos_z', 'release_pos_x']]
    .std()
    .assign(player_name=by_pitcher['player_name'].first())
    .reset_index()
)

In [13]:
# Relative weight of each component in the combined score.
weights = {'VRA_degrees': 0.42, 'HRA_degrees': 0.27, 'release_pos_z': 0.17, 'release_pos_x': 0.14}

# Turn each standard deviation into an inverted percentile rank, so that a
# smaller spread yields a larger value.
for spread_col in ('VRA', 'HRA', 'release_pos_z', 'release_pos_x'):
    pitcher_std[f'{spread_col}_percentile'] = 1 - pitcher_std[spread_col].rank(pct=True)

# Weighted sum of the four inverted percentiles.
vra_term = pitcher_std['VRA_percentile'] * weights['VRA_degrees']
hra_term = pitcher_std['HRA_percentile'] * weights['HRA_degrees']
height_term = pitcher_std['release_pos_z_percentile'] * weights['release_pos_z']
side_term = pitcher_std['release_pos_x_percentile'] * weights['release_pos_x']
pitcher_std['weighted_tightness_metric'] = vra_term + hra_term + height_term + side_term

# Highest combined score first.
ranked_pitchers_weighted = pitcher_std.sort_values(
    by='weighted_tightness_metric',
    ascending=False,
)


In [14]:
# Write the ranked table (including its index) to an Excel workbook.
ranked_pitchers_weighted.to_excel(
    excel_writer='kirby_index_5_01.xlsx',
)