In [8]:
import re

import numpy as np
import pandas as pd

LEAGUE_LOB_PCT = {2021: 0.721, 2022: 0.7257, 2023: 0.7188}
LEAGUE_BABIP = {2021: 0.296522719, 2022: 0.290404678, 2023: 0.291664854}

def load_leaderboard(fp: str) -> pd.DataFrame:
    df = (pd.read_csv(fp)
          .drop(['Name', 'Team', 'MLBAMID'], axis=1)
          .rename({'NameASCII': 'PlayerName', 'PlayerId': 'PlayerID'}, axis=1)
          .set_index('PlayerName')
          .reset_index())
    df['IP'] = df['IP'].round() + (df['IP'] - df['IP'].round()).div(0.3)
    df['aWAR'] = df[['WAR', 'RA9-WAR']].mean(axis=1)
    df[['Stuff+', 'Location+', 'Pitching+']] = df[['Stuff+', 'Location+', 'Pitching+']].fillna(0.0)
    if '-' in fp:
        df = df.set_index('PlayerID')
    else:
        season = int(re.search('[0-9]+', fp)[0])
        df['Season'] = season
        df['LOB%+'] = df['LOB%+'].fillna(df['LOB%'].apply(lambda x: np.round(x * 100.0 / LEAGUE_LOB_PCT[season])))
        df['BABIP+'] = df['BABIP+'].fillna(df['BABIP'].apply(lambda x: np.round(x * 100.0 / LEAGUE_BABIP[season])))
        df = df.set_index(['Season', 'PlayerID'])
    
    return df

def display_leaderboard(df: pd.DataFrame, *args: str) -> pd.DataFrame:
    cdf = df.copy()
    int_cols = [col for col in cdf if col[-1] in ['+', '-']]
    single_digit_cols = ['LA', 'WAR', 'RA9-WAR', 'aWAR']
    double_digit_cols = ['ERA', 'xERA', 'FIP', 'xFIP', 'SIERA']
    pct_cols = [col for col in cdf if col.endswith('%') or col == 'HR/FB']
    
    cdf[int_cols] = cdf[int_cols].round().astype(int)
    cdf[single_digit_cols] = cdf[single_digit_cols].round(1)
    cdf[double_digit_cols] = cdf[double_digit_cols].round(2)
    cdf[pct_cols] = cdf[pct_cols].mul(100).round(1).astype(str) + '%'
    cdf['IP'] = cdf['IP'].astype(int) + (cdf['IP'] - cdf['IP'].astype(int)).mul(0.3)
    return cdf[['PlayerName'] + list(args)] if len(args) > 0 else cdf

pd.set_option('display.max_columns', None)

sp2021 = load_leaderboard('sp2021.csv')
sp2022 = load_leaderboard('sp2022.csv')
sp2023 = load_leaderboard('sp2023.csv')
sp = pd.concat([sp2021, sp2022, sp2023]).swaplevel().sort_index()

display_leaderboard(sp2023, 'IP', 'SwStr%', 'K-BB%', 'xERA', 'SIERA', 'FIP-', 'xFIP-', 'Stuff+', 'Pitching+', 'WAR', 'aWAR').query('IP >= 50.0').sort_values('Stuff+', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,PlayerName,IP,SwStr%,K-BB%,xERA,SIERA,FIP-,xFIP-,Stuff+,Pitching+,WAR,aWAR
Season,PlayerID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023,27552,Graham Ashcraft,145.2,9.6%,9.5%,5.00,4.80,110,104,128,103,1.5,1.9
2023,19361,Corbin Burnes,193.2,12.2%,17.1%,3.40,4.02,87,87,127,107,3.4,4.1
2023,24586,Kyle Bradish,168.2,11.0%,18.5%,3.82,3.76,78,82,126,105,3.8,4.6
2023,27498,Spencer Strider,186.2,18.9%,29.2%,3.09,2.86,66,67,125,112,5.5,4.6
2023,22182,Hunter Greene,112.0,13.4%,20.9%,3.82,3.74,92,91,124,107,2.0,1.6
2023,...,...,...,...,...,...,...,...,...,...,...,...,...
2023,14361,Ty Blach,64.2,7.7%,8.0%,,5.17,124,116,75,87,0.2,0.6
2023,6902,Martin Perez,108.1,7.0%,5.9%,,5.43,120,123,73,94,0.3,0.4
2023,16256,Kyle Freeland,155.2,7.8%,7.7%,5.74,5.24,115,117,70,94,1.2,1.6
2023,27472,Jared Shuster,52.2,9.0%,1.7%,5.44,6.12,125,140,64,87,0.1,0.1


In [2]:
weighted_tbf = pd.DataFrame(data={'WeightedTBF': sp.reset_index()[['Season', 'TBF']].apply(lambda x: x['Season'] // 2020 * x['TBF'], axis=1).tolist()}, index=sp.index)

display_leaderboard(pd.concat([sp.reset_index(level='Season')['PlayerName'].drop_duplicates(), sp.loc[:, 'G':'TBF'].groupby('PlayerID').sum(), sp.loc[:, 'O-Swing%':'Pitching+'].mul(weighted_tbf.squeeze(), axis=0).groupby('PlayerID').sum().div(weighted_tbf.groupby('PlayerID').sum().squeeze(), axis=0), sp.loc[:, 'WAR':].mul(pd.Series([1, 2, 3], index=[2021, 2022, 2023]), axis='index', level=1).groupby('PlayerID').sum().div(pd.DataFrame([(pk, year % 2020) for pk, year in sp.index], columns=['PlayerID', 'Weight']).groupby('PlayerID')['Weight'].sum(), axis=0)], axis=1), 'IP', 'SwStr%', 'K-BB%', 'xERA', 'SIERA', 'FIP-', 'xFIP-', 'Stuff+', 'Pitching+', 'WAR', 'aWAR').query('IP >= 50.0').sort_values('WAR', ascending=False)

Unnamed: 0_level_0,PlayerName,IP,SwStr%,K-BB%,xERA,SIERA,FIP-,xFIP-,Stuff+,Pitching+,WAR,aWAR
PlayerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
10310,Zack Wheeler,558.1,12.4%,22.4%,3.01,3.31,66,74,112,110,5.6,5.3
14107,Kevin Gausman,551.2,14.6%,23.7%,3.58,3.24,67,74,108,107,5.3,4.8
27498,Spencer Strider,293.3,17.6%,29.6%,1.97,2.69,60,63,127,111,5.0,4.2
16149,Aaron Nola,579.1,12.4%,23.3%,3.28,3.27,77,77,105,107,4.8,4.1
13125,Gerrit Cole,591.0,13.4%,24.9%,3.30,3.12,76,74,126,111,4.6,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...
25386,Jhony Brito,52.2,8.6%,7.3%,0.00,5.26,146,126,93,104,-0.5,-0.5
20132,Adam Oller,68.1,9.0%,3.5%,0.00,5.77,179,153,89,94,-0.5,-0.6
10811,Mike Foltynewicz,130.0,8.0%,10.5%,0.00,4.94,139,120,95,99,-0.6,-0.1
20253,Griffin Jax,69.1,10.2%,9.9%,0.00,5.17,147,134,95,101,-0.6,-0.6
