In [44]:
import pandas as pd
import numpy as np

In [107]:
def _zscore(values):
    """Standardize a Series by its own mean and sample standard deviation."""
    return (values - values.mean()) / values.std()


def pitcher_abilities(year) -> pd.DataFrame:
    """Flag special pitcher abilities from one season's stat export.

    Reads ``Pitcher Abilities Data {year}.csv`` from the working directory,
    z-scores each stat against every pitcher in the file, and sets a boolean
    ability flag wherever the z-score passes that ability's threshold.

    Args:
        year: Season identifier interpolated into the CSV file name.

    Returns:
        A DataFrame holding ``player_id`` plus one boolean column per ability:
        PowerPitcher, BreakingPitcher, DrK, FastballLife4, FastballLife2,
        GroundballPitcher, FlyballPitcher, Gyroball, ShuutoSpin, Spin4, Spin2,
        Walk, Lucky and Unlucky.

    Raises:
        FileNotFoundError: If the season's CSV file does not exist.
        KeyError: If a required stat column is missing from the CSV.
    """
    df = pd.read_csv(f'Pitcher Abilities Data {year}.csv', index_col=False)
    # A trailing comma on each line of the export shows up as an empty
    # 'Unnamed: 16' column. Drop it when present, but do not fail on files
    # that were exported without it.
    df = df.drop(columns='Unnamed: 16', errors='ignore')

    # Power Pitcher               FaB%
    df['ZFBPercent'] = _zscore(df['n_fastball_formatted'])
    df['PowerPitcher'] = df['ZFBPercent'] >= 1

    # Breaking Pitcher            BrB%
    df['ZBBPercent'] = _zscore(df['n_breaking_formatted'])
    df['BreakingPitcher'] = df['ZBBPercent'] >= 2.25

    # Dr. K                       K %
    df['ZKPercent'] = _zscore(df['p_k_percent'])
    df['DrK'] = df['ZKPercent'] >= 1.5

    # Fastball Life               Fastball Spin
    df['ZFBSpin'] = _zscore(df['fastball_avg_spin'])
    df['FastballLife4'] = df['ZFBSpin'] >= 0.75
    df['FastballLife2'] = df['ZFBSpin'] <= -0.75

    # Groundball Pitcher          GB%
    df['ZGBPercent'] = _zscore(df['groundballs_percent'])
    df['GroundballPitcher'] = df['ZGBPercent'] >= 1.25

    # Flyball Pitcher             Fly%
    df['ZFlyPercent'] = _zscore(df['flyballs_percent'])
    df['FlyballPitcher'] = df['ZFlyPercent'] >= 1.25

    # The two batted-ball labels are mutually exclusive: a pitcher who
    # qualifies for both ends up with neither.
    both_profiles = df['GroundballPitcher'] & df['FlyballPitcher']
    df['GroundballPitcher'] = df['GroundballPitcher'] & ~both_profiles
    df['FlyballPitcher'] = df['FlyballPitcher'] & ~both_profiles

    # Gyroball                    4-Seam Spin
    df['ZFFSpin'] = _zscore(df['ff_avg_spin'])
    df['Gyroball'] = df['ZFFSpin'] >= 2.5

    # Shuuto Spin                 FaB H Movement
    df['ZFBX'] = _zscore(df['fastball_avg_break_x'])
    df['ShuutoSpin'] = df['ZFBX'] >= 2.25

    # Spin                        BB Avg Spin
    df['ZBBSpin'] = _zscore(df['breaking_avg_spin'])
    df['Spin4'] = df['ZBBSpin'] >= 1.5
    df['Spin2'] = df['ZBBSpin'] <= -2.25

    # Walk                        BB %
    df['ZBoBPercent'] = _zscore(df['p_bb_percent'])
    df['Walk'] = df['ZBoBPercent'] >= 1.5

    # Lucky / Unlucky             W/L
    decisions = df['p_win'] + df['p_loss']
    # Zero decisions gives 0/0 = NaN here; NaN never satisfies a threshold
    # comparison, so such pitchers are simply left unflagged.
    df['WinPercent'] = df['p_win'] / decisions
    df['Decisions'] = decisions
    df['ZDecisions'] = _zscore(df['Decisions'])

    # Only pitchers with well-above-average decision counts are eligible, and
    # win percentage is standardized against that qualified group alone.
    qualified = df['ZDecisions'] >= 1
    qualified_win_percent = df.loc[qualified, 'WinPercent']
    df['ZWinPercent'] = (
        (df['WinPercent'] - qualified_win_percent.mean())
        / qualified_win_percent.std()
    )
    df['Lucky'] = (df['ZWinPercent'] >= 1.25) & qualified
    df['Unlucky'] = (df['ZWinPercent'] <= -1.5) & qualified

    # Notebook diagnostic: list every raw and derived column.
    print(df.columns)

    columns = [
        'player_id', 'PowerPitcher', 'BreakingPitcher', 'DrK',
        'FastballLife4', 'FastballLife2', 'GroundballPitcher',
        'FlyballPitcher', 'Gyroball', 'ShuutoSpin', 'Spin4', 'Spin2',
        'Walk', 'Lucky', 'Unlucky',
    ]
    return df[columns]

# Build the ability-flag table for the 2022 season
# (reads 'Pitcher Abilities Data 2022.csv' from the working directory).
pa_df = pitcher_abilities(2022)
# Trailing expression: displays the first 20 rows as the notebook cell output.
pa_df.head(20)

Index(['last_name', ' first_name', 'player_id', 'year', 'p_k_percent',
       'p_bb_percent', 'p_win', 'p_loss', 'groundballs_percent',
       'flyballs_percent', 'ff_avg_spin', 'n_fastball_formatted',
       'fastball_avg_spin', 'fastball_avg_break_x', 'n_breaking_formatted',
       'breaking_avg_spin', 'ZFBPercent', 'PowerPitcher', 'ZBBPercent',
       'BreakingPitcher', 'ZKPercent', 'DrK', 'ZFBSpin', 'FastballLife4',
       'FastballLife2', 'ZGBPercent', 'GroundballPitcher', 'ZFlyPercent',
       'FlyballPitcher', 'ZFFSpin', 'Gyroball', 'ZFBX', 'ShuutoSpin',
       'ZBBSpin', 'Spin4', 'Spin2', 'ZBoBPercent', 'Walk', 'WinPercent',
       'Decisions', 'ZDecisions', 'ZWinPercent', 'Lucky', 'Unlucky'],
      dtype='object')


Unnamed: 0,player_id,PowerPitcher,BreakingPitcher,DrK,FastballLife4,FastballLife2,GroundballPitcher,FlyballPitcher,Gyroball,ShuutoSpin,Spin4,Spin2,Walk,Lucky,Unlucky
0,425794,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,425844,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,434378,False,False,False,True,False,False,False,False,False,False,False,False,True,False
3,434671,False,False,False,False,True,False,False,False,False,False,False,False,False,False
4,445276,True,False,True,True,False,False,True,False,False,False,False,False,False,False
5,445926,True,False,False,False,False,False,False,False,False,False,False,False,False,False
6,446321,True,False,False,False,False,False,False,False,False,False,False,False,False,False
7,446372,False,False,False,False,False,False,False,False,False,False,False,False,False,False
8,448179,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9,450203,False,False,False,True,False,False,False,False,False,True,False,False,False,False


In [108]:
# Save the 2022 flag table. With to_csv's default index=True the row index is
# also written, as a leading column with an empty header.
pa_df.to_csv("2022_Pitcher_Ability.csv")