# Pitcher summary

The point of this notebook is to start collecting the summary data on pitchers. This will then be used for future analysis/wrangling.

In [1]:
import pybaseball as pyb

import pandas as pd
import numpy as np
import requests
import time

from bs4 import BeautifulSoup

## Pitcher summary

A summary of all pitchers

In [85]:
import os

fg_pitching_df = None

for f in os.listdir('../data/fangraphs_pitchers'):
    year_df = pd.read_csv(f'../data/fangraphs_pitchers/{f}')
    year_df['year'] = int(f.replace('.csv', ''))
    if fg_pitching_df is None:
        fg_pitching_df = year_df
    else:
        fg_pitching_df = pd.concat([fg_pitching_df, year_df])

In [88]:
fg_pitching_df = fg_pitching_df.sort_values('year')

In [89]:
fg_pitching_df.head()

Unnamed: 0,Name,Team,W,L,SV,G,GS,IP,K/9,BB/9,...,LOB%,GB%,HR/FB,EV,ERA,FIP,xFIP,WAR,playerid,year
104,Kazuhiro Sasaki,Mariners,0,4,45,69,0,66.2,8.37,1.49,...,71.4%,,,,3.24,3.03,,1.6,1098,2001
392,Blaine Neal,Marlins,0,0,0,4,0,5.1,5.06,8.44,...,66.7%,,,,6.75,4.74,,0.0,1468,2001
393,Kevin Tolar,Tigers,0,0,0,9,0,10.2,9.28,10.97,...,60.0%,,,,6.75,4.64,,0.0,1009,2001
394,Rick Bauer,Orioles,0,5,0,6,6,33.0,4.36,2.45,...,65.3%,,,,4.64,5.75,,0.0,125,2001
395,Johnny Ruffin,Marlins,0,0,0,3,0,3.2,9.82,9.82,...,60.0%,,,,4.91,4.96,,0.0,1011295,2001


In [90]:
fg_pitching_df.tail()

Unnamed: 0,Name,Team,W,L,SV,G,GS,IP,K/9,BB/9,...,LOB%,GB%,HR/FB,EV,ERA,FIP,xFIP,WAR,playerid,year
381,Josh Lucas,Orioles,0,0,1,9,0,15.2,9.19,4.02,...,49.5%,45.7%,10.0%,88.5,5.74,4.17,5.05,0.1,11686,2019
380,Brian Schlitter,Athletics,0,0,0,6,0,9.2,5.59,3.72,...,75.0%,61.3%,0.0%,87.4,3.72,3.21,4.65,0.1,3599,2019
379,Buddy Boshers,Blue Jays,0,3,0,28,1,20.0,11.7,4.5,...,78.4%,45.1%,15.8%,88.8,4.05,4.21,4.15,0.1,8490,2019
385,Dan Altavilla,Mariners,2,1,0,17,0,14.2,11.05,7.36,...,61.2%,45.5%,7.7%,86.8,5.52,4.1,4.98,0.1,16507,2019
18,Sonny Gray,Reds,11,8,0,31,31,175.1,10.52,3.49,...,79.7%,50.8%,13.0%,88.1,2.87,3.42,3.65,4.4,12768,2019


In [92]:
fg_pitching_df.columns

Index(['Name', 'Team', 'W', 'L', 'SV', 'G', 'GS', 'IP', 'K/9', 'BB/9', 'HR/9',
       'BABIP', 'LOB%', 'GB%', 'HR/FB', 'EV', 'ERA', 'FIP', 'xFIP', 'WAR',
       'playerid', 'year'],
      dtype='object')

In [93]:
fg_pitching_df.groupby(['Name', 'playerid'])['Team'].unique()

Name          playerid
A.J. Achter   11387                                        [Twins, Angels]
A.J. Burnett  512         [Marlins, Blue Jays, Yankees, Pirates, Phillies]
A.J. Cole     11467                            [Nationals, - - -, Indians]
A.J. Griffin  11132                                   [Athletics, Rangers]
A.J. Minter   18655                                               [Braves]
                                                ...                       
Zack Littell  15823                                                [Twins]
Zack Segovia  8742                                   [Phillies, Nationals]
Zack Weiss    15444                                                 [Reds]
Zack Wheeler  10310                                                 [Mets]
Zeke Spruill  3873                                          [Diamondbacks]
Name: Team, Length: 3185, dtype: object

Get their first and last seasons, starting and ending age, and number of games played. Also record all the teams they played for, and how many this is.

In [95]:
pitchers_summary_df = fg_pitching_df.groupby(['Name', 'playerid']).agg({'year': [min, max], 
                                                              'G': sum, 'GS': sum})
pitchers_summary_df['teams'] = fg_pitching_df.groupby(['Name', 'playerid'])['Team'].unique()
pitchers_summary_df['num_teams'] = fg_pitching_df.groupby(['Name', 'playerid'])['Team'].nunique()
pitchers_summary_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,year,year,G,GS,teams,num_teams
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,sum,sum,Unnamed: 6_level_1,Unnamed: 7_level_1
Name,playerid,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
A.J. Achter,11387,2014,2016,45,0,"[Twins, Angels]",2
A.J. Burnett,512,2001,2015,415,410,"[Marlins, Blue Jays, Yankees, Pirates, Phillies]",5
A.J. Cole,11467,2015,2019,79,19,"[Nationals, - - -, Indians]",3
A.J. Griffin,11132,2012,2017,88,85,"[Athletics, Rangers]",2
A.J. Minter,18655,2017,2019,117,0,[Braves],1
A.J. Morris,9919,2016,2016,7,0,[Reds],1
A.J. Murray,3422,2007,2008,16,4,[Rangers],1
A.J. Puk,19343,2019,2019,10,0,[Athletics],1
A.J. Reed,16246,2019,2019,1,0,[White Sox],1
A.J. Schugel,11432,2015,2017,73,0,"[Diamondbacks, Pirates]",2


In [97]:
pitchers_summary_df.columns = ['first_season', 'last_season', 'games_played', 'games_started', 'teams', 'num_teams']

In [98]:
pitchers_summary_df = pitchers_summary_df.reset_index()

In [99]:
pitchers_summary_df.head()

Unnamed: 0,Name,playerid,first_season,last_season,games_played,games_started,teams,num_teams
0,A.J. Achter,11387,2014,2016,45,0,"[Twins, Angels]",2
1,A.J. Burnett,512,2001,2015,415,410,"[Marlins, Blue Jays, Yankees, Pirates, Phillies]",5
2,A.J. Cole,11467,2015,2019,79,19,"[Nationals, - - -, Indians]",3
3,A.J. Griffin,11132,2012,2017,88,85,"[Athletics, Rangers]",2
4,A.J. Minter,18655,2017,2019,117,0,[Braves],1


In [None]:
pitcher_keys = []
ten_pct_inc = int(pitchers_summary_df['Name'].nunique() / 10)

for i, name in enumerate(pitchers_summary_df['Name'].unique()):
    # Try and get their first and last name to search for. If this splits into more than
    # just two parts, record it and move on
    try:
        first, last = name.split(' ', 1)
        if '.' in first:
            first = first.replace('.', '. ')
            first = first.rstrip(' ')
    except Exception as e:
        row = [name] + [None]*4
        pitcher_keys.append(row)
        continue
        
    # If you get a first and last name, look them up. If this returns more than one player,
    # record it and move on. If not, get their data and 
    pitcher_data = pyb.playerid_lookup(last, first)
    if pitcher_data.shape[0] > 1:
        row = [name] + [None]*4
        pitcher_keys.append(row)
        continue
    else:
        try:
            row = [name] + list(pitcher_data[['key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs']].values[0])
        except Exception as e:
            row = [name] + [None]*4
        pitcher_keys.append(row)
        
    # Sleep for one second to avoid rate limiting
    time.sleep(1)
    
    if i % ten_pct_inc == 0:
        print(f'{10*i/ten_pct_inc}% complete')

Gathering player lookup table. This may take a moment.
0.0% complete
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a mome

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering 

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering 

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering 

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering 

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering 

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering 

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering 

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering 

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering 

In [55]:
pitchers_keys_df = pd.DataFrame(pitcher_keys, columns=['Name', 'key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs'])
pitchers_keys_df.head()

Unnamed: 0,Name,key_mlbam,key_retro,key_bbref,key_fangraphs
0,A.J. Burnett,150359.0,burna001,burnea.01,512.0
1,A.J. Griffin,456167.0,grifa002,griffaj01,11132.0
2,Aaron Cook,346871.0,cooka002,cookaa01,1571.0
3,Aaron Harang,421685.0,haraa001,haranaa01,1451.0
4,Aaron Nola,605400.0,nolaa001,nolaaa01,16149.0


In [56]:
pitchers_summary_df = pitchers_summary_df.merge(pitchers_keys_df, how='left', on='Name')

In [57]:
pitchers_summary_df.head()

Unnamed: 0,Name,IDfg,first_season,last_season,start_age,end_age,games_played,teams,num_teams,key_mlbam,key_retro,key_bbref,key_fangraphs
0,A.J. Burnett,512,2001,2015,24,38,370,"[Blue Jays, Marlins, Pirates, Yankees, Phillies]",5,150359.0,burna001,burnea.01,512.0
1,A.J. Griffin,11132,2013,2013,25,25,32,[Athletics],1,456167.0,grifa002,griffaj01,11132.0
2,Aaron Cook,1571,2006,2008,27,29,89,[Rockies],1,346871.0,cooka002,cookaa01,1571.0
3,Aaron Harang,1451,2005,2015,27,37,279,"[Reds, Braves, Dodgers, Padres, Phillies]",5,421685.0,haraa001,haranaa01,1451.0
4,Aaron Nola,16149,2017,2019,24,26,94,[Phillies],1,605400.0,nolaa001,nolaaa01,16149.0


In [58]:
pitchers_summary_df[pitchers_summary_df['key_mlbam'].isna()].shape[0]

39

39 pitchers couldn't be processed, so about 10%.

In [60]:
pitchers_summary_df[pitchers_summary_df['key_mlbam'].isna()]

Unnamed: 0,Name,IDfg,first_season,last_season,start_age,end_age,games_played,teams,num_teams,key_mlbam,key_retro,key_bbref,key_fangraphs
7,Adam Eaton,1029,2003,2004,25,26,64,[Padres],1,,,,
27,Bobby Jones,1039,2001,2001,31,31,33,[Padres],1,,,,
45,Brian Anderson,44,2003,2004,31,32,67,"[- - -, Royals]",2,,,,
56,Carlos Martinez,11682,2015,2017,23,25,94,[Cardinals],1,,,,
65,Chan Ho Park,1267,2001,2001,28,28,36,[Dodgers],1,,,,
66,Charlie Morton,4676,2011,2019,27,35,92,"[Rays, Astros, Pirates]",3,,,,
70,Chris Carpenter,1292,2001,2011,26,36,224,"[Cardinals, Blue Jays]",2,,,,
77,Chris Young,3196,2005,2014,26,35,122,"[Padres, Rangers, Mariners]",3,,,,
84,Cliff Lee,1636,2004,2013,25,34,284,"[- - -, Phillies, Indians]",3,,,,
105,Danys Baez,367,2002,2002,24,24,39,[Indians],1,,,,


It looks like at least one issue is players with first names who are initial are stored with a space between their initials.

In [249]:
pitcher_keys_initials = []

for name in pitchers_summary_df[pitchers_summary_df['key_mlbam'].isna()]['Name'].unique():
    try:
        first, last = name.split(' ')
    except Exception as e:
        row = [name] + [None]*4
        pitcher_keys_missing.append(row)
        continue
    if '.' in first:
        first = first.replace('.', '. ')
        first = first.rstrip(' ')
    else:
        continue

    # If you get a first and last name, look them up. If this returns more than one player,
    # record it and move on. If not, get their data and 
    pitcher_data = pyb.playerid_lookup(last, first)
    if pitcher_data.shape[0] > 1:
        row = [name] + [None]*4
        pitcher_keys_missing.append(row)
        continue
    else:
        try:
            row = [name] + list(pitcher_data[['key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs']].values[0])
        except Exception as e:
            row = [name] + [None]*4
        pitcher_keys_missing.append(row)

    # Sleep for one second to avoid rate limiting
    time.sleep(1)

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.


In [286]:
pitcher_keys_initials

[['A.J. Burnett', 150359, 'burna001', 'burnea.01', 512],
 ['A.J. Griffin', 456167, 'grifa002', 'griffaj01', 11132],
 ['C.J. Wilson', 450351, 'wilsc004', 'wilsocj01', 3580],
 ['Chan Ho Park', None, None, None, None],
 ['J.A. Happ', 457918, 'happj001', 'happja01', 7410],
 ['Jorge De La Rosa', None, None, None, None],
 ['R.A. Dickey', 285079, 'dickr001', 'dicker.01', 1245],
 ['Rubby de la Rosa', None, None, None, None],
 ['Tony Armas Jr.', None, None, None, None]]

In [287]:
pitchers_keys_df = pd.DataFrame(pitcher_keys_initials, columns=['Name', 'key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs'])
pitchers_keys_df = pitchers_keys_df.set_index('Name')
pitchers_summary_df = pitchers_summary_df.set_index('Name')
pitchers_summary_df.update(pitchers_keys_df)

In [295]:
pitchers_keys_df = pitchers_keys_df.reset_index()
pitchers_summary_df = pitchers_summary_df.reset_index()

In [296]:
pitchers_summary_df.head()

Unnamed: 0,Name,first_season,last_season,start_age,end_age,games_played,teams,num_teams,key_mlbam,key_retro,key_bbref,key_fangraphs
0,A.J. Burnett,2001,2015,24,38,370,"[Blue Jays, Marlins, Pirates, Yankees, Phillies]",5,150359.0,burna001,burnea.01,512.0
1,A.J. Griffin,2013,2013,25,25,32,[Athletics],1,456167.0,grifa002,griffaj01,11132.0
2,Aaron Cook,2006,2008,27,29,89,[Rockies],1,346871.0,cooka002,cookaa01,1571.0
3,Aaron Harang,2005,2015,27,37,279,"[Reds, Braves, Dodgers, Padres, Phillies]",5,421685.0,haraa001,haranaa01,1451.0
4,Aaron Nola,2017,2019,24,26,94,[Phillies],1,605400.0,nolaa001,nolaaa01,16149.0


In [289]:
pitchers_summary_df[pitchers_summary_df['key_mlbam'].isna()].shape[0]

41

Only fixed five pitchers.

In [290]:
pyb.playerid_lookup('De La Rosa')

Gathering player lookup table. This may take a moment.


Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last
0,de la rosa,dane,451773,delad001,delarda01,10095,2011.0,2014.0
1,de la rosa,eury,545001,delae002,delareu01,4055,2013.0,2014.0
2,de la rosa,francisco,113227,delaf001,dela_fr01,1003165,1991.0,1991.0
3,de la rosa,jesus,113228,delaj101,dela_je01,1003166,1975.0,1975.0
4,de la rosa,jorge,407822,delaj001,rosajo01,2047,2004.0,2018.0
5,de la rosa,rubby,523989,delar003,delarru01,3862,2011.0,2017.0
6,de la rosa,tomas,150401,delat001,delarto01,7227,2000.0,2006.0


Next we'll do pitchers with multiple spaces in their name. It looks like usually the first word after splitting is the first name, and everything else should be grouped into the last name.

In [308]:
pitcher_keys_spaces = []

for name in pitchers_summary_df[pitchers_summary_df['key_mlbam'].isna()]['Name'].unique():
    try:
        first, last = name.split(' ', 1)
    except Exception as e:
        row = [name] + [None]*4
        pitcher_keys_spaces.append(row)
        continue

    # If you get a first and last name, look them up. If this returns more than one player,
    # record it and move on. If not, get their data and 
    pitcher_data = pyb.playerid_lookup(last, first)
    if pitcher_data.shape[0] > 1:
        row = [name] + [None]*4
        pitcher_keys_spaces.append(row)
        continue
    else:
        try:
            row = [name] + list(pitcher_data[['key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs']].values[0])
        except Exception as e:
            row = [name] + [None]*4
        pitcher_keys_spaces.append(row)

    # Sleep for one second to avoid rate limiting
    time.sleep(1)

Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering player lookup table. This may take a moment.
Gathering 

In [309]:
pitcher_keys_spaces

[['Adam Eaton', None, None, None, None],
 ['Bobby Jones', None, None, None, None],
 ['Brian Anderson', None, None, None, None],
 ['Carlos Martinez', None, None, None, None],
 ['Chan Ho Park', None, None, None, None],
 ['Charlie Morton', None, None, None, None],
 ['Chris Carpenter', None, None, None, None],
 ['Chris Young', None, None, None, None],
 ['Cliff Lee', None, None, None, None],
 ['Danys Baez', None, None, None, None],
 ['Doug Davis', None, None, None, None],
 ['Eduardo Rodriguez', None, None, None, None],
 ['Erasmo Ramirez', None, None, None, None],
 ['Freddy Garcia', None, None, None, None],
 ['Greg Smith', None, None, None, None],
 ['Hyun-Jin Ryu', None, None, None, None],
 ['Jae Seo', None, None, None, None],
 ['James McDonald', None, None, None, None],
 ['Jason Johnson', None, None, None, None],
 ["Jeff D'Amico", None, None, None, None],
 ['John Patterson', None, None, None, None],
 ['Jon Niese', None, None, None, None],
 ['Jorge De La Rosa', 407822, 'delaj001', 'rosajo01'

In [310]:
pitchers_keys_df = pd.DataFrame(pitcher_keys_spaces, columns=['Name', 'key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs'])
pitchers_keys_df = pitchers_keys_df.set_index('Name')
pitchers_summary_df = pitchers_summary_df.set_index('Name')
pitchers_summary_df.update(pitchers_keys_df)

In [311]:
pitchers_keys_df = pitchers_keys_df.reset_index()
pitchers_summary_df = pitchers_summary_df.reset_index()

In [312]:
pitchers_summary_df[pitchers_summary_df['key_mlbam'].isna()].shape[0]

39

Fixed two more. I'm guessing most of the remaining ones are just people who appear more than once, or (maybe) don't appear at all (is that possible? I assume BR should have everyone).

### Manual updates

Just going through and manually finding the remaining pitchers.

In [10]:
missing_pitchers = ['eatonad01', 'jonesbo04', 'anderbr02', 'martica04', 'parkch01', 
                    'mortoch02', 'carpech01', 'youngch03', 'leecl02', 'baezda01',
                   'davisdo02', 'rodried05', 'ramirer02', 'ramirer01', 'jonesbo03',
                   'martica03', 'rodried01', 'garcifr03', 'smithgr02', 'ryuhy01',
                   'seoja01', 'mcdonja03', 'johnsja02', "d'amije01", "d'amije02",
                   'pattejo02', 'niesejo01', 'contrjo01', 'fernajo02', 'fernajo04',
                   'brownke01', 'brownke03', 'hernali01', 'castilu02', 'boydma01',
                   'gonzami03', 'gonzami05', 'martipe02', 'martipe03', 'johnsra05',
                   'reedri01', 'hernaro01', 'carmofa01', 'delarru01', 'bakersc02',
                   'sparkst01', 'sparkst02', 'armasto02', 'williwo02', 'wheelza01']

In [11]:
missing_pitchers_df = pyb.playerid_reverse_lookup(missing_pitchers, key_type='bbref')

Gathering player lookup table. This may take a moment.


In [16]:
missing_pitchers_df['Name'] = missing_pitchers_df[['name_first', 'name_last']].agg(' '.join, axis=1)

In [19]:
missing_pitchers_df = missing_pitchers_df[missing_pitchers_df['mlb_played_last'] > 2000]

In [20]:
missing_pitchers_df.head()

Unnamed: 0,name_last,name_first,key_mlbam,key_retro,key_bbref,key_fangraphs,mlb_played_first,mlb_played_last,Name
0,anderson,brian,110230,andeb002,anderbr02,44,1993.0,2005.0,brian anderson
1,armas,tony,150393,armat002,armasto02,761,1999.0,2008.0,tony armas
2,báez,danys,276056,baezd001,baezda01,367,2001.0,2011.0,danys báez
3,baker,scott,435044,bakes002,bakersc02,6176,2005.0,2015.0,scott baker
4,boyd,matt,571510,boydm001,boydma01,15440,2015.0,2020.0,matt boyd


In [21]:
cols = ['Name', 'mlb_played_first', 'mlb_played_last', 'key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs']

missing_pitchers_df = missing_pitchers_df[cols]
missing_pitchers_df.columns = ['Name', 'first_season', 'last_season', 'key_mlbam', 'key_retro', 'key_bbref', 'key_fangraphs']
missing_pitchers_df.head()

Unnamed: 0,Name,first_season,last_season,key_mlbam,key_retro,key_bbref,key_fangraphs
0,brian anderson,1993.0,2005.0,110230,andeb002,anderbr02,44
1,tony armas,1999.0,2008.0,150393,armat002,armasto02,761
2,danys báez,2001.0,2011.0,276056,baezd001,baezda01,367
3,scott baker,2005.0,2015.0,435044,bakes002,bakersc02,6176
4,matt boyd,2015.0,2020.0,571510,boydm001,boydma01,15440


In [26]:
missing_pitchers_df['Name'] = missing_pitchers_df['Name'].str.title()
missing_pitchers_df['first_season'] = missing_pitchers_df['first_season'].astype(int)
missing_pitchers_df['last_season'] = missing_pitchers_df['last_season'].astype(int)

In [27]:
missing_pitchers_df.head()

Unnamed: 0,Name,first_season,last_season,key_mlbam,key_retro,key_bbref,key_fangraphs
0,Brian Anderson,1993,2005,110230,andeb002,anderbr02,44
1,Tony Armas,1999,2008,150393,armat002,armasto02,761
2,Danys Báez,2001,2011,276056,baezd001,baezda01,367
3,Scott Baker,2005,2015,435044,bakes002,bakersc02,6176
4,Matt Boyd,2015,2020,571510,boydm001,boydma01,15440


In [35]:
missing_pitchers_df = missing_pitchers_df.set_index(['Name', 'first_season', 'last_season'])
pitchers_summary_df = pitchers_summary_df.set_index(['Name', 'first_season', 'last_season'])
pitchers_summary_df.update(missing_pitchers_df)
pitchers_summary_df = pitchers_summary_df.reset_index()

In [42]:
missing_pitchers_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,key_mlbam,key_retro,key_bbref,key_fangraphs
Name,first_season,last_season,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Brian Anderson,1993,2005,110230,andeb002,anderbr02,44
Tony Armas,1999,2008,150393,armat002,armasto02,761
Danys Báez,2001,2011,276056,baezd001,baezda01,367
Scott Baker,2005,2015,435044,bakes002,bakersc02,6176
Matt Boyd,2015,2020,571510,boydm001,boydma01,15440
Kevin Brown,1986,2005,111554,browk001,brownke01,642
Chris Carpenter,1997,2012,112020,carpc002,carpech01,1292
Luis Castillo,2017,2020,622491,castl003,castilu02,15689
José Contreras,2003,2013,425747,contj002,contrjo01,1660
Jeff D'Amico,1996,2004,113026,damij001,d'amije01,871


In [39]:
pitchers_summary_df[pitchers_summary_df['key_mlbam'].isna()]

Unnamed: 0,Name,first_season,last_season,start_age,end_age,games_played,teams,num_teams,key_mlbam,key_retro,key_bbref,key_fangraphs
7,Adam Eaton,2003,2004,25,26,64,['Padres'],1,,,,
27,Bobby Jones,2001,2001,31,31,33,['Padres'],1,,,,
45,Brian Anderson,2003,2004,31,32,67,['- - -' 'Royals'],2,,,,
56,Carlos Martinez,2015,2017,23,25,94,['Cardinals'],1,,,,
65,Chan Ho Park,2001,2001,28,28,36,['Dodgers'],1,,,,
66,Charlie Morton,2011,2019,27,35,92,['Rays' 'Astros' 'Pirates'],3,,,,
70,Chris Carpenter,2001,2011,26,36,224,['Cardinals' 'Blue Jays'],2,,,,
77,Chris Young,2005,2014,26,35,122,['Padres' 'Rangers' 'Mariners'],3,,,,
84,Cliff Lee,2004,2013,25,34,284,['- - -' 'Phillies' 'Indians'],3,,,,
105,Danys Baez,2002,2002,24,24,39,['Indians'],1,,,,


In [37]:
pitchers_summary_df.to_csv('../data/pitchers_summary.csv', index=False)

## Fetch pitchers game-by-game data from BR

pybaseball doesn't seem to give you access to game-by-game stats for pitchers, which I need. So taking their code and modifying it to pull directly from BR. Note that this _does_ include ERA, but _doesn't_ include WHIP.

In [3]:
pitchers_summary_df = pd.read_csv('../data/pitchers_summary.csv')

In [4]:
pitchers_summary_df.head()

Unnamed: 0,Name,first_season,last_season,start_age,end_age,games_played,teams,num_teams,key_mlbam,key_retro,key_bbref,key_fangraphs
0,A.J. Burnett,2001,2015,24,38,370,['Blue Jays' 'Marlins' 'Pirates' 'Yankees' 'Ph...,5,150359.0,burna001,burnea.01,512.0
1,A.J. Griffin,2013,2013,25,25,32,['Athletics'],1,456167.0,grifa002,griffaj01,11132.0
2,Aaron Cook,2006,2008,27,29,89,['Rockies'],1,346871.0,cooka002,cookaa01,1571.0
3,Aaron Harang,2005,2015,27,37,279,['Reds' 'Braves' 'Dodgers' 'Padres' 'Phillies'],5,421685.0,haraa001,haranaa01,1451.0
4,Aaron Nola,2017,2019,24,26,94,['Phillies'],1,605400.0,nolaa001,nolaaa01,16149.0


In [314]:
def pitcher_bref(br_id, season):
    """
    Get season-level Pitching Statistics for Specific Team (from Baseball-Reference)
    ARGUMENTS:
    br_id : str : The BR unique identifier. You can get this from playerid_lookup in the key_bberf columns
    end_season : int : season you want data for (data is returned on a game-by-game basis)
    """

    url = f"https://www.baseball-reference.com/players/gl.fcgi?id={br_id}&t=p&year={season}"

    data = []
    headings = None
    stats_url = url
    response = requests.get(stats_url)
    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find_all('table', {'id': 'pitching_gamelogs'})[0]

    if headings is None:
        headings = [row.text.strip() for row in table.find_all('th')[1:50]]

    rows = table.find_all('tr')
    # Skip the last row, as this is a footer with only yearly summary data
    for row in rows[:-1]:
        cols = row.find_all('td')
        cols = [ele.text.strip() for ele in cols]
        cols = [col.replace('*', '').replace('#', '') for col in cols]  # Removes '*' and '#' from some names
        cols = [col for col in cols if 'Totals' not in col and 'NL teams' not in col and 'AL teams' not in col]  # Removes Team Totals and other rows
        cols.insert(2, int(season))
        data.append([ele for ele in cols[0:]])

    headings.insert(2, "Year")
    data = pd.DataFrame(data=data, columns=headings) # [:-5]  # -5 to remove Team Totals and other rows (didn't work in multi-year queries)
    data.columns = [x if x != '' else 'at' for x in data.columns]
    data = data.dropna()  # Removes Row of All Nones
    data.reset_index(drop=True, inplace=True)  # Fixes index issue (Index was named 'W" for some reason)
    
    return data

In [315]:
test_df = pitcher_bref('cookaa01', 2002)

In [316]:
test_df.head()

Unnamed: 0,Gcar,Gtm,Year,Date,Tm,at,Opp,Rslt,Inngs,Dec,...,GDP,SF,ROE,aLI,WPA,acLI,cWPA,RE24,Entered,Exited
0,1,116,2002.0,Aug 10,COL,,CHC,"L,1-15",6-7,,...,0,0,0,0.0,0.0,0.0,0.00%,0.14,6t --- 0 out d13,7t 3 out d14
1,2,121,2002.0,Aug 16,COL,@,ATL,"L,1-4",8-GF(8),,...,0,0,0,0.14,0.007,0.0,0.00%,0.51,8b --- 0 out d3,8b end d 3
2,3,125,2002.0,Aug 20,COL,,MON,"W,8-6",6-7,,...,0,0,0,0.51,-0.094,0.0,0.00%,-2.12,6t --- 0 out a5,7t 1-- 2 out a2
3,4,127,2002.0,Aug 22,COL,,MON,"W,14-6",6-6,H(1),...,0,0,0,1.17,0.054,0.01,0.00%,0.57,6t --- 0 out a3,6t 3 out a3
4,5,131,2002.0,Aug 26,COL,,SFG,"L,3-4",GS-7,,...,4,0,0,0.8,-0.098,0.0,0.00%,-0.6,1t start tie,7t 12- 0 out d2


In [320]:
test_df['Year'] = test_df['Year'].astype(int).astype(str)

In [323]:
test_df['Date'] = test_df[['Date', 'Year']].agg(' '.join, axis=1)

In [325]:
test_df = test_df.drop('Year', axis='columns')

In [326]:
test_df.head()

Unnamed: 0,Gcar,Gtm,Date,Tm,at,Opp,Rslt,Inngs,Dec,DR,...,GDP,SF,ROE,aLI,WPA,acLI,cWPA,RE24,Entered,Exited
0,1,116,Aug 10 2002,COL,,CHC,"L,1-15",6-7,,99,...,0,0,0,0.0,0.0,0.0,0.00%,0.14,6t --- 0 out d13,7t 3 out d14
1,2,121,Aug 16 2002,COL,@,ATL,"L,1-4",8-GF(8),,5,...,0,0,0,0.14,0.007,0.0,0.00%,0.51,8b --- 0 out d3,8b end d 3
2,3,125,Aug 20 2002,COL,,MON,"W,8-6",6-7,,3,...,0,0,0,0.51,-0.094,0.0,0.00%,-2.12,6t --- 0 out a5,7t 1-- 2 out a2
3,4,127,Aug 22 2002,COL,,MON,"W,14-6",6-6,H(1),1,...,0,0,0,1.17,0.054,0.01,0.00%,0.57,6t --- 0 out a3,6t 3 out a3
4,5,131,Aug 26 2002,COL,,SFG,"L,3-4",GS-7,,3,...,4,0,0,0.8,-0.098,0.0,0.00%,-0.6,1t start tie,7t 12- 0 out d2


In [327]:
home_team = []
for i in range(test_df.shape[0]):
    if test_df.loc[i, 'at'] == '@':
        home_team.append(test_df.loc[i, 'Opp'])
    else:
        home_team.append(test_df.loc[i, 'Tm'])

In [329]:
test_df['Home_Tm'] = home_team
test_df = test_df.drop('at', axis='columns')

In [330]:
test_df.head()

Unnamed: 0,Gcar,Gtm,Date,Tm,Opp,Rslt,Inngs,Dec,DR,IP,...,SF,ROE,aLI,WPA,acLI,cWPA,RE24,Entered,Exited,Home_Tm
0,1,116,Aug 10 2002,COL,CHC,"L,1-15",6-7,,99,2.0,...,0,0,0.0,0.0,0.0,0.00%,0.14,6t --- 0 out d13,7t 3 out d14,COL
1,2,121,Aug 16 2002,COL,ATL,"L,1-4",8-GF(8),,5,1.0,...,0,0,0.14,0.007,0.0,0.00%,0.51,8b --- 0 out d3,8b end d 3,ATL
2,3,125,Aug 20 2002,COL,MON,"W,8-6",6-7,,3,1.2,...,0,0,0.51,-0.094,0.0,0.00%,-2.12,6t --- 0 out a5,7t 1-- 2 out a2,COL
3,4,127,Aug 22 2002,COL,MON,"W,14-6",6-6,H(1),1,1.0,...,0,0,1.17,0.054,0.01,0.00%,0.57,6t --- 0 out a3,6t 3 out a3,COL
4,5,131,Aug 26 2002,COL,SFG,"L,3-4",GS-7,,3,6.0,...,0,0,0.8,-0.098,0.0,0.00%,-0.6,1t start tie,7t 12- 0 out d2,COL


In [347]:
def get_pitcher_game_stats(br_id, year):
    pitcher_df = pitcher_bref(br_id, year)
    pitcher_df['Year'] = pitcher_df['Year'].astype(int).astype(str)
    pitcher_df['Date'] = pitcher_df[['Date', 'Year']].agg(' '.join, axis=1)
    pitcher_df = pitcher_df.drop('Year', axis='columns')
    
    home_team = []
    for i in range(pitcher_df.shape[0]):
        if pitcher_df.loc[i, 'at'] == '@':
            home_team.append(pitcher_df.loc[i, 'Opp'])
        else:
            home_team.append(pitcher_df.loc[i, 'Tm'])
            
    pitcher_df['Home_Tm'] = home_team
    pitcher_df = pitcher_df.drop('at', axis='columns')
    
    pitcher_df['WHIP'] = (pitcher_df['BB'].astype(int) + pitcher_df['H'].astype(int)) / pitcher_df['IP'].astype(float)
    return pitcher_df

In [348]:
acook = get_pitcher_game_stats('cookaa01', 2002)

In [349]:
acook

Unnamed: 0,Gcar,Gtm,Date,Tm,Opp,Rslt,Inngs,Dec,DR,IP,...,ROE,aLI,WPA,acLI,cWPA,RE24,Entered,Exited,Home_Tm,WHIP
0,1,116,Aug 10 2002,COL,CHC,"L,1-15",6-7,,99,2.0,...,0,0.0,0.0,0.0,0.00%,0.14,6t --- 0 out d13,7t 3 out d14,COL,1.0
1,2,121,Aug 16 2002,COL,ATL,"L,1-4",8-GF(8),,5,1.0,...,0,0.14,0.007,0.0,0.00%,0.51,8b --- 0 out d3,8b end d 3,ATL,0.0
2,3,125,Aug 20 2002,COL,MON,"W,8-6",6-7,,3,1.2,...,0,0.51,-0.094,0.0,0.00%,-2.12,6t --- 0 out a5,7t 1-- 2 out a2,COL,3.333333
3,4,127,Aug 22 2002,COL,MON,"W,14-6",6-6,H(1),1,1.0,...,0,1.17,0.054,0.01,0.00%,0.57,6t --- 0 out a3,6t 3 out a3,COL,1.0
4,5,131,Aug 26 2002,COL,SFG,"L,3-4",GS-7,,3,6.0,...,0,0.8,-0.098,0.0,0.00%,-0.6,1t start tie,7t 12- 0 out d2,COL,2.166667
5,6,136,Aug 31 2002,COL,SDP,"L,0-3",GS-7,L(0-1),4,7.0,...,0,1.0,-0.08,0.0,0.00%,0.12,1b start tie,7b 3 out d3,SDP,1.285714
6,7,141,Sep 6 2002,COL,SDP,"W,7-3",GS-7,W(1-1),5,7.0,...,0,0.67,0.272,0.0,0.00%,2.99,1t start tie,7t 3 out a6,COL,1.0
7,8,146,Sep 11 2002,COL,HOU,"W,8-6",GS-6,W(2-1),4,6.0,...,0,0.9,0.076,0.0,0.00%,0.16,1b start tie,6b 3 out a3,HOU,1.333333
8,9,151,Sep 17 2002,COL,STL,"L,4-11",GS-4,,5,4.0,...,0,0.76,-0.187,0.0,0.00%,-1.72,1t start tie,4t 3 out d3,COL,2.5


In [350]:
acook.to_csv('aaron_cook.csv', index=False)

Examples of this data can be seen [here](https://www.baseball-reference.com/players/gl.fcgi?id=cookaa01&t=p&year=2002).

Descriptions of columns:
- Gcar -- Career Game Number for Player
- Gtm -- Season Game Number for Team. Number in parentheses indicates number of team games the player did not play in from one appearance to next.
- Date -- A number in parentheses indicates which game of a doubleheader.
- Rslt -- Game Result for Team. W - Win, L - Loss, T - Tie (for a suspended game)
- Inngs -- Innings Played by Player
    - CG - Complete Game started and finished
    - GS-# - Game Started to what inning
    - #-GF, Inning entered to end of game
    - #-# - Inning Entered to Inning Left
    - (#) Game did not go 9 innings (only shown when player finished the game).
    - For pitchers, an SHO means they shutout the opposition. A zero for the innings means the innings played is unknown.
- Dec -- Decision, Save, or Hold
    - W - Win (pitcher record after game)
    - L - Loss (pitcher record after game)
    - BW - Blown Save and Win (pitcher record after game)
    - BL - Blown Save and Loss (pitcher record after game)
    - S - Save (pitcher saves thus far)
    - BSv - Blown Save (pitcher blown saves thus far)
    - H - Hold (pitcher holds thus far)
- DR -- Days Rest. Number or days since their previous appearance. 99 if start of season or 99 or more days (may include demotions). -1 if pitching both games of double-header.
- IP -- Innings Pitched
- H -- Hits/Hits Allowed
- R -- Runs Scored/Allowed
- ER -- Earned Runs Allowed
- BB -- Bases on Balls/Walks
- SO -- Strikeouts
- HR -- Home Runs Hit/Allowed
- HBP -- Times Hit by a Pitch.
- ERA -- 9 * ER / IP. For recent years, leaders need 1 IP per team game played.
- BF -- Batters Faced
- Pit -- Number of pitches in the PA.
- Str -- Strikes. Includes both pitches in the zone and those swung at out of the zone.
- StL -- Strikes Looking. Strikes called by the umpire.
- StS -- Strikes Swinging. Strikes due to a swing and a miss.
- GB -- Ground Balls. Includes bunts and all other ground balls.
- FB -- Fly Balls. Includes Fly Balls, Line Drives, and Pop-Ups.
- LD -- Line Drives. These are double-counted in Fly Balls as well.
- PU -- Pop Ups. Generally, high fly balls that land within the infield circle. These are double-counted in Fly Balls as well.
- Unk -- Unknown batted ball type. A ball in play for which we don’t know the type.
- GSc -- Game Score. Developed by Bill James
    1. Start with 50 points.
    2. Add 1 point for each out recorded, so 3 points for every complete inning pitched.
    3. Add 2 points for each inning completed after the 4th.
    4. Add 1 point for each strikeout.
    5. Subtract 2 points for each hit allowed.
    6. Subtract 4 points for each earned run allowed.
    7. Subtract 2 points for each unearned run allowed.
    8. Subtract 1 point for each walk.
- IR -- Inherited Runners. Number of runners on base when pitcher entered the game.
- IS -- Inherited Score. Number or percentage of runners on base when pitcher entered the game who subsequently scored. These runners show up in the previous pitcher’s ERA.
- SB -- Stolen Bases
- CS -- Caught Stealing
- PO -- Pickoffs. Runner picked off a base. May include cases they were safe on an error. Also includes Pickoff Caught Stealing plays.
- AB -- At Bats
- 2B -- Doubles Hit/Allowed
- 3B -- Triples Hit/Allowed
- IBB -- Intentional Bases on Balls
- GDP -- Double Plays Grounded Into. Only includes standard 6-4-3, 4-3, etc. double plays. For gamelogs only in seasons we have play-by-play, we include triple plays as well. All official seasonal totals do not include GITP's.
- SF -- Sacrifice Flies
- ROE -- Reached On Error. Times a batter reached due to an error. DOES NOT include a fielder’s choice where no out was recorded.
- aLI -- Average Leverage Index. The average pressure the pitcher or batter saw in this game or season. 1.0 is average pressure, below 1.0 is low pressure and above 1.0 is high pressure.
- WPA -- Win Probability Added by Pitcher. Given average teams, this is the change in probability. A change of +/- 1 would indicate one win added or lost.
- acLI -- Average Championship Leverage Index. The average pressure the pitcher or batter saw in this game or season. 1.0 is average pressure, below 1.0 is low pressure and above 1.0 is high pressure.
- cWPA -- Championship Win Probability Added by Pitcher. Given average teams, this is the change in probability, displayed in percentage points. A change of +/- 100% would indicate one world series win added or lost.
- RE24 -- Base-Out Runs Saved. Given the bases occupied/out situation, how many runs did the pitcher save in the resulting play. Compared to average, so 0 is average, and above 0 is better than average
- Entered -- The situation when pitcher entered game. 
    - Inning top or bottom: 8b (bottom of 8th) 
    - bases occupied or start of inning: ’---’ (bases empty) 
    - score from pitching team’s perspective 
        - ahead/down and runs or tie: a4 (ahead by 4 runs) 
- Exited -- The situation when pitcher exited game
    - Inning top or bottom: 4t (top of 4th)
    - bases occupied, 3 outs, or end of game: ’123’ (bases loaded)
    - score from pitching team’s perspective
        - ahead/down and runs or tie: d2 (down by 2 runs)