## Which teams have had the largest platoon advantages (best and worst)? 
### Measured separately for each team's hitters and pitchers

In [1]:
import pandas as pd
import numpy as np
import boxball_loader as bbl

In [2]:
# Event columns requested from the loader: batting/fielding team ids, batter and
# pitcher ids with their hands, the batting-event flag, the event code and the game id.
cols = [
    'bat_team_id',
    'fld_team_id',
    'bat_id',
    'pit_id',
    'bat_event_fl',
    'bat_hand_cd',
    'pit_hand_cd',
    'event_cd',
    'game_id',
]

# Load event rows for the 1920 through 2019 seasons (the displayed result contains rows
# from both end years). The returned frame also carries columns beyond the requested ones
# (e.g. `yr`), which later cells rely on.
pa = bbl.load_event_data(1920, 2019, cols)
pa


Unnamed: 0,game_id,date,game_type,bat_id,bat_event_fl,pit_id,event_cd,ab_fl,fld_team_id,pit_hand_cd,bat_team_id,bat_hand_cd,tb_ct,h_fl,ob_fl,yr
0,CLE192004140,1920-04-14,RS,austj101,True,coves101,3,True,CLE,R,SLA,L,0,0,0,1920
1,CLE192004140,1920-04-14,RS,gedej101,True,coves101,21,True,CLE,R,SLA,R,2,1,1,1920
2,CLE192004140,1920-04-14,RS,tobij101,True,coves101,2,True,CLE,R,SLA,L,0,0,0,1920
3,CLE192004140,1920-04-14,RS,sislg101,True,coves101,2,True,CLE,R,SLA,L,0,0,0,1920
4,CLE192004140,1920-04-14,RS,granj104,True,sotha101,16,False,SLA,R,CLE,L,0,0,1,1920
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13917197,ANA201909290,2019-09-29,RS,stram002,True,garcl005,3,True,ANA,R,HOU,R,0,0,0,2019
13917198,ANA201909290,2019-09-29,RS,lastt001,True,devec001,2,True,HOU,R,ANA,L,0,0,0,2019
13917199,ANA201909290,2019-09-29,RS,fletd002,True,devec001,20,True,HOU,R,ANA,R,1,1,1,2019
13917200,ANA201909290,2019-09-29,RS,goodb001,True,devec001,2,True,HOU,R,ANA,L,0,0,0,2019


In [3]:
# Flag every PA in which the batter's hand differs from the pitcher's hand, i.e. the
# batter holds the platoon advantage. Later cells aggregate this flag per batting team
# and per pitching team.
# NOTE(review): any two unequal codes count as an advantage, so an unknown or missing
# hand code would be flagged True as well -- confirm both hand columns only hold L/R.
pa['platoon_adv_bat'] = pa['bat_hand_cd'].ne(pa['pit_hand_cd'])
pa['platoon_adv_bat'].value_counts()  # overall, batters hold the advantage slightly more often than not

True     7130536
False    6215882
Name: platoon_adv_bat, dtype: int64

In [4]:
# Reshape so that every PA contributes two rows: one credited to the batting team and
# one to the fielding team. `role` records which source column the team id came from
# ('bat_team_id' or 'fld_team_id'); `team` holds the team id itself.
cols = ['yr', 'fld_team_id', 'bat_team_id', 'platoon_adv_bat']
pa_split = pa[cols].melt(
    id_vars=['yr', 'platoon_adv_bat'],
    value_vars=['bat_team_id', 'fld_team_id'],
    var_name='role',
    value_name='team',
)

# Count PAs per season/team/role, pivoting the platoon_adv_bat value (False/True) into columns.
# fill_value=0 records an outcome that never occurred for a team/season/role as 0 instead of
# NaN, which keeps the counts integer-typed and stops ratios built on them from becoming NaN.
tm_counts = (
    pa_split
    .groupby(['yr', 'team', 'role', 'platoon_adv_bat'])['platoon_adv_bat']
    .count()
    .unstack(fill_value=0)
)
tm_counts


Unnamed: 0_level_0,Unnamed: 1_level_0,platoon_adv_bat,False,True
yr,team,role,Unnamed: 3_level_1,Unnamed: 4_level_1
1920,BOS,bat_team_id,1860,1378
1920,BOS,fld_team_id,1607,1660
1920,BRO,bat_team_id,2997,2231
1920,BRO,fld_team_id,2418,2725
1920,BSN,bat_team_id,1742,2417
...,...,...,...,...
2019,TEX,fld_team_id,2880,3474
2019,TOR,bat_team_id,2713,3378
2019,TOR,fld_team_id,3125,3188
2019,WAS,bat_team_id,3455,2812


In [5]:
# Share of PAs in which the batter held the platoon advantage: the True count divided by
# the row total for each (yr, team, role). Unstacking then moves `role` into the columns,
# so each team-season has its batting-side and fielding-side frequency side by side.
tm_adv_freq = tm_counts[True].div(tm_counts.sum(axis=1)).unstack()

# Team-seasons whose hitters held the platoon advantage most often
tm_adv_freq.nlargest(n=50, columns='bat_team_id')


Unnamed: 0_level_0,role,bat_team_id,fld_team_id
yr,team,Unnamed: 2_level_1,Unnamed: 3_level_1
1986,SLN,0.85887,0.625969
1985,SLN,0.831608,0.542493
1987,SLN,0.800127,0.646946
1980,MIN,0.776709,0.609085
1969,NYA,0.776461,0.561362
1985,HOU,0.775517,0.564915
1979,MIN,0.774019,0.550918
1993,NYN,0.770348,0.61681
1978,MIN,0.769048,0.544298
1976,SLN,0.765933,0.555377


In [6]:
# Pitching staffs that held the platoon advantage the *least*: the team-seasons with the
# highest share of opposing batters holding the advantage.
# (Read here as lefty-heavy staffs.)
tm_adv_freq.nlargest(n=50, columns='fld_team_id')

Unnamed: 0_level_0,role,bat_team_id,fld_team_id
yr,team,Unnamed: 2_level_1,Unnamed: 3_level_1
1988,CIN,0.537193,0.693203
1983,NYA,0.659181,0.68824
1990,PHI,0.608487,0.685892
1987,PHI,0.53441,0.684757
1991,CAL,0.51686,0.682786
1985,LAN,0.551896,0.681119
1989,PHI,0.650946,0.674404
1984,PHI,0.510025,0.674219
1992,ATL,0.704053,0.674078
1990,ATL,0.563281,0.673489


In [7]:
# Pitching staffs that held the platoon advantage the *most*: the team-seasons with the
# lowest share of opposing batters holding the advantage.
tm_adv_freq.nsmallest(n=50, columns='fld_team_id')

Unnamed: 0_level_0,role,bat_team_id,fld_team_id
yr,team,Unnamed: 2_level_1,Unnamed: 3_level_1
1960,BOS,0.540459,0.362927
1944,BSN,0.581261,0.385652
1957,BOS,0.392494,0.385719
1961,BOS,0.539876,0.390006
1943,BSN,0.581069,0.391547
1932,CIN,0.5305,0.39276
1954,BAL,0.574782,0.393473
1937,DET,0.436961,0.394371
1945,BOS,0.475162,0.396338
1932,CHN,0.398369,0.39913


In [8]:
# Spot-check the top team-season from the ranking (1960 BOS): count the PAs faced by each
# (pitcher, throwing hand) pair. The leading teams turn out to be stocked with RHPs.
# Inside `query`, `&` binds like `and`, so the two comparisons need no parentheses.
(
    pa
    .query('fld_team_id == "BOS" & yr==1960')
    .loc[:, ['pit_id', 'pit_hand_cd']]
    .value_counts()
)


pit_id    pit_hand_cd
monbb101  R              909
brewt101  R              828
sullf101  R              678
deloi101  R              576
muffb101  R              530
casaj101  R              470
fornm101  R              461
sturt101  R              443
wilse102  R              300
borlt101  L              236
hilld102  R              161
willt102  L              143
bowst101  L               94
worta101  R               62
nichc102  L               55
chitn101  R               40
earla101  L               25
stalt101  R               15
dtype: int64