In [1]:
import pandas as pd
import numpy as np
import boxball_loader as bbl

In [2]:
# Load the per-team game log, keep only rows whose game_type is 'RS', and
# order them by team, year and team game number so that per-season running
# totals computed later accumulate in game order.
df = (
    pd.read_parquet('../data/mine/gl_teams.parquet')
    .loc[lambda games: games['game_type'] == 'RS']
    .sort_values(by=['team', 'yr', 'team_game_number'])
)

In [3]:
# Running win and loss totals after each game, restarted for every
# (team, yr) group and accumulated in the frame's current row order.
by_team_season = df.groupby(['team', 'yr'])
df['cum_w'] = by_team_season['W'].cumsum()
df['cum_l'] = by_team_season['L'].cumsum()

# at500: True when wins equal losses after the game (record exactly .500)
# delta: absolute gap between wins and losses after the game
df['at500'] = df['cum_w'].eq(df['cum_l'])
df['delta'] = (df['cum_w'] - df['cum_l']).abs()

df

Unnamed: 0,game_id,date,double_header,yr,game_type,park_id,team,team_league,team_game_number,runs_scored,...,HA,opp,runs_allowed,W,L,linescore_parsed,cum_w,cum_l,at500,delta
43181,CNU188404170,1884-04-17,0,1884,RS,CIN03,ALT,UA,1,2,...,A,CNU,7,False,True,,0,1,False,1
43184,CNU188404180,1884-04-18,0,1884,RS,CIN03,ALT,UA,2,2,...,A,CNU,9,False,True,,0,2,False,2
43186,CNU188404190,1884-04-19,0,1884,RS,CIN03,ALT,UA,3,6,...,A,CNU,9,False,True,,0,3,False,3
43195,SLU188404240,1884-04-24,0,1884,RS,STL04,ALT,UA,4,2,...,A,SLU,11,False,True,,0,4,False,4
43199,SLU188404260,1884-04-26,0,1884,RS,STL04,ALT,UA,5,3,...,A,SLU,9,False,True,,0,5,False,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44692,SLU188410120,1884-10-12,0,1884,RS,STL04,WSU,UA,110,8,...,A,SLU,10,False,True,,42,59,False,17
44707,KCU188410140,1884-10-14,0,1884,RS,KAN01,WSU,UA,111,5,...,A,KCU,2,True,False,,43,59,False,16
44718,KCU188410160,1884-10-16,0,1884,RS,KAN01,WSU,UA,112,4,...,A,KCU,10,False,True,,43,60,False,17
44720,KCU188410180,1884-10-18,0,1884,RS,KAN01,WSU,UA,113,5,...,A,KCU,1,True,False,,44,60,False,16


In [4]:
# Top 10 team-seasons by number of games that ended with the record at
# exactly .500; season win and loss totals are shown alongside for context.
(
    df.groupby(['team', 'yr'])[['at500', 'W', 'L']]
    .sum()
    .sort_values(by='at500', ascending=False)
    .head(10)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,at500,W,L
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CHN,1959,35,74,80
MIN,2009,34,87,76
BRO,1939,34,84,69
CHN,1993,33,84,78
TOR,2011,33,81,81
TEX,1974,33,83,76
OAK,2010,33,81,81
PIT,1999,32,78,83
LAN,1998,31,83,79
ARI,2019,31,85,77


In [5]:
# Aggregate the game-level rows into one row per (team, yr) season.
#
# Named aggregation yields flat, already-named columns, so no MultiIndex
# flattening step is needed. Reducers are given as strings because passing
# the builtins `sum`/`max` or `np.mean` to GroupBy.agg is deprecated in
# recent pandas and emits a FutureWarning.
#
#   G          - number of game rows in the season ('size' counts every row)
#   W, L       - season totals of the W and L flags
#   delta_max  - largest absolute gap between wins and losses in the season
#   delta_mean - mean absolute gap across the season's games
#   delta_sum  - sum of the absolute gap across the season's games
#   at500      - number of games after which wins equalled losses
seasons = df.groupby(['team', 'yr']).agg(
    G=('W', 'size'),
    W=('W', 'sum'),
    L=('L', 'sum'),
    delta_max=('delta', 'max'),
    delta_mean=('delta', 'mean'),
    delta_sum=('delta', 'sum'),
    at500=('at500', 'sum'),
)

seasons

Unnamed: 0_level_0,Unnamed: 1_level_0,G,W,L,delta_max,delta_mean,delta_sum,at500
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ALT,1884,25,6,19,14,9.240000,231,0
ANA,1997,162,84,78,16,5.722222,927,11
ANA,1998,162,85,77,17,7.808642,1265,9
ANA,1999,162,70,92,32,11.796296,1911,6
ANA,2000,162,82,80,9,2.672840,433,19
...,...,...,...,...,...,...,...,...
WSN,1896,133,58,73,26,9.195489,1223,9
WSN,1897,135,60,70,23,11.377778,1536,0
WSN,1898,155,51,101,50,23.348387,3619,1
WSN,1899,155,54,98,46,24.664516,3823,0


In [6]:
# Seasons whose record never strayed far from .500: the 20 smallest values of
# the season's maximum win-loss gap. No minimum-games filter is applied.
seasons.sort_values(by='delta_max', ascending=True).head(n=20)

Unnamed: 0_level_0,Unnamed: 1_level_0,G,W,L,delta_max,delta_mean,delta_sum,at500
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MLU,1884,3,2,1,1,0.666667,2,1
SLN,2020,58,30,28,2,0.913793,53,17
WS3,1871,32,14,16,4,1.4375,46,6
SPU,1884,9,2,6,4,2.777778,25,0
MIL,2020,60,29,31,4,1.866667,112,8
DET,1948,154,78,76,5,1.915584,295,22
PHI,2020,60,28,32,5,1.833333,110,9
LAN,1998,162,83,79,5,1.561728,253,31
TOR,2011,162,81,81,5,1.450617,235,33
ANA,2017,162,80,82,5,1.808642,293,26


In [7]:
# Seasons that stayed closest to .500 on average: the 20 smallest values of
# the mean per-game win-loss gap. No minimum-games filter is applied.
seasons.sort_values(by='delta_mean', ascending=True).head(n=20)

Unnamed: 0_level_0,Unnamed: 1_level_0,G,W,L,delta_max,delta_mean,delta_sum,at500
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
MLU,1884,3,2,1,1,0.666667,2,1
SLN,2020,58,30,28,2,0.913793,53,17
WS3,1871,32,14,16,4,1.4375,46,6
TOR,2011,162,81,81,5,1.450617,235,33
BRO,1903,139,70,66,6,1.539568,214,27
LAN,1998,162,83,79,5,1.561728,253,31
PIT,1999,161,78,83,5,1.583851,255,32
OAK,2010,162,81,81,6,1.660494,269,33
CIN,1909,157,77,77,6,1.687898,265,29
CHN,1993,163,84,78,6,1.699387,277,33
