# What is the most .500 team of all time?

Inspired by a [Reddit post about the 2011 Cubs](https://www.reddit.com/r/baseball/comments/n0cfbm/in_2011_the_cubs_started_the_season_11_22_33_44/), who started the season with a .500 record after each of the first ten pairs of games (i.e., 1-1, 2-2, 3-3, 4-4, 5-5, 6-6, 7-7, 8-8, 9-9, and 10-10), I was curious which team has had the most .500 records in a single season. Beyond that, which is the "most .500" team of all time, judged both by its final record and by how close it stayed to .500 over the course of the season?

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the gamelog_teams table (one row per team-game), keeping regular-season games only.

index_cols = ['yr', 'team', 'team_game_number']
cols = index_cols + ['game_type', 'W', 'L']
# Ask the parquet reader for just the columns used below, rather than loading
# the whole table and discarding the rest afterwards.
df = pd.read_parquet('../data/mine/gl_teams.parquet', columns=cols)
# Keep the 'RS' (regular season) rows, then sort and index by (yr, team, team_game_number)
# so that the per-season cumulative sums computed later run in game order.
df = df[df['game_type']=='RS'].sort_values(by=index_cols).set_index(index_cols)

In [3]:
# Running win and loss totals within each team-season. Each total includes the
# current game, so a row describes the record *after* that game was played.
by_season = df.groupby(['yr', 'team'])
wins_to_date = by_season['W'].cumsum()
losses_to_date = by_season['L'].cumsum()
df['cum_w'] = wins_to_date
df['cum_l'] = losses_to_date

# Distance from .500 after each game, measured as |wins - losses|, plus two flags:
# exactly at .500, and within one game of it.
df['delta'] = (df['cum_w'] - df['cum_l']).abs()
df['at500'] = df['delta'].eq(0)
df['within_half'] = df['delta'].le(1)

# Did this game move the record towards .500?
#  - a win counts when wins still do not exceed losses afterwards (the team was below .500)
#  - a loss counts when losses still do not exceed wins afterwards (the team was above .500)
# A game played from an even record can only move away, so it is always False.
won_while_behind = df['W'] & (df['cum_w'] <= df['cum_l'])
lost_while_ahead = df['L'] & (df['cum_l'] <= df['cum_w'])
df['towards500'] = won_while_behind | lost_while_ahead

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,game_type,W,L,cum_w,cum_l,delta,at500,within_half,towards500
yr,team,team_game_number,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1871,BS1,1,RS,True,False,1,0,1,False,True,False
1871,BS1,2,RS,True,False,2,0,2,False,False,False
1871,BS1,3,RS,False,True,2,1,1,False,True,True
1871,BS1,4,RS,True,False,3,1,2,False,False,False
1871,BS1,5,RS,False,False,3,1,2,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...
2020,WAS,56,RS,False,True,23,33,10,False,False,False
2020,WAS,57,RS,False,True,23,34,11,False,False,False
2020,WAS,58,RS,True,False,24,34,10,False,False,True
2020,WAS,59,RS,True,False,25,34,9,False,False,True


In [4]:
# Top 10 team-seasons by number of games that left the record exactly at .500
(
    df.groupby(['team', 'yr'])[['at500', 'W', 'L']]
      .sum()
      .sort_values(by='at500', ascending=False)
      .head(10)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,at500,W,L
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CHN,1959,35,74,80
MIN,2009,34,87,76
BRO,1939,34,84,69
CHN,1993,33,84,78
TOR,2011,33,81,81
TEX,1974,33,83,76
OAK,2010,33,81,81
PIT,1999,32,78,83
LAN,1998,31,83,79
ARI,2019,31,85,77


In [5]:
# Top 10 team-seasons by number of games that left the record at .500 or one game
# either side of it (i.e. half a game off, which is the closest possible after an
# odd number of decisions)
(
    df.groupby(['team', 'yr'])[['at500', 'within_half', 'W', 'L']]
      .sum()
      .sort_values(by='within_half', ascending=False)
      .head(10)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,at500,within_half,W,L
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
TOR,2011,33,93,81,81
MIN,2009,34,90,87,76
CHN,1993,33,89,84,78
OAK,2010,33,87,81,81
PIT,1999,32,86,78,83
LAN,1998,31,86,83,79
CIN,1909,29,82,77,77
HOU,1971,31,82,79,83
CHN,1959,35,82,74,80
BRO,1903,27,81,70,66


In [6]:
# Collapse the per-game rows into one row per team-season
grps = df.groupby(['team', 'yr'])

seasons = pd.concat([
           # G: number of game rows in the season (every row counts, whatever its W/L flags)
           grps.size().rename('G'),
           # season totals: wins, losses, and how many games carried each boolean flag
           grps[['W', 'L', 'at500', 'within_half', 'towards500']].sum(),
           # furthest and average distance from .500 (as |W - L|) over the season
           grps['delta'].max().rename('delta_max'),
           grps['delta'].mean().rename('delta_mean')
    ], axis=1).sort_values('yr')

seasons

Unnamed: 0_level_0,Unnamed: 1_level_0,G,W,L,at500,within_half,towards500,delta_max,delta_mean
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
PH1,1871,28,19,9,4,9,9,10,4.571429
CL1,1871,29,10,19,1,3,10,10,5.344828
BS1,1871,31,20,10,3,10,10,10,3.322581
WS3,1871,32,14,16,6,18,14,4,1.437500
RC1,1871,25,8,17,2,5,8,9,4.760000
...,...,...,...,...,...,...,...,...,...
OAK,2020,60,36,24,3,8,24,14,8.566667
ARI,2020,60,25,35,2,9,25,14,6.533333
DET,2020,58,23,35,6,17,23,12,3.672414
PHI,2020,60,28,32,9,27,28,5,1.833333


In [7]:
# 20 team-seasons whose largest |W - L| at any point in the season was smallest
seasons.sort_values(by='delta_max', ascending=True).iloc[:20]

Unnamed: 0_level_0,Unnamed: 1_level_0,G,W,L,at500,within_half,towards500,delta_max,delta_mean
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
MLU,1884,3,2,1,1,3,1,1,0.666667
SLN,2020,58,30,28,17,46,28,2,0.913793
WS3,1871,32,14,16,6,18,14,4,1.4375
MIL,2020,60,29,31,8,25,29,4,1.866667
SPU,1884,9,2,6,0,1,2,4,2.777778
DET,1948,154,78,76,22,68,76,5,1.915584
TOR,2011,162,81,81,33,93,81,5,1.450617
PHI,2020,60,28,32,9,27,28,5,1.833333
PIT,1999,161,78,83,32,86,78,5,1.583851
LAN,1998,162,83,79,31,86,79,5,1.561728


In [8]:
# 20 team-seasons whose |W - L|, averaged over every game of the season, was smallest
seasons.sort_values(by='delta_mean', ascending=True).iloc[:20]

Unnamed: 0_level_0,Unnamed: 1_level_0,G,W,L,at500,within_half,towards500,delta_max,delta_mean
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
MLU,1884,3,2,1,1,3,1,1,0.666667
SLN,2020,58,30,28,17,46,28,2,0.913793
WS3,1871,32,14,16,6,18,14,4,1.4375
TOR,2011,162,81,81,33,93,81,5,1.450617
BRO,1903,139,70,66,27,81,66,6,1.539568
LAN,1998,162,83,79,31,86,79,5,1.561728
PIT,1999,161,78,83,32,86,78,5,1.583851
OAK,2010,162,81,81,33,87,81,6,1.660494
CIN,1909,157,77,77,29,82,77,6,1.687898
CHN,1993,163,84,78,33,89,78,6,1.699387


In [12]:
# What about moving towards .500? Rank team-seasons by how many games moved the record
# towards .500 rather than away from it.
# NOTE(review): every decision changes |W - L| by exactly one and the season starts at 0,
# so this count always works out to min(W, L); the ranking favours long seasons that
# finish level.
seasons.sort_values(by='towards500', ascending=False).iloc[:20]

Unnamed: 0_level_0,Unnamed: 1_level_0,G,W,L,at500,within_half,towards500,delta_max,delta_mean
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
SLN,1973,162,81,81,5,25,81,15,5.265432
PHI,2012,162,81,81,17,51,81,14,4.722222
MON,1989,162,81,81,13,35,81,19,6.771605
ARI,2013,162,81,81,5,20,81,9,3.845679
MON,1988,163,81,81,18,58,81,11,2.834356
NYA,1971,162,81,81,20,53,81,9,3.203704
LAN,1993,162,81,81,10,28,81,8,3.462963
HOU,1977,162,81,81,10,26,81,13,6.302469
CHN,1977,162,81,81,7,19,81,25,12.450617
KCA,2016,162,81,81,11,37,81,8,3.302469


In [13]:
# Share of games that moved the record towards .500, out of the games that did not
# leave the record exactly at .500.
# NOTE(review): 'at500' counts games that *ended* level; the number of games that *started*
# level (and so could not move towards .500) can differ slightly - confirm which was intended.
games_not_ending_level = seasons['G'].sub(seasons['at500'])
seasons['pct_towards_500'] = seasons['towards500'].div(games_not_ending_level)

In [14]:
# 20 team-seasons with the highest share of games that moved the record towards .500
seasons.sort_values(by='pct_towards_500', ascending=False).iloc[:20]

Unnamed: 0_level_0,Unnamed: 1_level_0,G,W,L,at500,within_half,towards500,delta_max,delta_mean,pct_towards_500
team,yr,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
SLN,2020,58,30,28,17,46,28,2,0.913793,0.682927
TOR,2011,162,81,81,33,93,81,5,1.450617,0.627907
OAK,2010,162,81,81,33,87,81,6,1.660494,0.627907
MIA,2020,60,31,29,13,32,29,6,1.766667,0.617021
HOU,2020,60,29,31,13,32,29,6,1.833333,0.617021
CHN,1959,155,74,80,35,82,74,8,2.296774,0.616667
DET,1957,154,78,76,30,78,76,7,1.863636,0.612903
WAS,2018,162,82,80,30,75,80,11,2.845679,0.606061
PIT,1999,161,78,83,32,86,78,5,1.583851,0.604651
SFN,2020,60,29,31,12,31,29,8,2.033333,0.604167
