In [1]:
# Monte-Carlo playoff odds
# Generate my own playoff odds

# For now, I'm focusing on the mechanics of the simulation, and less so on the inputs (e.g., the projected team quality)
# So I'm using 538's win probabilities for each game, rather than computing my own

# I'm also using 538's results/schedule data, because it is so easy to use

import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline

In [2]:
# Read in the 538 dataset, which has a row for each game in the current season (played or unplayed)
gms = pd.read_csv('https://projects.fivethirtyeight.com/mlb-api/mlb_elo_latest.csv')
#gms = pd.read_csv('../data/538/mlb-elo/mlb_elo_latest.csv')
gms

Unnamed: 0,date,season,neutral,playoff,team1,team2,elo1_pre,elo2_pre,elo_prob1,elo_prob2,...,pitcher1_rgs,pitcher2_rgs,pitcher1_adj,pitcher2_adj,rating_prob1,rating_prob2,rating1_post,rating2_post,score1,score2
0,2022-10-05,2022,0,,LAD,COL,1619.029390,1455.717535,0.746163,0.253837,...,,,,,0.753716,0.246284,,,,
1,2022-10-05,2022,0,,SEA,DET,1530.932700,1454.252576,0.640966,0.359034,...,,,,,0.637509,0.362491,,,,
2,2022-10-05,2022,0,,SDP,SFG,1511.029979,1509.271846,0.537001,0.462999,...,,,,,0.573302,0.426698,,,,
3,2022-10-05,2022,0,,NYM,WSN,1559.339016,1424.174932,0.714272,0.285728,...,,,,,0.718223,0.281777,,,,
4,2022-10-05,2022,0,,MIL,ARI,1517.900075,1482.583873,0.584543,0.415457,...,,,,,0.610971,0.389029,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2425,2022-04-07,2022,0,,ATL,CIN,1555.630840,1501.967218,0.609942,0.390058,...,58.198554,53.297336,18.664382,15.512738,0.620108,0.379892,1552.570297,1501.193092,3.0,6.0
2426,2022-04-07,2022,0,,WSN,NYM,1476.319846,1495.202033,0.507365,0.492635,...,46.506602,48.182760,-10.890192,-33.183129,0.495889,0.504111,1467.302390,1522.210391,1.0,5.0
2427,2022-04-07,2022,0,,STL,PIT,1524.880454,1456.114951,0.630416,0.369584,...,57.273136,46.669517,27.921385,2.182563,0.650312,0.349688,1503.439418,1444.031029,9.0,0.0
2428,2022-04-07,2022,0,,KCR,CLE,1480.923133,1501.256999,0.505276,0.494724,...,50.288294,59.572636,7.862364,30.139987,0.476089,0.523911,1473.144618,1491.474766,3.0,1.0


In [3]:
gms.columns

Index(['date', 'season', 'neutral', 'playoff', 'team1', 'team2', 'elo1_pre',
       'elo2_pre', 'elo_prob1', 'elo_prob2', 'elo1_post', 'elo2_post',
       'rating1_pre', 'rating2_pre', 'pitcher1', 'pitcher2', 'pitcher1_rgs',
       'pitcher2_rgs', 'pitcher1_adj', 'pitcher2_adj', 'rating_prob1',
       'rating_prob2', 'rating1_post', 'rating2_post', 'score1', 'score2'],
      dtype='object')

In [4]:
# Split out the games that have been played vs those remaining
played = gms.dropna(subset=['score1']) # games that have a score
remain = gms.loc[gms.index.difference(played.index)] # all other games
played.shape, remain.shape

((1895, 26), (535, 26))

# Define some functions that will be used in the simulation

In [5]:
def compute_standings(gms_played):
    margins = gms_played['score1']-gms_played['score2']
    winners = pd.Series(np.where(margins>0, gms_played['team1'], gms_played['team2']))
    losers  = pd.Series(np.where(margins<0, gms_played['team1'], gms_played['team2']))
    standings = pd.concat([winners.value_counts().rename('W'), losers.value_counts().rename('L')], axis=1)
    return standings

compute_standings(played)

Unnamed: 0,W,L
LAD,87,38
NYM,82,46
HOU,81,47
ATL,79,49
NYY,78,48
STL,73,54
PHI,72,55
SDP,70,58
TBD,69,57
SEA,69,57


In [6]:
random_inputs = {}

NUM_RANDOMS_PER_ITERATION = 1200
def get_randoms(iteration: int) -> pd.Series:
    if iteration not in random_inputs:
        # Generate a random number for each game
        randoms = pd.Series(np.random.rand(NUM_RANDOMS_PER_ITERATION))
        random_inputs[iteration] = randoms
    
    return random_inputs[iteration]
    
get_randoms(0)

0       0.568162
1       0.597044
2       0.029432
3       0.716706
4       0.014793
          ...   
1195    0.827315
1196    0.588782
1197    0.495282
1198    0.694235
1199    0.738477
Length: 1200, dtype: float64

In [7]:
# This is the source data for the mapping of teams to divisions/leagues
div_text = '''
NLW: ARI COL LAD SDP SFG
NLE: ATL FLA NYM PHI WSN
ALW: SEA ANA HOU OAK TEX
ALE: TBD TOR BAL NYY BOS
ALC: MIN CHW CLE KCR DET
NLC: STL MIL CHC PIT CIN
'''

divs = {line.split(': ')[0]: line.split(': ')[1].split(' ') for line in div_text.strip().split('\n')}
teams = pd.DataFrame(pd.concat([pd.Series({team: div for team in teams}) for (div, teams) in divs.items()]).rename('div'))
teams['lg'] = teams['div'].str[0]
teams

Unnamed: 0,div,lg
ARI,NLW,N
COL,NLW,N
LAD,NLW,N
SDP,NLW,N
SFG,NLW,N
ATL,NLE,N
FLA,NLE,N
NYM,NLE,N
PHI,NLE,N
WSN,NLE,N


In [8]:

def sim_rem_games(remain: pd.DataFrame, randoms: pd.Series):
    # Figure out the winners and losers
    rands = randoms[0:len(remain)]
    rands.index = remain.index
    winners = pd.Series(np.where(rands<remain['rating_prob1'], remain['team1'], remain['team2']))
    losers = pd.Series(np.where(rands>remain['rating_prob1'], remain['team1'], remain['team2']))

    # Compute and return the standings
    standings = pd.concat([winners.value_counts().rename('W'), losers.value_counts().rename('L')], axis=1)
    for col in standings.columns: # convert to int
        standings[col] = standings[col].fillna(0).astype(int)
    return standings

rand0 = get_randoms(0)
sim_rem_games(remain, rand0)

Unnamed: 0,W,L
NYM,28,6
MIL,26,11
PHI,24,11
TOR,23,14
SEA,23,13
HOU,22,12
LAD,21,16
CHC,21,14
TBD,20,16
MIN,20,18


In [9]:
rand0 = get_randoms(0)
type(rand0)


pandas.core.series.Series

In [10]:
cur_standings = compute_standings(played)
rem_standings = sim_rem_games(remain, rand0)
full_standings = cur_standings+rem_standings
full_standings

Unnamed: 0,W,L
ANA,72,90
ARI,73,89
ATL,97,65
BAL,85,77
BOS,78,84
CHC,76,86
CHW,81,81
CIN,65,97
CLE,84,78
COL,69,93


In [11]:
# find playoff teams
def add_playoff_seeds(standings, randoms):
    standings['wpct'] = standings['W'] / (standings['W'] + standings['L'])

    # Merge in the div/lg data
    standings['div'] = teams['div']
    standings['lg'] = teams['lg']

    # Rather than model out all the tie-breakers, I'm assuming that they are all random (not exactly true, but close enough),
    # and so I'm just generating a random number for each team, and we break ties by comparing that random num for each of the tied teams.
    # This is *so* much simpler and faster than modeling all the different scenarios.
    # It might be worth modeling them out with 1-2 days left in the season, but for most of the season, I way prefer using the random num to break ties
    rands = randoms[0:len(standings)]
    rands.index = standings.index
    standings['rand'] = rands

    # Now sort, and break ties using the rand
    sorted = standings.sort_values(by=['wpct', 'rand'], ascending=False)

    # div_rank is nice to have, but somewhat expensive to compute
    #standings['div_rank'] = sorted.groupby('div').cumcount()+1
    #standings['div_win'] = standings['div_rank'] == 1

    # Set div_win False as default, then set it True for div winners
    standings['div_win'] = False
    standings.loc[sorted.groupby('div').head(1).index, 'div_win'] = True
    standings['lg_rank'] = standings.sort_values(by=['div_win', 'wpct', 'rand'], ascending=False).groupby('lg').cumcount()+1
    return standings.sort_values(['lg', 'lg_rank'])

     

add_playoff_seeds(full_standings, rand0)

Unnamed: 0,W,L,wpct,div,lg,rand,div_win,lg_rank
HOU,103,59,0.635802,ALW,A,0.913558,True,1
NYY,97,65,0.598765,ALE,A,0.649366,True,2
CLE,84,78,0.518519,ALC,A,0.318823,True,3
SEA,92,70,0.567901,ALW,A,0.563987,False,4
TOR,91,71,0.561728,ALE,A,0.320261,False,5
TBD,89,73,0.549383,ALE,A,0.918091,False,6
BAL,85,77,0.524691,ALE,A,0.716706,False,7
MIN,83,79,0.512346,ALC,A,0.32047,False,8
CHW,81,81,0.5,ALC,A,0.29268,False,9
BOS,78,84,0.481481,ALE,A,0.014793,False,10


In [12]:
%%prun -s cumulative # This runs the code profiler, which creates data I can use to find opportunities for me to speed up the code

[add_playoff_seeds(full_standings, rand0) for _ in range(1000)]
None # This is to suppress printing the output, which is 1000 lines of the same list of teams

 

         11671933 function calls (11539933 primitive calls) in 4.044 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    4.044    4.044 {built-in method builtins.exec}
        1    0.003    0.003    4.044    4.044 <string>:1(<module>)
        1    0.003    0.003    4.041    4.041 <string>:3(<listcomp>)
     1000    0.020    0.000    4.038    0.004 776701362.py:2(add_playoff_seeds)
     3000    0.004    0.000    1.712    0.001 _decorators.py:302(wrapper)
     3000    0.019    0.000    1.706    0.001 frame.py:6275(sort_values)
     3000    0.059    0.000    1.179    0.000 sorting.py:285(lexsort_indexer)
     7000    0.038    0.000    0.895    0.000 categorical.py:365(__init__)
    16000    0.034    0.000    0.874    0.000 frame.py:3463(__getitem__)
     1000    0.003    0.000    0.768    0.001 groupby.py:3049(cumcount)
51000/35000    0.021    0.000    0.597    0.000 groupby.py:908(__getattribute__

In [13]:
def finish_one_season(incoming_standings, remain, randoms):
    rem_standings = sim_rem_games(remain, randoms)
    full_standings = incoming_standings+rem_standings
    full_standings = add_playoff_seeds(full_standings, randoms)
    return full_standings

finish_one_season(cur_standings, remain, rand0)

Unnamed: 0,W,L,wpct,div,lg,rand,div_win,lg_rank
HOU,103,59,0.635802,ALW,A,0.913558,True,1
NYY,97,65,0.598765,ALE,A,0.649366,True,2
CLE,84,78,0.518519,ALC,A,0.318823,True,3
SEA,92,70,0.567901,ALW,A,0.563987,False,4
TOR,91,71,0.561728,ALE,A,0.320261,False,5
TBD,89,73,0.549383,ALE,A,0.918091,False,6
BAL,85,77,0.524691,ALE,A,0.716706,False,7
MIN,83,79,0.512346,ALC,A,0.32047,False,8
CHW,81,81,0.5,ALC,A,0.29268,False,9
BOS,78,84,0.481481,ALE,A,0.014793,False,10


In [14]:
def sim_1_season(incoming_standings, remain, i):
    randoms = get_randoms(i)
    standings = finish_one_season(incoming_standings, remain, randoms)
    standings['iter'] = i
    standings = standings.reset_index().rename(columns={'index': 'team'}).set_index(['team', 'iter'])
    return standings

def sim_n_seasons(incoming_standings, remain, n):
    return pd.concat([sim_1_season(incoming_standings, remain, i) for i in range(n)])

sim_results = sim_n_seasons(cur_standings, remain, 10)
sim_results

Unnamed: 0_level_0,Unnamed: 1_level_0,W,L,wpct,div,lg,rand,div_win,lg_rank
team,iter,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
HOU,0,103,59,0.635802,ALW,A,0.913558,True,1
NYY,0,97,65,0.598765,ALE,A,0.649366,True,2
CLE,0,84,78,0.518519,ALC,A,0.318823,True,3
SEA,0,92,70,0.567901,ALW,A,0.563987,False,4
TOR,0,91,71,0.561728,ALE,A,0.320261,False,5
...,...,...,...,...,...,...,...,...,...
FLA,9,67,95,0.413580,NLE,N,0.993755,False,11
CIN,9,65,97,0.401235,NLC,N,0.938916,False,12
COL,9,61,101,0.376543,NLW,N,0.347356,False,13
PIT,9,60,102,0.370370,NLC,N,0.366459,False,14


In [15]:
# Count the number of div/wc/playoff appearances by team from a set of results

# Championship weights by seed position
weights = {i: 1/16 for i in range(1,7)}
weights[1] = 1/8
weights[2] = 1/8

def summarize_sim_results(df_results):
    counts = df_results.query('lg_rank <= 6').reset_index()[['team', 'lg_rank']].value_counts().unstack()
    mean_wins = df_results.groupby('team')['W'].mean().rename('mean_wins')
    summary = pd.merge(left=mean_wins, right=counts, on='team', how='left')
    for col in counts.columns:
        summary[col] = summary[col].fillna(0).astype(int)    

    summary['div_wins'] = summary[range(1, 4)].sum(axis=1)
    summary['playoffs'] = summary[range(1, 7)].sum(axis=1)
    summary['champ_shares'] = (summary[range(1,7)] * np.array(weights)).sum(axis=1)
    return summary

summarize_sim_results(sim_results)

Unnamed: 0_level_0,mean_wins,1,2,3,4,5,6,div_wins,playoffs,champ_shares
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ANA,70.0,0,0,0,0,0,0,0,0,0.0
ARI,74.1,0,0,0,0,0,0,0,0,0.0
ATL,100.4,0,2,0,8,0,0,2,10,0.75
BAL,83.9,0,0,0,0,1,4,0,5,0.3125
BOS,81.4,0,0,0,0,1,0,0,1,0.0625
CHC,71.4,0,0,0,0,0,0,0,0,0.0
CHW,81.8,0,0,0,0,0,2,0,2,0.125
CIN,66.3,0,0,0,0,0,0,0,0,0.0
CLE,87.3,0,0,9,0,0,0,9,9,0.5625
COL,65.5,0,0,0,0,0,0,0,0,0.0


In [16]:
#%%prun -s cumulative # This runs the code profiler, which creates data I can use to find opportunities for me to speed up the code

sim_results = sim_n_seasons(cur_standings, remain, 10*1000)
summarize_sim_results(sim_results)

Unnamed: 0_level_0,mean_wins,1,2,3,4,5,6,div_wins,playoffs,champ_shares
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ANA,70.6961,0,0,0,0,0,0,0,0,0.0
ARI,73.1679,0,0,0,0,0,3,0,3,0.1875
ATL,99.7424,20,1684,5,8013,271,6,1709,9999,731.4375
BAL,83.4646,0,2,0,215,647,1666,2,2530,158.25
BOS,79.229,0,0,0,8,47,241,0,296,18.5
CHC,71.3599,0,0,0,0,0,0,0,0,0.0
CHW,81.839,0,0,1137,14,82,467,1137,1700,106.25
CIN,67.6275,0,0,0,0,0,0,0,0,0.0
CLE,86.4414,0,14,7242,8,73,320,7256,7657,479.4375
COL,66.7445,0,0,0,0,0,0,0,0,0.0


In [17]:
sim_results.groupby('iter')['W'].max().median()

111.0

In [18]:
summary = summarize_sim_results(sim_results)
print(summary.sort_values('champ_shares', ascending=False).to_string())

      mean_wins     1     2     3     4     5     6  div_wins  playoffs  champ_shares
team                                                                                 
LAD    111.3667  9667   333     0     0     0     0     10000     10000     1250.0000
HOU    101.7986  7478  2506     2    12     2     0      9986     10000     1249.0000
NYY     99.1218  2516  7343    12   100    25     4      9871     10000     1241.1875
NYM    103.7191   313  7958    11  1713     5     0      8282     10000     1141.9375
ATL     99.7424    20  1684     5  8013   271     6      1709      9999      731.4375
STL     93.3512     0    16  9276     0   180   345      9292      9817      614.5625
SEA     89.6339     3    11     0  4698  3370  1349        14      9431      590.3125
PHI     91.1329     0     9     0   265  7159  1882         9      9315      582.7500
TOR     88.8075     3    87     0  3473  3276  1992        90      8831      557.5625
CLE     86.4414     0    14  7242     8    73   320   

In [19]:
# How many games does each team win in each seeding?
sim_results.query('lg_rank <= 6').groupby(['team', 'lg_rank'])['W'].mean().unstack()

lg_rank,1,2,3,4,5,6
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ARI,,,,,,82.0
ATL,106.45,102.788599,99.2,99.265943,94.634686,91.333333
BAL,,92.5,,89.437209,87.772798,86.033613
BOS,,,,87.125,86.617021,84.80083
CHW,,,85.822339,87.571429,86.390244,85.220557
CLE,,94.071429,87.588788,89.625,87.068493,85.684375
HOU,102.620487,99.39146,94.0,95.0,93.5,
LAD,111.538223,106.387387,,,,
MIL,,,91.189266,92.333333,90.09434,87.94508
MIN,,,86.06285,88.153846,86.46988,85.252546


In [20]:
# How many wins do teams have in division-winning seasons?
sim_results.query('div_win').groupby('team')['W'].mean()

team
ATL    102.820948
BAL     92.500000
CHW     85.822339
CLE     87.601295
HOU    101.808432
LAD    111.366700
MIL     91.189266
MIN     86.062850
NYM    104.351123
NYY     99.197548
PHI     99.444444
SEA     96.285714
STL     93.659815
TBD     93.972973
TOR     94.811111
Name: W, dtype: float64

In [21]:
# How often do teams win the division when they win 95 games?
finishes = sim_results.query('W>=95').groupby('team').agg(num_seasons=('div_win', len), div_wins=('div_win', sum))
finishes['pct_win'] = finishes['div_wins']/finishes['num_seasons']
finishes


Unnamed: 0_level_0,num_seasons,div_wins,pct_win
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ATL,9684,1709,0.176477
CLE,33,33,1.0
HOU,9956,9947,0.999096
LAD,10000,10000,1.0
MIL,46,37,0.804348
NYM,9996,8282,0.828531
NYY,9442,9407,0.996293
PHI,1274,9,0.007064
SDP,50,0,0.0
SEA,452,11,0.024336


In [22]:
pads95 = sim_results.query('team=="SDP" and W>=95').reset_index()['iter']
sim_results.query('iter in @pads95 and div=="NLW"').groupby('team')['W'].mean()

team
ARI     71.46
COL     65.44
LAD    108.96
SDP     95.60
SFG     78.40
Name: W, dtype: float64

In [29]:
remain.tail(10)


Unnamed: 0,date,season,neutral,playoff,team1,team2,elo1_pre,elo2_pre,elo_prob1,elo_prob2,...,pitcher1_rgs,pitcher2_rgs,pitcher1_adj,pitcher2_adj,rating_prob1,rating_prob2,rating1_post,rating2_post,score1,score2
525,2022-08-28,2022,0,,CHW,ARI,1499.544578,1482.583873,0.558676,0.441324,...,58.474146,48.759737,28.631922,-14.399257,0.641695,0.358305,,,,
526,2022-08-28,2022,0,,NYM,COL,1559.339016,1455.717535,0.675824,0.324176,...,63.020212,53.083878,36.939633,15.381847,0.71354,0.28646,,,,
527,2022-08-28,2022,0,,TOR,ANA,1534.383807,1473.486867,0.619802,0.380198,...,54.917213,45.400313,9.324779,-32.485557,0.663146,0.336854,,,,
528,2022-08-28,2022,0,,WSN,CIN,1424.174932,1470.269501,0.468246,0.531754,...,40.992888,50.941707,-20.193528,6.051259,0.436412,0.563588,,,,
529,2022-08-28,2022,0,,PHI,PIT,1536.207823,1419.325286,0.692319,0.307681,...,52.240448,48.131682,-18.254359,5.049788,0.666647,0.333353,,,,
530,2022-08-28,2022,0,,BOS,TBD,1497.575485,1540.96299,0.472128,0.527872,...,53.382413,52.881872,7.498225,-5.797199,0.525067,0.474933,,,,
531,2022-08-28,2022,0,,FLA,LAD,1464.643906,1619.02939,0.320699,0.679301,...,53.837098,59.057526,0.538485,14.407526,0.315991,0.684009,,,,
532,2022-08-27,2022,0,,SEA,CLE,1530.9327,1520.846277,0.548898,0.451102,...,58.741468,49.771567,34.825946,-18.708007,0.61891,0.38109,,,,
533,2022-08-27,2022,0,,OAK,NYY,1452.564739,1564.333276,0.376311,0.623689,...,44.319196,52.961968,-24.187803,-14.779824,0.329368,0.670632,,,,
535,2022-08-27,2022,0,,MIN,SFG,1501.843256,1509.271846,0.52383,0.47617,...,55.965278,52.199314,20.21002,-4.906395,0.581449,0.418551,,,,


In [30]:

def sim_one_way(game_id, prob, num_iterations):
    orig_prob = remain.loc[game_id, 'rating_prob1']
    remain.loc[game_id, 'rating_prob1'] = prob
    sim_results = sim_n_seasons(cur_standings, remain, num_iterations)
    remain.loc[game_id, 'rating_prob1'] = orig_prob
    results = summarize_sim_results(sim_results)
    wp1 = results['champ_shares'].rename(f'{prob}')
    return wp1

def sim_both_ways(game_id, num_iterations):
    results = pd.concat([sim_one_way(game_id, prob, num_iterations) for prob in [0, 1]], axis=1)

    team1 = remain.loc[game_id, 'team1']
    diff = (results['1'] - results['0']).rename(game_id)
    return diff


sim_both_ways(remain.index.values[-1], 100)



team
ANA    0.0000
ARI    0.0000
ATL    0.0000
BAL    0.0000
BOS    0.0000
CHC    0.0000
CHW   -0.0625
CIN    0.0000
CLE    0.0000
COL    0.0000
DET    0.0000
FLA    0.0000
HOU    0.0000
KCR    0.0000
LAD    0.0000
MIL    0.0000
MIN    0.2500
NYM    0.0000
NYY    0.0000
OAK    0.0000
PHI    0.0625
PIT    0.0000
SDP    0.0625
SEA    0.0000
SFG   -0.1250
STL    0.0000
TBD   -0.1875
TEX    0.0000
TOR    0.0000
WSN    0.0000
Name: 535, dtype: float64

In [31]:
next_dt = remain['date'].min()
game_ids = remain.query('date == @next_dt').index.values
game_ids

array([532, 533, 535])

In [32]:
results = pd.DataFrame()
for i in range(5):
    num_iterations = 10 ** i
    print(f'Starting {num_iterations} iterations')
    results = pd.concat([sim_both_ways(game_id, num_iterations) for game_id in game_ids], axis=1)
    print(results)

Starting 1 iterations
      532  533  535
team               
ANA   0.0  0.0  0.0
ARI   0.0  0.0  0.0
ATL   0.0  0.0  0.0
BAL   0.0  0.0  0.0
BOS   0.0  0.0  0.0
CHC   0.0  0.0  0.0
CHW   0.0  0.0  0.0
CIN   0.0  0.0  0.0
CLE   0.0  0.0  0.0
COL   0.0  0.0  0.0
DET   0.0  0.0  0.0
FLA   0.0  0.0  0.0
HOU   0.0  0.0  0.0
KCR   0.0  0.0  0.0
LAD   0.0  0.0  0.0
MIL   0.0  0.0  0.0
MIN   0.0  0.0  0.0
NYM   0.0  0.0  0.0
NYY   0.0  0.0  0.0
OAK   0.0  0.0  0.0
PHI   0.0  0.0  0.0
PIT   0.0  0.0  0.0
SDP   0.0  0.0  0.0
SEA   0.0  0.0  0.0
SFG   0.0  0.0  0.0
STL   0.0  0.0  0.0
TBD   0.0  0.0  0.0
TEX   0.0  0.0  0.0
TOR   0.0  0.0  0.0
WSN   0.0  0.0  0.0
Starting 10 iterations
         532  533     535
team                     
ANA   0.0000  0.0  0.0000
ARI   0.0000  0.0  0.0000
ATL   0.0000  0.0  0.0000
BAL  -0.0625  0.0  0.0000
BOS   0.0000  0.0  0.0000
CHC   0.0000  0.0  0.0000
CHW   0.0000  0.0  0.0000
CIN   0.0000  0.0  0.0000
CLE   0.0000  0.0  0.0000
COL   0.0000  0.0  0.0000
DET

In [None]:
game_descs = remain.apply(lambda x: f'{x.team2}\n@{x.team1}', axis=1)
game_descs.to_dict()

: 

In [None]:
for_graphing = results/(num_iterations/100)
for_graphing = for_graphing.rename(columns=game_descs.to_dict())
teamwise_leverage = for_graphing.abs().sum(axis=1).sort_values(ascending=False)
teamwise_leverage


: 

In [None]:
sns.set(rc = {'figure.figsize':(15,8)})
cmap = sns.diverging_palette(10, 133, as_cmap=True)
sns.heatmap(for_graphing, cmap=cmap, annot=True, center=0)

: 

In [None]:
(abs(results)).sum(axis=0).sort_values()

: 

In [None]:
results.sum(axis=1).sum()

: 

In [None]:
gamewise_leverage = for_graphing.abs().sum(axis=0).sort_values(ascending=False)
gamewise_leverage

: 

: 