In [2]:
from scipy.stats import binom
import pandas as pd

import playoff_odds


In [3]:
def p_from_diff(diff):
    """Convert a rating difference into a win probability.

    Uses a base-10 logistic curve with a 400-point scale (the Elo
    expected-score form): a difference of 0 maps to 0.5, and each extra
    400 points multiplies the odds in favour by 10.

    Parameters
    ----------
    diff : number
        Rating of the side of interest minus the rating of its opponent.

    Returns
    -------
    float
        Win probability for the side of interest.
    """
    # Odds against the side of interest shrink tenfold per 400 points.
    odds_against = 10 ** (-diff / 400)
    return 1 / (1 + odds_against)

In [4]:
def p_game(r_home, r_away, home_adv=24):
    """Probability that the home team wins a single game.

    The home side is credited with ``home_adv`` extra rating points before
    the rating gap is converted to a probability by ``p_from_diff``.

    Parameters
    ----------
    r_home : number
        Rating of the team playing at home.
    r_away : number
        Rating of the visiting team.
    home_adv : number, optional
        Rating-point bonus given to the home side. Defaults to 24, the value
        this notebook previously hard-coded; pass 0 for a neutral venue.

    Returns
    -------
    float
        Win probability for the home team.
    """
    return p_from_diff(home_adv + r_home - r_away)

In [5]:
# get_games() returns two tables; only the second one (`remain`) is used here.
played, remain = playoff_odds.get_games()

# One rating per team, keyed by team abbreviation.
# NOTE(review): this only picks up teams that appear in the `team1` column, and
# assumes each team has a single distinct `rating1_pre` value -- confirm.
ratings = (
    remain[['team1', 'rating1_pre']]
    .drop_duplicates()
    .set_index('team1')['rating1_pre']
    .rename('rating')
)
ratings

team1
LAD    1617.704399
SEA    1529.693146
SDP    1519.142656
NYM    1556.350902
MIL    1525.470851
HOU    1571.863818
FLA    1466.686669
CLE    1506.165357
CIN    1471.741730
CHW    1512.886520
BOS    1509.251216
TEX    1470.160012
PIT    1421.294978
BAL    1489.584136
OAK    1437.261167
ANA    1494.235042
SFG    1503.418642
CHC    1466.378744
STL    1534.425759
TOR    1544.341328
WSN    1443.642873
NYY    1565.776876
ATL    1578.080101
DET    1444.361049
MIN    1509.499468
ARI    1485.637317
COL    1450.550429
KCR    1455.444731
TBD    1539.519892
PHI    1529.430192
Name: rating, dtype: float64

In [13]:
# Spot-check: look up a single team's rating by its abbreviation.
ratings['LAD']

1617.7043994612

In [7]:
# Single-game win probability for LAD as the home team against SDP.
p_game(ratings['LAD'], ratings['SDP'])

0.6694109561248396

In [8]:
def p_series3(r_home, r_away):
    """Probability that the home team wins a best-of-3 series.

    Every game uses the same single-game probability
    ``p_game(r_home, r_away)``, i.e. the team rated ``r_home`` is the home
    side for all three games.

    Parameters
    ----------
    r_home : number
        Rating of the team hosting the series.
    r_away : number
        Rating of the visiting team.

    Returns
    -------
    float
        Chance that the host wins at least 2 of the 3 games.
    """
    p_win = p_game(r_home, r_away)

    # Treat all three games as played: the host takes the series with
    # exactly 2 wins or exactly 3 wins out of 3 binomial trials.
    return sum(binom.pmf(wins, 3, p_win) for wins in (2, 3))

In [9]:
# Equal ratings: any edge over 50% comes from the home-side bonus inside p_game.
p_series3(1500, 1500)

0.5516439044456302

In [10]:
# Same series with the home side rated 20 points higher.
p_series3(1520, 1500)

0.5939773617521806

In [11]:
def _p_split_series(r_home, r_away, n_home, n_away):
    """Win probability for the home-advantage team in a split-venue series.

    The team rated ``r_home`` hosts ``n_home`` games and visits for
    ``n_away`` games. Every game is treated as played and independent, so
    the team's win count is the sum of two binomial counts; it wins the
    series when that sum is a majority of the games.

    Parameters
    ----------
    r_home, r_away : number
        Ratings of the team with home-field advantage and of its opponent.
    n_home, n_away : int
        Games hosted by the ``r_home`` team and by the ``r_away`` team.

    Returns
    -------
    float
        Chance that the ``r_home`` team wins more than half of the games.
    """
    p_host_win = p_game(r_home, r_away)   # r_home team wins when hosting
    p_opp_win = p_game(r_away, r_home)    # opponent wins when it is hosting

    wins_needed = (n_home + n_away) // 2 + 1

    # Distribution of the r_home team's wins in the games it hosts.
    home_dist = {k: binom.pmf(k, n_home, p_host_win) for k in range(n_home + 1)}
    # In the games it visits, k opponent wins means n_away - k wins for the
    # r_home team, so key the distribution by the r_home team's own wins.
    road_dist = {n_away - k: binom.pmf(k, n_away, p_opp_win) for k in range(n_away + 1)}

    # Sum the joint probability of every (home wins, road wins) pair that
    # reaches a majority. Selecting on the win total directly avoids slicing
    # an intermediate pandas object by integer position/label.
    return sum(
        p_h * p_r
        for w_h, p_h in home_dist.items()
        for w_r, p_r in road_dist.items()
        if w_h + w_r >= wins_needed
    )


def p_series5(r_home, r_away):
    """Probability that the home-advantage team wins a best-of-5 series.

    The team rated ``r_home`` hosts 3 games and the team rated ``r_away``
    hosts 2.

    Parameters
    ----------
    r_home : number
        Rating of the team with home-field advantage.
    r_away : number
        Rating of the opponent.

    Returns
    -------
    float
        Chance that the ``r_home`` team wins at least 3 of the 5 games.
    """
    return _p_split_series(r_home, r_away, n_home=3, n_away=2)


def p_series7(r_home, r_away):
    """Probability that the home-advantage team wins a best-of-7 series.

    The team rated ``r_home`` hosts 4 games and the team rated ``r_away``
    hosts 3.

    Parameters
    ----------
    r_home : number
        Rating of the team with home-field advantage.
    r_away : number
        Rating of the opponent.

    Returns
    -------
    float
        Chance that the ``r_home`` team wins at least 4 of the 7 games.
    """
    return _p_split_series(r_home, r_away, n_home=4, n_away=3)

In [14]:
# Best-of-5 where the side with home-field advantage is rated 100 points higher.
p_series5(1600, 1500)


0.7587817957990288

In [15]:
# Best-of-7 between equally rated teams: only the venue split separates them.
p_series7(1500, 1500)

0.5108071349156493

In [16]:
# OK, now let's simulate a specific post-season.

# Seeds 1-6, in order: LAD, NYM, STL, ATL, PHI, SDP
# `seeds` maps seed number -> team abbreviation.
seeds = dict(enumerate(['LAD', 'NYM', 'STL', 'ATL', 'PHI', 'SDP'], 1))
[ratings[tm] for tm in seeds.values()]

[1617.7043994612,
 1556.35090191654,
 1534.42575931518,
 1578.08010106132,
 1529.43019221744,
 1519.14265559203]

In [17]:
# 3 vs 6: best-of-3 with seed 3 passed as the home side for every game.
p = p_series3(ratings[seeds[3]], ratings[seeds[6]])
# Show the result keyed by team abbreviation; the two entries sum to 1.
{seeds[3]: p, seeds[6]: 1 -p}

{'STL': 0.5840831001315994, 'SDP': 0.41591689986840064}

In [18]:
def run_series(series_func, s1, s2):
    """Series win probabilities for two seeds, keyed by seed number.

    Looks up both teams through the notebook-level ``seeds`` and ``ratings``
    and passes seed ``s1`` to ``series_func`` as the home side.

    NOTE: a later cell rebinds the name ``run_series`` to a function with a
    different signature ``(length, teams_l, teams_r)``; this three-argument
    form is only valid until that cell has been run.

    Parameters
    ----------
    series_func : callable
        Called as ``series_func(r_home, r_away)``; returns the home side's
        probability of winning the series.
    s1, s2 : int
        Seed numbers of the home side and of its opponent.

    Returns
    -------
    dict
        ``{s1: p, s2: 1 - p}``.
    """
    p_s1 = series_func(ratings[seeds[s1]], ratings[seeds[s2]])
    outcome = {s1: p_s1}
    outcome[s2] = 1 - p_s1
    return outcome

In [19]:


# Same 3-vs-6 series via the helper; the result is keyed by seed number
# rather than by team abbreviation.
p36 = run_series(p_series3, 3, 6)
p36

{3: 0.5840831001315994, 6: 0.41591689986840064}

In [20]:
# 2 vs 3/6: seed 2 meets whichever of seeds 3 and 6 won the first series.


def run_scenario(tm):
    """Best-of-5 outcome for seed 2 against seed ``tm``, weighted by ``p36[tm]``.

    Uses the three-argument ``run_series(series_func, s1, s2)`` and the
    notebook-level ``p36`` mapping of first-series win probabilities.

    Parameters
    ----------
    tm : int
        Seed number of seed 2's opponent; must be a key of ``p36``.

    Returns
    -------
    pandas.Series
        Indexed by seed number (2 and ``tm``). Each value is that seed's
        chance of winning this series multiplied by the chance that ``tm``
        got here.
    """
    matchup = run_series(p_series5, 2, tm)
    # Scale both outcomes by the probability that this matchup happens.
    return pd.Series(matchup) * p36[tm]

# Line the scenarios up side by side and add them per seed; a seed that is
# absent from a scenario contributes 0 there.
(
    pd.concat([run_scenario(tm) for tm in p36], axis=1)
    .fillna(0)
    .sum(axis=1)
)


2    0.588193
3    0.250233
6    0.161574
dtype: float64

In [21]:
def run_series(length, teams_l, teams_r):
    """Distribution over who wins a series between two uncertain fields.

    Each side of the series is described by a mapping of seed number to the
    probability that this seed is the participant. Every left/right pairing
    is evaluated with the series function registered for ``length`` in the
    notebook-level ``series_funcs``, weighted by the product of the two
    participation probabilities, and the results are summed per seed.

    This definition replaces the earlier three-argument ``run_series``.

    Parameters
    ----------
    length : int
        Key into ``series_funcs`` (number of games in the series).
    teams_l, teams_r : mapping
        Seed number -> probability; anything with ``.items()`` works, such
        as a dict or the Series returned by a previous call.

    Returns
    -------
    pandas.Series
        Indexed by seed number; each value is that seed's probability of
        winning this series.
    """
    series_func = series_funcs[length]

    def matchup_outcome(seed_l, p_l, seed_r, p_r):
        # The numerically lower seed is always passed as the home side.
        if seed_l < seed_r:
            home_seed, road_seed = seed_l, seed_r
        else:
            home_seed, road_seed = seed_r, seed_l

        weight = p_l * p_r  # chance that this exact pairing occurs
        p_home = series_func(ratings[seeds[home_seed]], ratings[seeds[road_seed]])
        return pd.Series({home_seed: weight * p_home,
                          road_seed: weight * (1 - p_home)})

    outcomes = []
    for seed_l, p_l in teams_l.items():
        for seed_r, p_r in teams_r.items():
            outcomes.append(matchup_outcome(seed_l, p_l, seed_r, p_r))

    # Align all pairings on seed number; seeds missing from a pairing count as 0.
    return pd.concat(outcomes, axis=1).fillna(0).sum(axis=1)
            

In [22]:
# Number of games in the series -> function returning the home side's win
# probability; looked up by run_series(length, ...).
series_funcs = {1: p_game, 3: p_series3, 5: p_series5, 7: p_series7}

In [23]:
# Sanity check: with both seeds certain to take part, this should match p36.
run_series(3, {3: 1}, {6: 1})

3    0.584083
6    0.415917
dtype: float64

In [24]:
# Feed the 3-vs-6 result in by hand (rounded to .584) as seed 2's possible opponents.
run_series(5, {3: .584, 6: 1-.584}, {2: 1})

2    0.588196
3    0.250198
6    0.161607
dtype: float64

In [25]:
# Same half of the bracket, chaining the unrounded 3-vs-6 output straight
# into the best-of-5 against seed 2.
d1 = run_series(5, run_series(3, {3: 1}, {6: 1}), {2: 1})
d1

2    0.588193
3    0.250233
6    0.161574
dtype: float64

In [26]:
# Other half of the bracket: the winner of 4 vs 5 (best-of-3) meets seed 1
# in a best-of-5.
d2 = run_series(5, run_series(3, {4: 1}, {5: 1}), {1: 1})
d2

1    0.658123
4    0.249387
5    0.092489
dtype: float64

In [27]:
# Best-of-7 between the two bracket halves; each entry is a seed's overall
# chance of winning that series.
nlcs = run_series(7, d1, d2)
nlcs

1    0.476355
2    0.215538
4    0.148402
5    0.041139
3    0.076559
6    0.042007
dtype: float64

In [28]:
# Relabel the result by team abbreviation, in seed order.
{seeds[sd]: nlcs[sd] for sd in seeds}

{'LAD': 0.4763546163765622,
 'NYM': 0.21553796157918859,
 'STL': 0.07655899320357244,
 'ATL': 0.14840240079557068,
 'PHI': 0.041139250105256306,
 'SDP': 0.04200677793984979}