In [48]:
from numpy.random import binomial
import pandas as pd
import numpy as np
from pathlib import Path

In [49]:
interim = Path('../../data/interim')
events = pd.read_pickle(Path(interim) / 'events.pkl')

states_data = events.loc[
    events.EVENT_CD.isin([2, 3, 14, 20, 21, 22, 23]), # 15, 16
    ['EVENT_CD', 'BASE1_RUN_ID', 'BASE2_RUN_ID', 
    'BASE3_RUN_ID', 'BAT_DEST_ID', 'RUN1_DEST_ID', 'RUN2_DEST_ID', 
    'RUN3_DEST_ID', 'OUTS_CT', 'EVENT_OUTS_CT', 'EVENT_RUNS_CT']
]

states_data['1b'] = np.where(states_data['BASE1_RUN_ID'].isna(), 0, 1)
states_data['2b'] = np.where(states_data['BASE2_RUN_ID'].isna(), 0, 1)
states_data['3b'] = np.where(states_data['BASE3_RUN_ID'].isna(), 0, 1)

states_data['1b_new'] = np.where(
    (states_data['BAT_DEST_ID'] == 1) |
    (states_data['RUN1_DEST_ID'] == 1) |
    (states_data['RUN2_DEST_ID'] == 1) |
    (states_data['RUN3_DEST_ID'] == 1),
    1, 0
)

states_data['2b_new'] = np.where(
    (states_data['BAT_DEST_ID'] == 2) |
    (states_data['RUN1_DEST_ID'] == 2) |
    (states_data['RUN2_DEST_ID'] == 2) |
    (states_data['RUN3_DEST_ID'] == 2),
    1, 0
)

states_data['3b_new'] = np.where(
    (states_data['BAT_DEST_ID'] == 3) |
    (states_data['RUN1_DEST_ID'] == 3) |
    (states_data['RUN2_DEST_ID'] == 3) |
    (states_data['RUN3_DEST_ID'] == 3),
    1, 0
)

states_data = states_data.rename(columns={'OUTS_CT': 'outs', 'EVENT_RUNS_CT': 'runs'})

states_data['outs_new'] = states_data['outs'] + states_data['EVENT_OUTS_CT']

states_data = states_data.drop([
    'EVENT_OUTS_CT', 'BASE1_RUN_ID', 'BASE2_RUN_ID', 'BASE3_RUN_ID',
    'BAT_DEST_ID', 'RUN1_DEST_ID', 'RUN2_DEST_ID', 'RUN3_DEST_ID'
], axis = 1)

states_data['freq'] = 1

In [50]:
transition_counts = states_data.groupby(['EVENT_CD', 'outs', '1b', '2b', '3b', '1b_new', '2b_new', '3b_new', 'outs_new']).agg({'runs':'mean', 'freq': 'sum'})
transition_counts.runs = transition_counts.runs.round(1)

transition_counts['totals'] = transition_counts.groupby(['EVENT_CD', 'outs', '1b', '2b', '3b'])['freq'].transform('sum')
transition_counts['frac'] = transition_counts['freq'] / transition_counts['totals']

transition_counts = transition_counts.loc[transition_counts.frac >= .05]

transition_counts['totals'] = transition_counts.groupby(['EVENT_CD', 'outs', '1b', '2b', '3b'])['freq'].transform('sum')
transition_counts['frac'] = transition_counts['freq'] / transition_counts['totals']

transition_counts = transition_counts.reset_index()
transition_counts['bases'] = transition_counts['1b'] + transition_counts['2b']*2 + transition_counts['3b']*4
transition_counts['bases_new'] = transition_counts['1b_new'] + transition_counts['2b_new']*2 + transition_counts['3b_new']*4
transition_counts = transition_counts.set_index(['outs', 'bases', 'outs_new', 'bases_new'])
transition_counts = transition_counts.sort_values(['outs', 'bases', 'outs_new', 'bases_new', 'EVENT_CD'])

In [51]:
event_odds = states_data.groupby('EVENT_CD').size().to_frame()
event_odds.columns = ['count']
event_odds['total'] = event_odds['count'].sum()
event_odds['prob'] = event_odds['count'] / event_odds['total']
event_odds = event_odds.drop(['count', 'total'], axis = 1)
event_odds

Unnamed: 0_level_0,prob
EVENT_CD,Unnamed: 1_level_1
2,0.530417
3,0.148806
14,0.080918
20,0.167842
21,0.042683
22,0.006709
23,0.022625


In [52]:
transition_counts = transition_counts.reset_index().merge(event_odds, on='EVENT_CD').set_index(['outs', 'bases', 'outs_new', 'bases_new'])
transition_counts['weight'] = transition_counts['frac'] * transition_counts['prob']
transition_probs = transition_counts.groupby(['outs', 'bases', 'outs_new', 'bases_new'])['weight'].sum().to_frame()
rewards = transition_counts.groupby(['outs', 'bases', 'outs_new', 'bases_new'])['runs'].mean().to_frame()

In [53]:
outs_mat = [0, 1, 2]
bases_mat = [0, 1, 2, 3, 4, 5, 6, 7]
mind = pd.MultiIndex.from_product([outs_mat, bases_mat, outs_mat, bases_mat], names = ['outs', 'bases', 'outs_new', 'bases_new'])

In [54]:
transition_probs = transition_probs.reindex(mind, fill_value=0)
rewards = rewards.reindex(mind, fill_value=0)

In [55]:
transition_probs = transition_probs.reset_index().pivot(index=['outs', 'bases'], columns=['outs_new', 'bases_new'], values='weight')

In [56]:
rewards = rewards.reset_index().pivot(index=['outs', 'bases'], columns=['outs_new', 'bases_new'], values='runs')

In [57]:
P = transition_probs.to_numpy()
R = rewards.to_numpy()
I = np.identity(P.shape[0])

In [58]:
Q = np.sum(R * P, axis = 1).reshape(24,1)

In [59]:
v = np.linalg.solve((I-P), Q)

In [60]:
v

array([[0.47153284],
       [0.85533331],
       [1.1044428 ],
       [1.48799641],
       [1.30835432],
       [1.7201353 ],
       [1.94039347],
       [2.28017954],
       [0.24962086],
       [0.49157099],
       [0.66964199],
       [0.91006201],
       [0.91598169],
       [1.13448353],
       [1.36671354],
       [1.54513487],
       [0.09472234],
       [0.21419276],
       [0.33464004],
       [0.46222951],
       [0.35602184],
       [0.47915838],
       [0.59627044],
       [0.77760919]])

In [69]:
events[events.EVENT_CD.isin([2, 3, 14, 20, 21, 22, 23])].EVENT_RUNS_CT.sum()

1455709

In [68]:
events[events.EVENT_CD.isin([2, 3, 14, 20, 21, 22, 23])].groupby(['GAME_ID', 'BAT_HOME_ID', 'INN_CT']).size()

GAME_ID       BAT_HOME_ID  INN_CT
ANA199704020  0            1         5
                           2         4
                           3         5
                           4         3
                           5         3
                                    ..
WS2197109300  1            4         4
                           5         4
                           6         7
                           7         4
                           8         3
Length: 3068406, dtype: int64

In [70]:
print(1455709 / 3068406)

0.4744186395151098
