In [1]:
import pybaseball as bb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Inclusive date window passed to the Statcast query, as zero-padded
# ISO-8601 (YYYY-MM-DD) strings so both bounds share one unambiguous format.
start_dt = '2021-04-01'
end_dt = '2021-10-03'

In [3]:
def on_base_count(row: pd.Series) -> int:
    """Count how many of the three base columns hold a value in ``row``.

    Parameters
    ----------
    row : pd.Series
        A record exposing ``on_1b``, ``on_2b`` and ``on_3b``. A base is
        counted when its entry is neither missing (``None``, ``NaN``,
        ``pd.NA``) nor infinite.

    Returns
    -------
    int
        Number of occupied bases, between 0 and 3.
    """
    occupied = 0
    for runner in (row.on_1b, row.on_2b, row.on_3b):
        # Test for missing values first: np.isfinite raises TypeError on
        # None / pd.NA, which appear when the columns are object or
        # nullable-integer typed instead of float-with-NaN.
        if pd.notna(runner) and np.isfinite(runner):
            occupied += 1
    return occupied

def generate(player_id: np.float64) -> pd.DataFrame:
    """Fetch one pitcher's Statcast pitches and add an ``on_base_count`` column.

    The query window is taken from the module-level ``start_dt`` and
    ``end_dt`` values.

    Parameters
    ----------
    player_id
        Identifier forwarded to ``bb.statcast_pitcher`` (the caller in this
        notebook passes a ``key_mlbam`` value).

    Returns
    -------
    pd.DataFrame
        The selected pitch columns plus ``on_base_count``, computed row by
        row with ``on_base_count``.
    """
    columns = ['player_name', 'batter', 'game_date', 'at_bat_number',
               'balls', 'strikes', 'pitch_name', 'on_3b', 'on_2b', 'on_1b']
    pitches = bb.statcast_pitcher(start_dt, end_dt, player_id)
    # Take an explicit copy: adding a column to a bare column-subset slice
    # triggers pandas' SettingWithCopyWarning and may not stick reliably.
    data = pitches[columns].copy()
    data['on_base_count'] = data.apply(on_base_count, axis=1)
    return data

def get_data(first: str, last: str) -> pd.DataFrame:
    """Look a player up by name and return their prepared pitch data.

    Parameters
    ----------
    first : str
        Player's first name.
    last : str
        Player's last name.

    Returns
    -------
    pd.DataFrame
        The result of ``generate`` for the first matching player's
        ``key_mlbam`` identifier.

    Raises
    ------
    KeyError
        If the lookup returns no rows for the given name.
    """
    matches = bb.playerid_lookup(last, first)
    if len(matches) == 0:
        # Fail with a readable message instead of an opaque ``KeyError: 0``.
        raise KeyError(f"no player found for first={first!r}, last={last!r}")

    # Positional access: take the first returned row whatever its index label.
    # NOTE(review): when several players share a name, only the first row is
    # used — confirm that the lookup orders rows as desired.
    return generate(matches['key_mlbam'].iloc[0])

In [4]:
def count_transitions(count: list, n_states: int = 4) -> np.ndarray:
    """Tally transitions between consecutive states of a sequence.

    The sequence is walked from its last element to its first. For each
    adjacent pair visited in that order, ``out[earlier_visited][next_visited]``
    is incremented.
    NOTE(review): the reversal presumably turns newest-first data into
    chronological order — confirm against the data source.

    Parameters
    ----------
    count : sequence of int
        State labels in ``range(n_states)``. Lists, arrays and pandas Series
        are accepted; a Series is read positionally, so its index labels do
        not matter.
    n_states : int, default 4
        Number of distinct states, i.e. the side length of the result.

    Returns
    -------
    np.ndarray
        ``(n_states, n_states)`` float array of transition counts. It is all
        zeros when ``count`` has fewer than two elements.
    """
    # Convert to a plain integer array first: reversed() on a Series goes
    # through label-based indexing and breaks on a non-default index.
    states = np.asarray(count, dtype=int)[::-1]
    out = np.zeros((n_states, n_states))
    if states.size < 2:
        # No adjacent pair exists, so there is nothing to tally.
        return out

    # np.add.at accumulates correctly when the same (from, to) pair repeats,
    # unlike plain fancy-index ``+=``.
    np.add.at(out, (states[:-1], states[1:]), 1)
    return out

def limiting_distribution(count: pd.Series, num_simulations: int = 100000) -> np.array:
    """Estimate the long-run state distribution of an observed sequence.

    Transition counts from ``count_transitions`` are row-normalised into a
    probability matrix. A distribution that starts entirely in state 0 is
    then pushed through that matrix ``num_simulations`` times by ``simulate``.

    Parameters
    ----------
    count : pd.Series
        Observed state sequence, forwarded to ``count_transitions``.
    num_simulations : int, default 100000
        Number of matrix applications performed.

    Returns
    -------
    The state vector after the final step.
    """
    counts = count_transitions(count)
    row_totals = counts.sum(axis=1)
    # Rows with no recorded transitions would divide by zero; dividing by 1.0
    # instead leaves such rows as all zeros.
    safe_totals = np.where(row_totals == 0, 1.0, row_totals)
    probs = counts / safe_totals.reshape(-1, 1)

    # All probability mass starts in state 0.
    start = [1, 0, 0, 0]
    return simulate(start, probs, num_simulations)

def simulate(state, pi, i):
    """Apply the transition matrix ``pi`` to ``state`` a total of ``i`` times.

    Each step replaces the current vector with ``np.dot(vector, pi)``.

    Parameters
    ----------
    state
        Starting row vector.
    pi
        Transition matrix multiplied on the right at every step.
    i
        Number of steps. When no step is taken, ``state`` is returned as is.

    Returns
    -------
    The vector obtained after the last step.
    """
    distribution = state
    remaining = len(range(i))
    while remaining:
        distribution = np.dot(distribution, pi)
        remaining -= 1
    return distribution

class Player:
    """A pitcher's pitch data together with its on-base-count distribution.

    Attributes are set once at construction:

    * ``first`` / ``last`` — the names used for the lookup.
    * ``data`` — the DataFrame returned by ``get_data``.
    * ``pi`` — the result of ``limiting_distribution`` applied to the
      ``on_base_count`` column of ``data``.
    """

    def __init__(self, first, last, num_simulations=100000):
        """Fetch the player's data and compute the distribution.

        Parameters
        ----------
        first : str
            Player's first name.
        last : str
            Player's last name.
        num_simulations : int, default 100000
            Number of steps forwarded to ``limiting_distribution``.
        """
        self.first = first
        self.last = last
        self.data = get_data(self.first, self.last)
        self.pi = limiting_distribution(
            self.data['on_base_count'],
            num_simulations=num_simulations,
        )

In [5]:
# Build one Player per pitcher; each constructor call fetches that pitcher's
# data and computes the distribution.
woodruff = Player(first='brandon', last='woodruff')
bauer = Player(first='trevor', last='bauer')
scherzer = Player(first='max', last='scherzer')
lyles = Player(first='jordan', last='lyles')
gant = Player(first='john', last='gant')

Gathering player lookup table. This may take a moment.
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data
Gathering Player Data


In [6]:
# Pitchers to compare, in display order.
pitchers = [woodruff, bauer, scherzer, lyles, gant]

In [7]:
# One distribution vector per pitcher, in the order of `pitchers`.
[entry.pi for entry in pitchers]

[array([0.70719844, 0.2266537 , 0.06614786, 0.        ]),
 array([0.71132376, 0.20334928, 0.07097289, 0.01435407]),
 array([0.6917363 , 0.24419684, 0.0371402 , 0.02692665]),
 array([0.54326396, 0.30996714, 0.13253012, 0.01423877]),
 array([0.51341015, 0.29952556, 0.13826491, 0.04879938])]