# Install

In [16]:
!pip install --upgrade pybaseball



# Imports

In [13]:
import os
import pandas as pd
import numpy as np
import pybaseball as pyb
from pybaseball import batting_stats

# Monte Carlo Simulation
***

- Monte Carlo are simulations evolving randomly
- Example of a marble dropping device:
  - One big circle bowl and a small square bowl
  - After a while of randomly dropping marbles into bowls, weight of big bowl / weight of small bowl is roughly pi
  - Probability of marble dropping into bowl is proportional to bowl's cross section area
  - Area of circle bowl is pi*r2, and area of square bowl is r2 which is why we get pi when dividing the two
  - We determine the area of the bowls by taking the random samples using monte carlo simulation!
- Example of finding random height of all people:
  - Measure height of small group of people
  - Make sure group is unbiased -- so we will randomly select group
  - Use a large enough sample (Law of large numbers -- average approaches true value for the more samples we have)
  - We can rely on randomly selected samples rather than measuring every single person's height
  - Eventually will approach expected value

- Can probably use Monte Carlo for lineup optimization... add batting average for each player etc and run simulations on different order on if you'll win or not

### Baseball Example: Should you pinch hit?

Lets take the scenario of being in the 9th inning, 2 outs and the Red Sox are down 1. We have two power lefty hitters lets say Rafael Devers and Triston Casas. Should we pinch hit Casas for Devers given that Spencer Strider (Braves) is pitching against them.

In [21]:
# import and pull data
import pybaseball as pyb
import pandas as pd
import numpy as np
from pybaseball import batting_stats_range
from pybaseball import pitching_stats_range
from pybaseball import schedule_and_record

In [41]:
batting = pd.read_csv('Batting.csv')
batting = batting[(batting.yearID == 2023)&(batting.teamID == 'BOS')]
batting.playerID.unique()

array(['abreuwi02', 'alfarjo01', 'arroych01', 'barraky01', 'bellobr01',
       'bernabr01', 'bleieri01', 'brasiry01', 'casastr01', 'changyu01',
       'crawfku01', 'dalbebo01', 'dermoma01', 'deverra01', 'duranja01',
       'duvalad01', 'fariaja01', 'garzaju01', 'hamilca01', 'hamilda03',
       'hernaen02', 'houckta01', 'jacqujo01', 'janseke01', 'kellyza01',
       'klubeco01', 'lametdi01', 'litteza01', 'llovema01', 'martich02',
       'mcguire01', 'murphch01', 'ortka01', 'paxtoja01', 'pivetni01',
       'rafaece01', 'refsnro01', 'reyespa01', 'roberni01', 'rodrijo04',
       'salech01', 'schrejo01', 'scottta02', 'sherrry01', 'storytr01',
       'tapiara01', 'turneju01', 'uriaslu01', 'valdeen01', 'verdual01',
       'waltebr01', 'weissza01', 'whitlga01', 'winckjo01', 'wongco01',
       'yoshima02'], dtype=object)

In [27]:
devers = batting[batting.playerID == 'deverra01']
casas = batting[batting.playerID == 'casastr01']

In [32]:
# add stats to each
devers['OBP'] = (devers['H'] + devers['BB'] + devers['HBP']) / (devers['AB'] + devers['BB'] + devers['HBP'] + devers['SF'])
casas['OBP'] = (casas['H'] + casas['BB'] + casas['HBP']) / (casas['AB'] + casas['BB'] + casas['HBP'] + casas['SF'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  devers['OBP'] = (devers['H'] + devers['BB'] + devers['HBP']) / (devers['AB'] + devers['BB'] + devers['HBP'] + devers['SF'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  casas['OBP'] = (casas['H'] + casas['BB'] + casas['HBP']) / (casas['AB'] + casas['BB'] + casas['HBP'] + casas['SF'])


In [None]:
# get pitching stats

In [43]:
pitching = pd.read_csv('Pitching.csv')
pitching = pitching[(pitching.yearID == 2023)&(pitching.teamID == 'ATL')]
pitching.playerID.unique()

array(['allarko01', 'anderni01', 'chaveje01', 'chiriyo01', 'dodddy01',
       'elderbr01', 'friedma01', 'handbr01', 'hearnta01', 'hellebe01',
       'hernada03', 'iglesra01', 'jimenjo02', 'johnspi01', 'leedy01',
       'lopezni01', 'luetglu01', 'mchugco01', 'minteaj01', 'mortoch02',
       'rodride01', 'shustja01', 'smithaj01', 'sorokmi01', 'stephja01',
       'stridsp01', 'tonkimi01', 'vinesda01', 'winanal01', 'wrighky01',
       'yateski01', 'youngda02'], dtype=object)

In [44]:
strider = pitching[pitching.playerID == 'stridsp01']
strider

Unnamed: 0,playerID,yearID,stint,teamID,lgID,W,L,G,GS,CG,...,IBB,WP,HBP,BK,BFP,GF,R,SH,SF,GIDP
44673,stridsp01,2023,1,ATL,NL,20,5,32,32,0,...,1.0,6,9.0,2,763.0,0,85,0.0,0.0,6.0


In [45]:
strider.columns

Index(['playerID', 'yearID', 'stint', 'teamID', 'lgID', 'W', 'L', 'G', 'GS',
       'CG', 'SHO', 'SV', 'IPouts', 'H', 'ER', 'HR', 'BB', 'SO', 'BAOpp',
       'ERA', 'IBB', 'WP', 'HBP', 'BK', 'BFP', 'GF', 'R', 'SH', 'SF', 'GIDP'],
      dtype='object')

In [48]:
# add stats - calc whip
strider['whip'] = (strider['BB']+strider['H']) / (strider['IPouts']/3)
strider['obp_against'] = 1 - (strider['whip'])
# whip is how many runners get on, so obp against is 1-whip or prob the batter is not getting on base against the pitcher
strider

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  strider['whip'] = (strider['BB']+strider['H']) / (strider['IPouts']/3)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  strider['obp_against'] = 1 - (strider['whip'])


Unnamed: 0,playerID,yearID,stint,teamID,lgID,W,L,G,GS,CG,...,HBP,BK,BFP,GF,R,SH,SF,GIDP,whip,obp_against
44673,stridsp01,2023,1,ATL,NL,20,5,32,32,0,...,9.0,2,763.0,0,85,0.0,0.0,6.0,1.092857,-0.092857


In [49]:
def at_bat(batter_obp, pitcher_obp_against):
  """Simulate an at-bat and return True if the runner gets on base and False otherwise"""
  # adjust obp based on the pitcher's stat
  # standardize metrics?
  # batter_obp - pitcher_obp is the difference between on based allowed, tells us if the batter is better or worse at getting on base compared to the pitchers ability to prevent it
  # adjust by 0.5, can be adjusted but this is a scaling factor
  adjusted_obp = batter_obp - (batter_obp - pitcher_obp_against) * 0.5

  # generate random number and compare to adjusted obp
  if np.random.rand() < adjusted_obp:
    return 1 # batter gets on
  else:
    return 0 # batter does not get on



In [50]:
def simulate(trials, batter_obp, pitcher_obp_against):
  on_base_count = 0
  for i in range(trials):
    on_base_count += at_bat(batter_obp, pitcher_obp_against)
  # prob simulation batter gets on base
  return on_base_count/trials

In [58]:
devers_on_base = simulate(10000, devers.OBP.iloc[0], strider.obp_against.iloc[0])
casas_on_base = simulate(10000, casas.OBP.iloc[0], strider.obp_against.iloc[0])
print('probability Devers gets on base:', devers_on_base*100)
print('probability Casas gets on base:', casas_on_base*100)

probability Devers gets on base: 12.479999999999999
probability Casas gets on base: 13.459999999999999


In [53]:
# defense
# ops (current and pinch)
# ba (current and pinch)
# ba with bases loaded
# pitcher era
# pitcher whip
# pitcher history with batter
# types of pitches