In [54]:
import pandas as pd
import numpy as np
from scipy import stats as sci
import random as rand
import os
import re
import requests
from bs4 import BeautifulSoup


In [45]:
# Load every team's historical game log from ./stats/ into `teams`, keyed by
# the first three characters of the file name (e.g. 'nyy_2022.csv' -> 'nyy').
teams = {}
historicals = os.path.join(os.curdir,'stats/')
# sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
# dict (and everything derived from it) in a stable order between runs.
for x in sorted(os.listdir(historicals)):
    # Skip anything that is not a CSV (editor/OS droppings such as .DS_Store or
    # .ipynb_checkpoints would otherwise break read_csv or become fake teams).
    if not x.lower().endswith('.csv'):
        continue
    name = x[:3]
    # NOTE(review): two files sharing a three-letter prefix (e.g. several
    # seasons of one team) overwrite each other here; the last in sorted order
    # wins -- confirm that only one file per team is expected.
    teams[name] = pd.read_csv(os.path.join(historicals,x))

In [46]:
# Per-team summary figures taken from each game log: mean of column 'R',
# mean of column 'RA', and the standard deviation of 'R' (np.std default,
# i.e. ddof=0).
stats = {}
for team, games in teams.items():
    stats[team] = {
        'avg_runs': np.average(games['R']),
        'avg_runs_allowed': np.average(games['RA']),
        'stdev_runs': np.std(games['R']),
    }

In [47]:
# Build odds[team][opponent] for every ordered pairing of distinct teams.
# 'adj_runs' is the geometric mean of the team's average runs scored and the
# opponent's average runs allowed.
odds = {}
for team in teams:
    offense = stats[team]['avg_runs']
    odds[team] = {}  # one entry per opponent
    for opponent in teams:
        if opponent == team:
            continue  # a team never plays itself
        defense = stats[opponent]['avg_runs_allowed']
        odds[team][opponent] = {'adj_runs': np.sqrt(offense * defense)}

In [48]:
# Monte Carlo simulation: for every ordered (team, opponent) matchup draw `n`
# normally distributed run totals centred on the matchup's 'adj_runs' with the
# team's own run standard deviation, and store their mean as 'predicted_runs'.
n = 100000 # number of iterations
# Fixed seed so that re-running the notebook reproduces the same numbers.
rng = np.random.default_rng(2022)
for team in teams.keys():
    for opponent in [x for x in teams.keys() if x != team]: # exclude current team
        # One vectorised draw replaces n scalar inverse-CDF calls; it is far
        # faster and cannot yield the -inf that ppf(0.0) produced whenever the
        # uniform sampler returned exactly 0.
        draws = rng.normal(
            loc=odds[team][opponent]['adj_runs'],   # mean
            scale=stats[team]['stdev_runs'],        # standard deviation
            size=n,
        )
        odds[team][opponent]['predicted_runs'] = draws.mean()

In [44]:
# Show the nested team -> opponent -> metrics dictionary as the cell output.
odds

{'nyy_2022.csv': {'bos_2022.csv': {'adj_runs': 4.919365816503159,
   'predicted_runs': 4.918742002284137},
  'tor_2022.csv': {'adj_runs': 4.5693733030098125,
   'predicted_runs': 4.575581666294908}},
 'bos_2022.csv': {'nyy_2022.csv': {'adj_runs': 3.9849252978731773,
   'predicted_runs': 3.989453102669121},
  'tor_2022.csv': {'adj_runs': 4.3607736232814345,
   'predicted_runs': 4.363067260679895}},
 'tor_2022.csv': {'nyy_2022.csv': {'adj_runs': 4.091922184071176,
   'predicted_runs': 4.086986237425229},
  'bos_2022.csv': {'adj_runs': 4.8208453844738575,
   'predicted_runs': 4.816842995249697}}}

In [53]:
# Print one summary line per ordered matchup: the team's predicted runs, the
# opponent's predicted runs in the reverse matchup (runs allowed), and the
# difference between the two.
for team, matchups in odds.items():
    for opp, line in matchups.items():
        scored = line['predicted_runs']
        allowed = odds[opp][team]['predicted_runs']
        print(
            f"{team} vs {opp} yields average runs {scored:,.2f}"
            f" and {allowed:,.2f} runs allowed for a differential of"
            f" {scored - allowed:,.2f} "
        )


nyy vs bos yields average runs 4.92 and 4.01 runs allowed for a differential of 0.92 
nyy vs tor yields average runs 4.58 and 4.08 runs allowed for a differential of 0.50 
bos vs nyy yields average runs 4.01 and 4.92 runs allowed for a differential of -0.92 
bos vs tor yields average runs 4.36 and 4.83 runs allowed for a differential of -0.47 
tor vs nyy yields average runs 4.08 and 4.58 runs allowed for a differential of -0.50 
tor vs bos yields average runs 4.83 and 4.36 runs allowed for a differential of 0.47 
