In [2]:
import pandas as pd
import numpy as np
from scipy import stats as sci
from helpers import get_teams
import random as rand
import os
import pprint 

In [3]:
year = '2023'
league = 'mlb'
teams = get_teams.get_team_names(league)
historicals_path = os.path.join(os.curdir,'data')
team_results = pd.read_csv(f'{historicals_path}/{year}_{league}_results.csv')
teams = team_results.Tm.unique()
# for name in teams:
#     try:
#         team_results[name] = pd.read_csv(f'{historicals_path}/{year}_{league}_results.csv')
#     except FileNotFoundError:
#         pass

# teams['nyy'] = pd.read_csv('~/dev/sb/stats/nyy_2022.csv')
# teams['bos'] = pd.read_csv('~/dev/sb/stats/bos_2022.csv')

In [4]:
stats = {}
for team in teams:
    stats[team] = {}
    stats[team]['avg_runs'] = np.average(team_results.loc[team_results.Tm == team,:]['R'])
    stats[team]['avg_runs_allowed'] = np.average(team_results.loc[team_results.Tm == team,:]['RA'])
    stats[team]['stdev_runs'] = np.std(team_results.loc[team_results.Tm == team,:]['R'])

In [5]:
odds = {}
#calculate odds against every team, cartesian product against "team"
for team in teams:
    odds[team] = {} # each opponent has their own entry
    for opponent in [x for x in teams if x != team]: # exclude current team
        odds[team][opponent] = {} # each team x opponent matchup has their own set of odds
        odds[team][opponent]['adj_runs'] = np.sqrt(stats[team]['avg_runs']*stats[opponent]['avg_runs_allowed'])

In [23]:
#create simulation
n = 5 # number of iterations
for team in teams:
    for opponent in [x for x in teams if x != team]: # exclude current team
        results = []
        for i in range(n):
            results.append(sci.norm.ppf(rand.random(), loc=odds[team][opponent]['adj_runs'], scale=stats[team]['stdev_runs'])) #loc and scale are mean and stdev, respectively
        odds[team][opponent]['predicted_runs'] = np.array(results)

In [24]:
for team in teams:
    for opponent in [x for x in teams if x != team]: # exclude current team
        odds[team][opponent]['predicted_differential'] = odds[team][opponent]['predicted_runs'] - odds[opponent][team]['predicted_runs']
        odds[team][opponent]['predicted_outcome'] = np.where(odds[team][opponent]['predicted_differential'] > 0,1,0)
        
display(odds)
# reform = {(outerKey, innerKey): values for outerKey, innerDict in odds.items() for innerKey, values in innerDict.items()}
# print(reform)
# display(pd.DataFrame(reform).T)


{'ARI': {'ATL': {'adj_runs': 4.511395586589406,
   'predicted_runs': array([ 4.87567571,  4.95297622,  2.34464819,  3.30385879, -0.89756764]),
   'predicted_differential': array([ 1.28812419,  0.27744382,  0.55762642, -1.48288153, -7.22734832]),
   'predicted_outcome': array([1, 1, 1, 0, 0])},
  'BAL': {'adj_runs': 4.390047771153893,
   'predicted_runs': array([1.86568242, 4.49878843, 3.73369619, 3.72914239, 1.44718621]),
   'predicted_differential': array([-3.9677027 ,  1.92720323, -2.50838137, -2.73391084,  5.70622286]),
   'predicted_outcome': array([0, 1, 0, 0, 1])},
  'BOS': {'adj_runs': 4.696618233568211,
   'predicted_runs': array([ 4.37101439,  2.2665537 ,  7.65682151, -0.23526636,  4.41567734]),
   'predicted_differential': array([-1.04991375, -1.40977223, -1.41487217, -7.37680807, -7.28828544]),
   'predicted_outcome': array([0, 0, 0, 0, 0])},
  'CHC': {'adj_runs': 4.533394859584098,
   'predicted_runs': array([1.60656647, 6.16433162, 5.2298464 , 2.7375126 , 4.99121968]),
   

In [25]:
type(odds)

dict

In [53]:
for team in odds.keys():
    for opp in odds[team].keys():
        print(f"""{team} vs {opp} yields average runs {odds[team][opp]['predicted_runs']:,.2f}\
 and {odds[opp][team]['predicted_runs']:,.2f} runs allowed for a differential of\
 {odds[team][opp]['predicted_runs'] - odds[opp][team]['predicted_runs']:,.2f} """)


nyy vs bos yields average runs 4.92 and 4.01 runs allowed for a differential of 0.92 
nyy vs tor yields average runs 4.58 and 4.08 runs allowed for a differential of 0.50 
bos vs nyy yields average runs 4.01 and 4.92 runs allowed for a differential of -0.92 
bos vs tor yields average runs 4.36 and 4.83 runs allowed for a differential of -0.47 
tor vs nyy yields average runs 4.08 and 4.58 runs allowed for a differential of -0.50 
tor vs bos yields average runs 4.83 and 4.36 runs allowed for a differential of 0.47 


In [20]:
list1 = [1,2,3,4]
list2 = [4,3,2,1]
np1 = np.array(list1)
np2 = np.array(list2)
np1 - np2

array([-3, -1,  1,  3])