In [314]:
import pandas as pd
import numpy as np
from scipy import stats as sci
from helpers import get_teams
import os
import pprint 

In [315]:
year = '2023'
league = 'mlb'
teams = get_teams.get_team_names(league)
historicals_path = os.path.join(os.curdir,'data')
team_results = pd.read_csv(f'{historicals_path}/{year}_{league}_results.csv')
teams = team_results.Tm.unique()

In [316]:
teams_df = team_results.groupby('Tm')[['R','RA']].agg(avg_runs=('R','mean'),avg_runs_allowed=('RA','mean'),stdev_runs=('R','std'))
teams_df.reset_index(inplace=True)
teams_ct = teams_df.merge(teams_df,how='cross',suffixes=('_tm','_opp'))
teams_ct.rename(columns={'Tm_tm':'Tm','Tm_opp':'Opp'},inplace=True)
teams_gp = teams_ct.groupby(['Tm','Opp']).max()
teams_gp.reset_index(inplace=True)
teams_gp['adj_runs_tm'] = np.sqrt(teams_gp.avg_runs_tm*teams_gp.avg_runs_allowed_opp)
# display(teams_gp)

In [317]:
def rand_results(rand=None,avg_runs=None,stdev_runs=None):
    return max(round(sci.norm.ppf(rand, loc=avg_runs, scale=stdev_runs),2),0)

def sim_results(n=10000,avg_runs=None,stdev_runs=None):
    empty_arr = np.random.random(size=n).tolist()
    # empty_arr = np.random.random(size=n)
    rand_ = np.vectorize(rand_results)
    results = rand_(empty_arr,avg_runs,stdev_runs)
    # results = [rand_results(x,avg_runs,stdev_runs) for x in empty_arr]
    return results

teams_gp['sim_results'] = teams_gp.apply(lambda x: sim_results(avg_runs=x.avg_runs_tm,stdev_runs=x.stdev_runs_tm),axis=1)


In [319]:
def opposing_runs(team,opp):
    # print(team,opp)
    team_runs = teams_gp[(teams_gp.Tm == team) & (teams_gp.Opp == opp)]['sim_results'].values 
    opp_runs = teams_gp[(teams_gp.Tm == opp) & (teams_gp.Opp == team)]['sim_results'].values
    # print(type(opp_runs))
    # print(team_runs)
    output = np.subtract(team_runs,opp_runs).ravel()
    # print(output)
    return output

teams_gp['sim_differential'] = teams_gp.apply(lambda x: opposing_runs(x.Tm, x.Opp), axis=1)


In [320]:
def unravel(layer1):
    for a1 in layer1:
        return a1
            # print(type(a2))
            # return a2

def win_loss(arr):
    return np.where(arr>0,1,0)

def win_loss_ratio(arr):
    return np.sum(arr) / arr.shape[0]

teams_gp['sim_differential'] = teams_gp.apply(lambda x: unravel(x.sim_differential), axis=1)
teams_gp['win_loss'] = teams_gp.apply(lambda x: win_loss(x.sim_differential), axis=1)
teams_gp['w_l_ratio'] = teams_gp.apply(lambda x: win_loss_ratio(x.win_loss), axis=1)

In [321]:
teams_gp

Unnamed: 0,Tm,Opp,avg_runs_tm,avg_runs_allowed_tm,stdev_runs_tm,avg_runs_opp,avg_runs_allowed_opp,stdev_runs_opp,adj_runs_tm,sim_results,sim_differential,win_loss,w_l_ratio
0,ARI,ARI,4.604938,4.697531,2.993488,4.604938,4.697531,2.993488,4.651004,"[3.54, 8.89, 3.14, 3.72, 4.45, 2.32, 0.0, 0.79...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0000
1,ARI,ATL,4.604938,4.697531,2.993488,5.845679,4.419753,3.313010,4.511396,"[4.01, 7.97, 4.48, 5.23, 6.98, 4.71, 6.25, 8.9...","[-4.65, 2.2399999999999993, -3.21, -5.43999999...","[0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, ...",0.3873
2,ARI,BAL,4.604938,4.697531,2.993488,4.981481,4.185185,3.299675,4.390048,"[1.08, 7.68, 0.26, 1.32, 6.53, 2.57, 5.98, 4.6...","[-3.87, -1.25, -3.37, -1.0799999999999998, 6.5...","[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, ...",0.4681
3,ARI,BOS,4.604938,4.697531,2.993488,4.765432,4.790123,3.240932,4.696618,"[4.75, 6.37, 6.4, 5.01, 5.0, 5.07, 4.67, 7.87,...","[-0.08999999999999986, 4.33, 1.110000000000000...","[0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, ...",0.4889
4,ARI,CHC,4.604938,4.697531,2.993488,5.055556,4.462963,3.647938,4.533395,"[0, 7, 5, 2, 3, 0, 2, 4, 0, 2, 3, 0, 7, 4, 3, ...","[-4.28, -0.5499999999999998, 2.26, -3.15000000...","[0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, ...",0.4287
...,...,...,...,...,...,...,...,...,...,...,...,...,...
836,TOR,SFG,4.604938,4.141975,3.073344,4.160494,4.438272,3.275950,4.520837,"[5.1, 0.0, 3.99, 5.97, 6.33, 1.9, 2.97, 7.33, ...","[-2.0700000000000003, -6.72, -2.25, 2.32999999...","[0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, ...",0.5372
837,TOR,STL,4.604938,4.141975,3.073344,4.438272,5.117284,3.341702,4.854356,"[0.02, 8.16, 0.72, 8.09, 3.74, 3.01, 2.21, 2.9...","[-0.84, 4.4399999999999995, -3.970000000000000...","[0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, ...",0.5092
838,TOR,TBR,4.604938,4.141975,3.073344,5.308642,4.104938,3.319964,4.347757,"[8.72, 4.83, 0.0, 1.9, 6.57, 5.42, 4.29, 1.25,...","[1.0200000000000005, -2.5999999999999996, -2.7...","[1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, ...",0.4292
839,TOR,TEX,4.604938,4.141975,3.073344,5.438272,4.419753,3.875294,4.511396,"[8.3, 0.0, 3.5, 4.15, 0.0, 1.1, 7.04, 5.28, 6....","[2.030000000000001, -7.24, -6.369999999999999,...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...",0.4324


In [None]:
odds = {}
#calculate odds against every team, cartesian product against "team"
for team in teams:
    odds[team] = {} # each opponent has their own entry
    for opponent in [x for x in teams if x != team]: # exclude current team
        odds[team][opponent] = {} # each team x opponent matchup has their own set of odds
        odds[team][opponent]['adj_runs'] = np.sqrt(stats[team]['avg_runs']*stats[opponent]['avg_runs_allowed'])