In [34]:
import pandas as pd
import pybettor

In [33]:
# gives a base for two teams
def win_prob(home_wp, away_wp):
    # free throw - win/win, loss/loss, win/loss, loss/win
    home_prob = home_wp * (1 - away_wp)
    away_prob = away_wp * (1 - home_wp)

    # normalize
    sum_of_pct = home_prob + away_prob
    normalized_home = round(float(home_prob / sum_of_pct), 3)
    #normalized_away = round(float(away_prob / sum_of_pct), 3)

    # return home probability over away probability
    return normalized_home

In [25]:
win_prob((51/98), 49/99)

0.525

In [26]:
teams = {'Washington Nationals': 'WSH',
          'Toronto Blue Jays': 'TOR',
          'Texas Rangers': 'TEX', 
          'Tampa Bay Rays': 'TBR',
          'St. Louis Cardinals': 'STL',
          'Seattle Mariners': 'SEA', 
          'San Diego Padres': 'SDP',
          'San Francisco Giants': 'SFG',
          'Pittsburgh Pirates': 'PIT',
          'Philadelphia Phillies': 'PHI',
          'Oakland Athletics': 'OAK',
          'New York Mets': 'NYM',
          'New York Yankees': 'NYY',
          'Minnesota Twins': 'MIN',
          'Milwaukee Brewers': 'MIL', 
          'Miami Marlins': 'MIA', 
          'Los Angeles Angels': 'LAA',
          'Los Angeles Dodgers': 'LAD',
          'Kansas City Royals': 'KCR',
          'Houston Astros': 'HOU',
          'Detroit Tigers': 'DET',
          'Colorado Rockies': 'COL', 
          'Cleveland Guardians': 'CLE',
          'Cincinnati Reds': 'CIN',
          'Chicago White Sox': 'CWS',
          'Chicago Cubs': 'CHC',
          'Boston Red Sox': 'BOS',
          'Baltimore Orioles': 'BAL',
          'Atlanta Braves': 'ATL',
          'Arizona Diamondbacks': 'ARI'}

print(len(teams))

30


In [27]:
# we are going to be expecting a csv with standings and:
# team name, wins, losses, pythag. win/loss
# Tm,W,L,pythWL

dtype = {"Tm": str, "W": int, "L" : int, "pythWL": str}
standings = pd.read_csv('standings_20240726.csv', dtype=dtype)

standings['tm'] = standings.apply(lambda row: teams[row.Tm], axis=1)
standings['xW'] = standings.apply(lambda row: row.pythWL[:2], axis=1)
standings['xL'] = standings.apply(lambda row: row.pythWL[-2:], axis=1)
standings['GP'] = standings.apply(lambda row: row.W + row.L, axis=1)
standings['wp'] = standings.apply(lambda row: round(float(row.W / row.GP), 3), axis=1)
standings['xWP'] = standings.apply(lambda row: round(float(int(row.xW) / row.GP), 3), axis=1)
standings = standings[['tm', 'GP', 'W', 'L', 'wp', 'xW', 'xL', 'xWP', 'ERA']]
standings.head(10)

Unnamed: 0,tm,GP,W,L,wp,xW,xL,xWP,ERA
0,ARI,103,53,50,0.515,54,49,0.524,4.5
1,ATL,101,54,47,0.535,56,45,0.554,3.49
2,BAL,102,61,41,0.598,60,42,0.588,3.73
3,BOS,101,54,47,0.535,53,48,0.525,3.8
4,CHC,104,49,55,0.471,51,53,0.49,3.7
5,CWS,105,27,78,0.257,31,74,0.295,4.61
6,CIN,102,49,53,0.48,55,47,0.539,3.86
7,CLE,102,61,41,0.598,59,43,0.578,3.65
8,COL,103,38,65,0.369,38,65,0.369,5.5
9,DET,104,51,53,0.49,52,52,0.5,3.89


In [28]:
def calc_pitcher_rd(pitcher_era, team_era, pitcher_ippg=5.33):
    if pitcher_era == "na":
        return 0
    
    team_ra_per_inning = team_era / 9
    pitcher_ra_per_inning = float(pitcher_era) / 9
    team_exp_ra = pitcher_ippg * team_ra_per_inning
    pitcher_exp_ra = pitcher_ippg * pitcher_ra_per_inning
    diff = pitcher_exp_ra - team_exp_ra

    return diff # negative is good, positive bad

In [29]:
calc_pitcher_rd(5.97, 3.89)

1.2318222222222222

In [30]:
xwp_dict = dict(zip(standings['tm'], standings['xWP']))
era_dict = dict(zip(standings['tm'], standings['ERA']))


games = pd.read_csv('games_20240727.csv')
games['away_xWP'] = games.apply(lambda row: xwp_dict[row.away], axis=1)
games['home_xWP'] = games.apply(lambda row: xwp_dict[row.home], axis=1)
games['homeProb'] = games.apply(lambda row: round(win_prob(row.home_xWP, row.away_xWP) + 0.025, 3), axis=1)
games['awayProb'] = games.apply(lambda row: (1 - row.homeProb), axis=1)
games['awayFDImplied'] = games.apply(lambda row: round(pybettor.implied_prob(row.fd_away, category="us")[0], 3), axis=1)
games['homeFDImplied'] = games.apply(lambda row: round(pybettor.implied_prob(row.fd_home, category="us")[0], 3), axis=1)
games['awayAdvTVvFD'] = games.apply(lambda row: row.awayProb - row.awayFDImplied, axis=1)
games['homeAdvTVvFD'] = games.apply(lambda row: row.homeProb - row.homeFDImplied, axis=1)
games['awayAdvFGvFD'] = games.apply(lambda row: row.fg_wp_away - row.awayFDImplied, axis=1)
games['homeAdvFGvFD'] = games.apply(lambda row: row.fg_wp_home - row.homeFDImplied, axis=1)
games['away_pitcher_rd'] = games.apply(lambda row: calc_pitcher_rd(row.away_sp_era, era_dict[row.away]), axis=1)
games['home_pitcher_rd'] = games.apply(lambda row: calc_pitcher_rd(row.home_sp_era, era_dict[row.home]), axis=1)

games

Unnamed: 0,away,home,fd_away,fd_home,fg_wp_away,fg_wp_home,away_sp,home_sp,away_sp_era,home_sp_era,...,homeProb,awayProb,awayFDImplied,homeFDImplied,awayAdvTVvFD,homeAdvTVvFD,awayAdvFGvFD,homeAdvFGvFD,away_pitcher_rd,home_pitcher_rd
0,MIN,DET,-112,-104,0.486,0.514,Ryan,Skubai,3.65,2.34,...,0.48,0.52,0.528,0.51,-0.008,-0.03,-0.042,0.004,-0.272422,-0.917944
1,CLE,PHI,126,-148,0.411,0.589,Carrasco,Phillips,5.32,2.81,...,0.556,0.444,0.442,0.597,0.002,-0.041,-0.031,-0.008,0.989011,-0.384944
2,CIN,TBR,108,-126,0.433,0.567,Abbot,Littell,3.19,4.46,...,0.434,0.566,0.481,0.558,0.085,-0.124,-0.048,0.009,-0.396789,0.171744
3,SDP,BAL,-112,-104,0.479,0.521,King,Kremer,3.28,4.43,...,0.581,0.419,0.528,0.51,-0.109,0.071,-0.049,0.011,-0.444167,0.414556
4,TEX,TOR,112,-132,0.424,0.576,Lorenzen,Gausman,3.53,4.55,...,0.442,0.558,0.472,0.569,0.086,-0.127,-0.048,0.007,-0.260578,0.005922
5,ATL,NYM,102,-120,0.491,0.509,Schwellenbach,Megill,4.62,5.08,...,0.491,0.509,0.495,0.545,0.014,-0.054,-0.004,-0.036,0.669211,0.550767
6,NYY,BOS,102,-120,0.46,0.54,Stroman,Crawford,3.51,3.37,...,0.453,0.547,0.495,0.545,0.052,-0.092,-0.035,-0.005,-0.059222,-0.254656
7,MIA,MIL,136,-162,0.442,0.558,Meyer,Civale,2.12,5.0,...,0.751,0.249,0.424,0.618,-0.175,0.133,0.018,-0.06,-1.409489,0.758044
8,SEA,CWS,-138,118,0.583,0.417,Woo,Fedde,2.54,2.98,...,0.312,0.688,0.58,0.459,0.108,-0.147,0.003,-0.042,-0.515233,-0.965322
9,LAD,HOU,128,-152,0.452,0.548,Wrobleski,Blanco,4.4,2.75,...,0.477,0.523,0.439,0.603,0.084,-0.126,0.013,-0.055,0.379022,-0.752122


In [31]:
games_diff = games[['away', 'home', 'awayAdvTVvFD', 'homeAdvTVvFD', 'awayAdvFGvFD', 'homeAdvFGvFD', 'away_pitcher_rd', 'home_pitcher_rd' ]]
games_diff = games_diff.round(3)
games_diff

Unnamed: 0,away,home,awayAdvTVvFD,homeAdvTVvFD,awayAdvFGvFD,homeAdvFGvFD,away_pitcher_rd,home_pitcher_rd
0,MIN,DET,-0.008,-0.03,-0.042,0.004,-0.272,-0.918
1,CLE,PHI,0.002,-0.041,-0.031,-0.008,0.989,-0.385
2,CIN,TBR,0.085,-0.124,-0.048,0.009,-0.397,0.172
3,SDP,BAL,-0.109,0.071,-0.049,0.011,-0.444,0.415
4,TEX,TOR,0.086,-0.127,-0.048,0.007,-0.261,0.006
5,ATL,NYM,0.014,-0.054,-0.004,-0.036,0.669,0.551
6,NYY,BOS,0.052,-0.092,-0.035,-0.005,-0.059,-0.255
7,MIA,MIL,-0.175,0.133,0.018,-0.06,-1.409,0.758
8,SEA,CWS,0.108,-0.147,0.003,-0.042,-0.515,-0.965
9,LAD,HOU,0.084,-0.126,0.013,-0.055,0.379,-0.752


In [10]:
odds_diff_dict = {}

for index, row in games_diff.iterrows():
    odds_diff_dict[row['away']] = round(row['awayAdvFD'], 3)
    odds_diff_dict[row['home']] = round(row['homeAdvFD'], 3)

odds_diff_dict

odds_series = pd.Series(odds_diff_dict)
odds_series.sort_values(ascending=False)

KeyError: 'awayAdvFD'

<h1> section 2 </h1>

In [48]:
bets = pd.read_csv('bets.csv')
bets

Unnamed: 0,date,bet,odds,result,stake,pl,reason
0,20240719,KCR - 1.5,110,H,8.0,8.80,vibes
1,20240719,LAD ML,190,H,5.1,9.69,vibes
2,20240720,NYY ML,-166,M,7.0,-7.00,research
3,20240720,ATL ML,-142,H,8.0,5.63,research
4,20240720,DET ML,102,H,8.0,8.16,research
...,...,...,...,...,...,...,...
86,20240728,ATL ML,-104,H,9.0,8.65,chute
87,20240728,MIA/MIL U8,-105,W,2.0,0.00,research
88,20240728,SFG -1.5,116,M,2.0,-2.00,research
89,20240728,OAK ML,116,M,2.0,-2.00,research


In [44]:
bets['fav'] = bets.apply(lambda row: True if row.odds > 0 else False, axis=1)

In [None]:
pip install MLB-StatsAPIzzz