Use this notebook at the beginning of each day to see today's bets.

In [1]:
import pandas as pd, numpy as np
import statsapi
from statsapi import player_stat_data
import requests
from datetime import datetime, timedelta
import numpy as np
import math
import meteostat
import pprint
import pickle
import copy

import pycaret
from pycaret import classification
import model.common
import importlib

In [2]:
from static_data.load_static_data import *

In [79]:
# Read the clock exactly once so that "today" and "yesterday" can never
# disagree if this cell happens to run across midnight.
_today = datetime.today()
date_today_str = _today.strftime("%Y-%m-%d")
date_yesterday_str = (_today - timedelta(days=1)).strftime("%Y-%m-%d")
date_today_str, date_yesterday_str

('2023-06-01', '2023-05-31')

In [4]:
# Re-import model.common so that edits to the module are picked up without
# restarting the kernel.
importlib.reload(model.common)

<module 'model.common' from '/home/junlim/projects/mlb-props/model/common.py'>

In [30]:
# Load the pickled matchup frames from the collect_data directory and report
# their row counts.
# NOTE(review): unpickling can execute arbitrary code; only load files that
# were produced by this project.
collect_data_Base_dir = 'collect_data'
df_game_matchup_total = pd.read_pickle(f'{collect_data_Base_dir}/df_game_matchup_total.pkl')
df_game_matchup_2023 = pd.read_pickle(f'{collect_data_Base_dir}/df_game_matchup_2023.pkl')
print(f'df_game_matchup_total: {len(df_game_matchup_total)}, df_game_matchup_2023: {len(df_game_matchup_2023)}')

df_game_matchup_total: 320540, df_game_matchup_2023: 12183


In [80]:
# Today's inputs, both keyed by date_today_str in the file name: the live
# matchup frame (which does not have the game results yet) and the hits odds.
df_live_game_matchup = pd.read_pickle(f'collect_data/df_live_game_matchup_{date_today_str}.pkl')
df_live_odds_hits = pd.read_pickle(f"odds_data/df_odds_hits_{date_today_str}.pkl")

In [6]:
# Hits odds for the two fixed back-test days (plain literals: the paths have
# no placeholders to interpolate).
df_odds_hits_20230529 = pd.read_pickle("odds_data/df_odds_hits_2023-05-29.pkl")
df_odds_hits_20230530 = pd.read_pickle("odds_data/df_odds_hits_2023-05-30.pkl")

In [93]:
# Midnight at the start of today. The range built below is half-open
# (2023-05-29 inclusive, today exclusive), so today's odds file is not read.
date_today = datetime.combine(datetime.today().date(), datetime.min.time())
t = datetime(2023, 5, 29)
t_since_20230529 = []
while t < date_today:
    t_since_20230529.append(t)
    t = t + timedelta(days=1)
# One odds pickle per day, concatenated in chronological order.
df_odds_hits_since_20230529 = pd.concat(
    [pd.read_pickle(f"odds_data/df_odds_hits_{odds_day:%Y-%m-%d}.pkl") for odds_day in t_since_20230529]
)

In [7]:
# Live matchup snapshots saved on earlier dates; per the original note these
# should already carry the game results.
df_live_game_matchup_20230529 = pd.read_pickle('collect_data/df_live_game_matchup_2023-05-29.pkl')
df_live_game_matchup_20230530 = pd.read_pickle('collect_data/df_live_game_matchup_2023-05-30.pkl')

# live prediction

In [8]:
# Load the saved PyCaret pipeline and model; the file name comes from model.common.
# NOTE(review): despite the variable name, this is loaded through
# pycaret.classification.
regression_model = pycaret.classification.load_model(model.common.model_file_name)

Transformation Pipeline and Model Successfully Loaded


In [75]:
# Columns carried from the prediction frame into the odds join: the model
# features plus the outcome, predicted label/score and theoretical odds.
# NOTE(review): the rendered tables show `batting_hit_recorded` twice, which
# suggests model.common.features already contains it - confirm. The
# `.sum().values[0]` calls in the helpers below currently rely on that
# duplication, so do not de-duplicate here without updating them.
live_bet_columns = model.common.features + ["batting_hit_recorded", 'prediction_label', 'prediction_score', 'theo_odds']

def get_df_prediction_odd(df_matchup, regression_model):
    """Score matchups with the model and attach team name and theoretical odds.

    Args:
        df_matchup: matchup frame passed to PyCaret's ``predict_model``.
        regression_model: estimator/pipeline loaded with PyCaret.

    Returns:
        The prediction frame left-merged with ``player_id`` /
        ``player_team_name`` from ``df_player_team_positions`` (matched on
        ``batting_id``), plus a ``theo_odds`` column obtained by applying
        ``model.common.odds_calculator`` to ``prediction_score``.
    """
    scored = pycaret.classification.predict_model(estimator=regression_model, data=df_matchup)
    team_lookup = df_player_team_positions[['player_id', 'player_team_name']]
    scored = scored.merge(team_lookup, how='left', left_on='batting_id', right_on='player_id')
    scored["theo_odds"] = scored["prediction_score"].apply(model.common.odds_calculator)
    return scored

def get_df_prediction_hits_odds(df_prediction, df_hits_odds):
    """Join model predictions with bookmaker hits odds per (game, batter).

    Args:
        df_prediction: prediction frame containing ``live_bet_columns`` and
            ``game_id``.
        df_hits_odds: odds frame with ``game_id``, ``player_name``,
            ``over_odds`` and ``over_line``; it is copied, not modified.

    Returns:
        Predictions left-joined to the odds on ``game_id`` / ``batting_name``
        (overlapping odds columns get an ``_odds`` suffix), with an
        ``over_prob`` column derived from ``over_odds``. Only rows whose
        ``over_line`` is below 1.0 are kept, which also drops predictions
        that found no matching odds row (NaN compares as False).
    """
    join_keys = ['game_id', 'batting_name']

    odds = copy.copy(df_hits_odds)
    odds["over_prob"] = odds["over_odds"].apply(model.common.odds_to_probability)
    odds_indexed = odds.rename(columns={'player_name': 'batting_name'}).set_index(list(join_keys))

    predictions_indexed = df_prediction[live_bet_columns + ['game_id']].set_index(list(join_keys))

    joined = predictions_indexed.join(odds_indexed, lsuffix='', rsuffix='_odds').reset_index()
    return joined[joined.over_line < 1.0]

def get_df_confident_prediction_odds(df_prediction_odds, score_threshold = 0.70):
    """Return the highest-scoring row per batter at or above ``score_threshold``.

    Rows are filtered on ``prediction_score``, sorted by score (descending)
    and de-duplicated on ``batting_name`` so each batter appears once. A
    one-line hit-rate summary is printed as a side effect.

    Args:
        df_prediction_odds: joined prediction/odds frame.
        score_threshold: minimum ``prediction_score`` to keep a row.

    Returns:
        The selected rows restricted to the display columns.
    """
    is_confident = df_prediction_odds["prediction_score"] >= score_threshold
    df_confident_prediction_odds = (
        df_prediction_odds[is_confident]
        .sort_values(by="prediction_score", ascending=False)
        .drop_duplicates("batting_name")
    )

    # `batting_hit_recorded` may appear twice in the frame (the selection then
    # yields a DataFrame) or once (a Series). Count hits from a single column
    # in both cases instead of relying on the duplicated layout.
    hit_flags = df_confident_prediction_odds["batting_hit_recorded"]
    if hit_flags.ndim > 1:
        hit_flags = hit_flags.iloc[:, 0]
    hits = hit_flags.sum()
    l = len(df_confident_prediction_odds)
    # No qualifying rows: report NaN explicitly rather than dividing by zero.
    ratio = hits / l if l else float("nan")
    print(f'hit recorded ratio: {ratio} ({hits} out of {l})')

    return df_confident_prediction_odds[['game_date', 'team_away', 'team_home', 'batting_name', "batting_hit_recorded", "prediction_score", "theo_odds", 'over_prob', 'over_odds', 'over_line']]

def get_df_advantageous_prediction_odds(df_prediction_odds, prediction_diff_threshold = 0.05, score_threshold = 0.60):
    """Return per-batter rows where the model beats the bookmaker's implied probability.

    ``prediction_diff`` is ``prediction_score - over_prob``. Rows are sorted
    by that edge (descending), de-duplicated on ``batting_name``, then
    filtered by ``score_threshold`` and ``prediction_diff_threshold``. A
    one-line hit-rate summary is printed as a side effect. The input frame is
    not modified.

    Args:
        df_prediction_odds: joined prediction/odds frame.
        prediction_diff_threshold: minimum ``prediction_diff`` to keep a row.
        score_threshold: minimum ``prediction_score`` to keep a row.

    Returns:
        The selected rows restricted to the display columns.
    """
    df_prediction_odds_ = df_prediction_odds.copy()
    df_prediction_odds_['prediction_diff'] = df_prediction_odds_['prediction_score'] - df_prediction_odds_['over_prob']

    # NOTE(review): de-duplication happens before the threshold filters, so a
    # batter whose largest-edge row fails the score threshold is dropped even
    # if another of their rows would qualify - confirm this is intended.
    best_edge_per_batter = df_prediction_odds_.sort_values(by="prediction_diff", ascending=False).drop_duplicates("batting_name")
    df_advantageous_prediction_odds = best_edge_per_batter[best_edge_per_batter["prediction_score"] >= score_threshold]
    df_advantageous_prediction_odds = df_advantageous_prediction_odds[df_advantageous_prediction_odds["prediction_diff"] >= prediction_diff_threshold]

    # `batting_hit_recorded` may appear twice in the frame (the selection then
    # yields a DataFrame) or once (a Series). Count hits from a single column
    # in both cases instead of relying on the duplicated layout.
    hit_flags = df_advantageous_prediction_odds["batting_hit_recorded"]
    if hit_flags.ndim > 1:
        hit_flags = hit_flags.iloc[:, 0]
    hits = hit_flags.sum()
    l = len(df_advantageous_prediction_odds)
    # No qualifying rows: report NaN explicitly rather than dividing by zero.
    ratio = hits / l if l else float("nan")
    print(f'hit recorded ratio: {ratio} ({hits} out of {l})')

    return df_advantageous_prediction_odds[['game_date', 'team_away', 'team_home', 'batting_name', "prediction_diff", "batting_hit_recorded", "prediction_score", "theo_odds", 'over_prob', 'over_odds', 'over_line']]

In [12]:
# Score today's live matchups, then join the scores with today's hits odds.
df_live_prediction = get_df_prediction_odd(df_live_game_matchup, regression_model)
df_live_prediction_hits_odds = get_df_prediction_hits_odds(df_live_prediction, df_live_odds_hits)

In [71]:
# Today's picks by raw model confidence: one row per batter with
# prediction_score >= 0.70, highest score first.
df_live_confident_prediction_odds = get_df_confident_prediction_odds(df_live_prediction_hits_odds, score_threshold = 0.70)
df_live_confident_prediction_odds

hit recorded ratio: 0.25 (3 out of 12)


Unnamed: 0,game_date,team_away,team_home,batting_name,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
69,2023-06-01,Philadelphia Phillies,New York Mets,Edmundo Sosa,0,0,0.7873,-370,0.591837,-145,0.5
118,2023-06-01,San Diego Padres,Miami Marlins,Jonathan Davis,0,0,0.7752,-345,0.622642,-165,0.5
55,2023-06-01,Colorado Rockies,Arizona Diamondbacks,Pavin Smith,0,0,0.7733,-341,0.736842,-280,0.5
41,2023-06-01,Colorado Rockies,Arizona Diamondbacks,Josh Rojas,0,0,0.766,-327,0.69697,-230,0.5
30,2023-06-01,Colorado Rockies,Arizona Diamondbacks,Christian Walker,0,0,0.7637,-323,0.71831,-255,0.5
97,2023-06-01,Philadelphia Phillies,New York Mets,Trea Turner,1,1,0.7406,-286,0.726027,-265,0.5
39,2023-06-01,Colorado Rockies,Arizona Diamondbacks,Jake McCarthy,0,0,0.7337,-276,0.736842,-280,0.5
140,2023-06-01,Milwaukee Brewers,Toronto Blue Jays,Christian Yelich,1,1,0.727,-266,0.666667,-200,0.5
131,2023-06-01,San Diego Padres,Miami Marlins,Xander Bogaerts,1,1,0.715,-251,0.666667,-200,0.5
21,2023-06-01,Cincinnati Reds,Boston Red Sox,Rafael Devers,0,0,0.7144,-250,0.736842,-280,0.5


In [76]:
# Today's picks by edge over the bookmaker: prediction_score >= 0.60 and
# (prediction_score - over_prob) >= 0.05, largest edge first.
df_live_advantageous_prediction_odds = get_df_advantageous_prediction_odds(df_live_prediction_hits_odds, prediction_diff_threshold = 0.05, score_threshold = 0.60)
df_live_advantageous_prediction_odds

hit recorded ratio: 0.16666666666666666 (1 out of 6)


Unnamed: 0,game_date,team_away,team_home,batting_name,prediction_diff,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
69,2023-06-01,Philadelphia Phillies,New York Mets,Edmundo Sosa,0.195463,0,0,0.7873,-370,0.591837,-145,0.5
120,2023-06-01,San Diego Padres,Miami Marlins,Jonathan Davis,0.152558,0,0,0.7752,-345,0.622642,-165,0.5
40,2023-06-01,Colorado Rockies,Arizona Diamondbacks,Jose Herrera,0.131683,0,0,0.6969,-230,0.565217,-130,0.5
13,2023-06-01,Cincinnati Reds,Boston Red Sox,Jose Barrero,0.120844,0,0,0.6764,-209,0.555556,-125,0.5
41,2023-06-01,Colorado Rockies,Arizona Diamondbacks,Josh Rojas,0.06903,0,0,0.766,-327,0.69697,-230,0.5
140,2023-06-01,Milwaukee Brewers,Toronto Blue Jays,Christian Yelich,0.060333,1,1,0.727,-266,0.666667,-200,0.5


# past prediction

## past predictions altogether

In [94]:
# Back-test over every 2023 matchup dated 2023-05-29 or later, joined with
# the concatenated daily odds loaded earlier.
df_live_prediction_since_20230529 = get_df_prediction_odd(df_game_matchup_2023[df_game_matchup_2023.game_date >= '2023-05-29'].reset_index(drop=True), regression_model)
df_live_prediction_hits_odds_since_20230529 = get_df_prediction_hits_odds(df_live_prediction_since_20230529, df_odds_hits_since_20230529)

In [101]:
# Confidence picks over the whole back-test window, with a stricter 0.75 cut-off.
# Rows are de-duplicated on batter name only, so each batter appears at most
# once across all dates.
get_df_confident_prediction_odds(df_live_prediction_hits_odds_since_20230529, score_threshold = 0.75)

hit recorded ratio: 0.6 (3 out of 5)


Unnamed: 0,game_date,team_away,team_home,batting_name,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
507,2023-05-30,Minnesota Twins,Houston Astros,Carlos Correa,1,1,0.7889,-374,0.672131,-205,0.5
1008,2023-05-29,Cleveland Guardians,Baltimore Orioles,Ryan McKenna,0,0,0.7817,-358,0.649123,-185,0.5
51,2023-05-31,New York Yankees,Seattle Mariners,Jarred Kelenic,0,0,0.7662,-328,0.62963,-170,0.5
135,2023-05-31,Philadelphia Phillies,New York Mets,Edmundo Sosa,1,1,0.7597,-316,0.62963,-170,0.5
894,2023-05-29,Atlanta Braves,Oakland Athletics,Austin Riley,1,1,0.7563,-310,0.714286,-250,0.5


In [105]:
# Edge picks over the whole back-test window with stricter thresholds
# (edge >= 0.07, score >= 0.70). Rows are de-duplicated on batter name only,
# so each batter appears at most once across all dates.
get_df_advantageous_prediction_odds(df_live_prediction_hits_odds_since_20230529, prediction_diff_threshold = 0.07, score_threshold = 0.70)

hit recorded ratio: 0.5714285714285714 (4 out of 7)


Unnamed: 0,game_date,team_away,team_home,batting_name,prediction_diff,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
929,2023-05-29,Los Angeles Angels,Chicago White Sox,Mickey Moniak,0.151732,1,1,0.7262,-265,0.574468,-135,0.5
51,2023-05-31,New York Yankees,Seattle Mariners,Jarred Kelenic,0.13657,0,0,0.7662,-328,0.62963,-170,0.5
1008,2023-05-29,Cleveland Guardians,Baltimore Orioles,Ryan McKenna,0.132577,0,0,0.7817,-358,0.649123,-185,0.5
136,2023-05-31,Philadelphia Phillies,New York Mets,Edmundo Sosa,0.13007,1,1,0.7597,-316,0.62963,-170,0.5
508,2023-05-30,Minnesota Twins,Houston Astros,Carlos Correa,0.116769,1,1,0.7889,-374,0.672131,-205,0.5
906,2023-05-29,Atlanta Braves,Oakland Athletics,Ozzie Albies,0.082583,1,1,0.7436,-290,0.661017,-195,0.5
506,2023-05-30,Minnesota Twins,Houston Astros,Byron Buxton,0.082358,0,0,0.705,-239,0.622642,-165,0.5


## past predictions daily

In [60]:
# Back-test for 2023-05-29 using the 2023 matchup frame filtered to that date.
# The commented-out line is the alternative that scores the saved live
# snapshot instead.
# NOTE(review): presumably the season frame is preferred because it is sure to
# carry the recorded results - confirm.
#df_live_prediction_20230529 = get_df_prediction_odd(df_live_game_matchup_20230529, regression_model)
df_live_prediction_20230529 = get_df_prediction_odd(df_game_matchup_2023[df_game_matchup_2023.game_date == '2023-05-29'].reset_index(drop=True), regression_model)
df_live_prediction_hits_odds_20230529 = get_df_prediction_hits_odds(df_live_prediction_20230529, df_odds_hits_20230529)

In [72]:
# 2023-05-29 confidence picks (prediction_score >= 0.70, one row per batter).
df_live_confident_prediction_odds_20230529 = get_df_confident_prediction_odds(df_live_prediction_hits_odds_20230529, score_threshold = 0.70)
df_live_confident_prediction_odds_20230529

hit recorded ratio: 0.3125 (5 out of 16)


Unnamed: 0,game_date,team_away,team_home,batting_name,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
118,2023-05-29,Cleveland Guardians,Baltimore Orioles,Ryan McKenna,0,0,0.7817,-358,0.649123,-185,0.5
31,2023-05-29,Atlanta Braves,Oakland Athletics,Austin Riley,1,1,0.7563,-310,0.714286,-250,0.5
5,2023-05-29,Minnesota Twins,Houston Astros,Jose Altuve,0,0,0.7475,-296,0.722222,-260,0.5
43,2023-05-29,Atlanta Braves,Oakland Athletics,Ozzie Albies,1,1,0.7436,-290,0.661017,-195,0.5
225,2023-05-29,Texas Rangers,Detroit Tigers,Adolis Garcia,0,0,0.7387,-283,0.69697,-230,0.5
3,2023-05-29,Minnesota Twins,Houston Astros,Jose Abreu,0,0,0.737,-280,0.722222,-260,0.5
184,2023-05-29,Colorado Rockies,Arizona Diamondbacks,Kris Bryant,0,0,0.7365,-280,0.71831,-255,0.5
247,2023-05-29,Texas Rangers,Detroit Tigers,Marcus Semien,0,0,0.7352,-278,0.72973,-270,0.5
230,2023-05-29,Texas Rangers,Detroit Tigers,Corey Seager,0,0,0.7281,-268,0.722222,-260,0.5
67,2023-05-29,Los Angeles Angels,Chicago White Sox,Mickey Moniak,1,1,0.7262,-265,0.574468,-135,0.5


In [77]:
# 2023-05-29 edge picks (score >= 0.60 and edge over implied probability >= 0.05).
df_live_advantageous_prediction_odds_20230529 = get_df_advantageous_prediction_odds(df_live_prediction_hits_odds_20230529, prediction_diff_threshold = 0.05, score_threshold = 0.60)
df_live_advantageous_prediction_odds_20230529

hit recorded ratio: 0.5 (4 out of 8)


Unnamed: 0,game_date,team_away,team_home,batting_name,prediction_diff,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
65,2023-05-29,Los Angeles Angels,Chicago White Sox,Matt Thaiss,0.16519,1,1,0.689,-222,0.52381,-110,0.5
66,2023-05-29,Los Angeles Angels,Chicago White Sox,Mickey Moniak,0.151732,1,1,0.7262,-265,0.574468,-135,0.5
118,2023-05-29,Cleveland Guardians,Baltimore Orioles,Ryan McKenna,0.132577,0,0,0.7817,-358,0.649123,-185,0.5
25,2023-05-29,New York Yankees,Seattle Mariners,Taylor Trammell,0.127595,0,0,0.6154,-160,0.487805,105,0.5
23,2023-05-29,New York Yankees,Seattle Mariners,Jarred Kelenic,0.092215,1,1,0.7076,-242,0.615385,-160,0.5
43,2023-05-29,Atlanta Braves,Oakland Athletics,Ozzie Albies,0.082583,1,1,0.7436,-290,0.661017,-195,0.5
101,2023-05-29,Cleveland Guardians,Baltimore Orioles,Cam Gallagher,0.079245,0,0,0.6247,-166,0.545455,-120,0.5
37,2023-05-29,Atlanta Braves,Oakland Athletics,Marcell Ozuna,0.056983,0,0,0.718,-255,0.661017,-195,0.5


In [73]:
# Back-test for 2023-05-30 using the 2023 matchup frame filtered to that date.
# The live-snapshot variant is kept commented out, as in the 2023-05-29 cell:
# its result was overwritten by the very next line, so running it only cost an
# extra model pass.
#df_live_prediction_20230530 = get_df_prediction_odd(df_live_game_matchup_20230530, regression_model)
df_live_prediction_20230530 = get_df_prediction_odd(df_game_matchup_2023[df_game_matchup_2023.game_date == '2023-05-30'].reset_index(drop=True), regression_model)
df_live_prediction_hits_odds_20230530 = get_df_prediction_hits_odds(df_live_prediction_20230530, df_odds_hits_20230530)

In [74]:
# 2023-05-30 confidence picks (prediction_score >= 0.70, one row per batter).
df_live_confident_prediction_odds_20230530 = get_df_confident_prediction_odds(df_live_prediction_hits_odds_20230530, score_threshold = 0.70)
df_live_confident_prediction_odds_20230530

hit recorded ratio: 0.6111111111111112 (11 out of 18)


Unnamed: 0,game_date,team_away,team_home,batting_name,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
67,2023-05-30,Minnesota Twins,Houston Astros,Carlos Correa,1,1,0.7889,-374,0.672131,-205,0.5
205,2023-05-30,Kansas City Royals,St. Louis Cardinals,Paul Goldschmidt,1,1,0.7387,-283,0.733333,-275,0.5
380,2023-05-30,Cincinnati Reds,Boston Red Sox,Jonathan India,1,1,0.7326,-274,0.705882,-240,0.5
330,2023-05-30,Texas Rangers,Detroit Tigers,Miguel Cabrera,1,1,0.7258,-265,0.666667,-200,0.5
375,2023-05-30,Milwaukee Brewers,Toronto Blue Jays,Whit Merrifield,1,1,0.7246,-263,0.722222,-260,0.5
164,2023-05-30,Los Angeles Angels,Chicago White Sox,Eloy Jimenez,1,1,0.7241,-262,0.74026,-285,0.5
308,2023-05-30,Texas Rangers,Detroit Tigers,Adolis Garcia,1,1,0.7237,-262,0.733333,-275,0.5
116,2023-05-30,Atlanta Braves,Oakland Athletics,Austin Riley,0,0,0.7211,-259,0.672131,-205,0.5
399,2023-05-30,Cleveland Guardians,Baltimore Orioles,Andres Gimenez,1,1,0.7209,-258,0.701493,-235,0.5
85,2023-05-30,Minnesota Twins,Houston Astros,Jose Altuve,0,0,0.7128,-248,0.677419,-210,0.5


In [78]:
# 2023-05-30 edge picks (score >= 0.60 and edge over implied probability >= 0.05).
df_live_advantageous_prediction_odds_20230530 = get_df_advantageous_prediction_odds(df_live_prediction_hits_odds_20230530, prediction_diff_threshold = 0.05, score_threshold = 0.60)
df_live_advantageous_prediction_odds_20230530

hit recorded ratio: 0.5 (2 out of 4)


Unnamed: 0,game_date,team_away,team_home,batting_name,prediction_diff,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
67,2023-05-30,Minnesota Twins,Houston Astros,Carlos Correa,0.116769,1,1,0.7889,-374,0.672131,-205,0.5
66,2023-05-30,Minnesota Twins,Houston Astros,Byron Buxton,0.082358,0,0,0.705,-239,0.622642,-165,0.5
178,2023-05-30,Los Angeles Angels,Chicago White Sox,Matt Thaiss,0.062744,0,0,0.6183,-162,0.555556,-125,0.5
330,2023-05-30,Texas Rangers,Detroit Tigers,Miguel Cabrera,0.059133,1,1,0.7258,-265,0.666667,-200,0.5
