Use this notebook at the start of each day to see that day's bets.

In [103]:
import pandas as pd, numpy as np
import statsapi
from statsapi import player_stat_data
import requests
from datetime import datetime, timedelta
import numpy as np
import math
import meteostat
import pprint
import pickle
import copy

import pycaret
from pycaret import classification
import model.common
import importlib

In [19]:
from static_data.load_static_data import *

In [154]:
# Read the clock once so both strings derive from the same instant; two separate
# datetime.today() calls could straddle midnight and yield inconsistent dates.
now = datetime.today()
date_today = now.strftime("%Y-%m-%d")
date_yesterday = (now - timedelta(days=1)).strftime("%Y-%m-%d")
date_today, date_yesterday

('2023-05-30', '2023-05-29')

In [58]:
# Re-import model.common so edits made to that module are picked up without restarting the kernel.
importlib.reload(model.common)

<module 'model.common' from '/home/junlim/projects/mlb-props/model/common.py'>

In [None]:
# Load the live matchup snapshot saved for date_today.
# NOTE(review): this cell has no execution count and the same load is repeated in the
# following cell — it looks redundant; confirm before removing.
df_live_game_matchup = pd.read_pickle(f'collect_data/df_live_game_matchup_{date_today}.pkl')

In [155]:
# Today's inputs: the live matchup snapshot (which does not have the result yet)
# and the hits odds collected for the same date.
live_matchup_path = f'collect_data/df_live_game_matchup_{date_today}.pkl'
odds_hits_path = f"odds_data/df_odds_hits_{date_today}.pkl"

df_live_game_matchup = pd.read_pickle(live_matchup_path)
df_odds_hits = pd.read_pickle(odds_hits_path)

In [156]:
# Hits odds snapshots for two fixed dates, kept under date-suffixed names.
# The paths contain no placeholders, so plain string literals are used rather than f-strings.
df_odds_hits_20230529 = pd.read_pickle("odds_data/df_odds_hits_2023-05-29.pkl")
df_odds_hits_20230530 = pd.read_pickle("odds_data/df_odds_hits_2023-05-30.pkl")

In [157]:
# Matchup snapshots for two fixed dates. Per the original note, a matchup saved under a
# previous date should already have the result.
# The paths contain no placeholders, so plain string literals are used rather than f-strings.
df_live_game_matchup_20230529 = pd.read_pickle('collect_data/df_live_game_matchup_2023-05-29.pkl')
df_live_game_matchup_20230530 = pd.read_pickle('collect_data/df_live_game_matchup_2023-05-30.pkl')

# live prediction

In [27]:
# Load the saved pycaret pipeline and model; the file name comes from model.common.model_file_name.
# NOTE(review): the variable is called regression_model but is loaded through
# pycaret.classification — confirm the naming.
regression_model = pycaret.classification.load_model(model.common.model_file_name)

Transformation Pipeline and Model Successfully Loaded


In [151]:
# Columns carried from the prediction frame into the odds join.
# dict.fromkeys drops repeated names while keeping first-seen order. The rendered tables in
# this notebook show "batting_hit_recorded" twice, which suggests model.common.features
# already lists it (TODO confirm); duplicate labels make df[col] return a DataFrame
# instead of a Series and repeat the column in every displayed table.
live_bet_columns = list(dict.fromkeys(
    model.common.features
    + ["batting_hit_recorded", "prediction_label", "prediction_score", "theo_odds"]
))

def get_df_prediction_odd(df_matchup, regression_model):
    """Score a matchup frame and attach each batter's team and model-implied odds.

    Parameters
    ----------
    df_matchup : frame passed as ``data`` to ``pycaret.classification.predict_model``.
        The prediction output is expected to expose ``batting_id`` and
        ``prediction_score`` columns, since both are read below.
    regression_model : estimator passed as ``estimator`` to ``predict_model``.

    Returns
    -------
    The prediction frame left-merged with ``player_id`` / ``player_team_name`` taken
    from the notebook-level ``df_player_team_positions`` (not defined in the visible
    cells; presumably provided by the ``static_data`` star import), plus a
    ``theo_odds`` column computed by ``model.common.odds_calculator`` from
    ``prediction_score``.
    """
    scored = pycaret.classification.predict_model(estimator = regression_model, data = df_matchup)

    team_lookup = df_player_team_positions[['player_id', 'player_team_name']]
    # Left merge: every scored row is kept even when the lookup has no entry for the batter.
    with_team = scored.merge(team_lookup, how='left', left_on='batting_id', right_on='player_id')

    with_team["theo_odds"] = with_team["prediction_score"].apply(model.common.odds_calculator)
    return with_team

def get_df_prediction_hits_odds(df_prediction, df_hits_odds):
    """Join model predictions with hits odds for the same game and batter.

    Parameters
    ----------
    df_prediction : prediction frame; must contain ``game_id`` and every column named
        in the notebook-level ``live_bet_columns`` (which must include ``batting_name``,
        since it is used as a join key).
    df_hits_odds : odds frame with ``game_id``, ``player_name``, ``over_odds`` and
        ``over_line`` columns. It is not modified.

    Returns
    -------
    One row per (prediction, odds) match whose ``over_line`` is below 1.0, with an
    added ``over_prob`` column computed by ``model.common.odds_to_probability`` from
    ``over_odds``. Odds columns that clash with prediction columns get an ``_odds``
    suffix.
    """
    # Work from the odds frame that was passed in, not the notebook-level df_odds_hits,
    # so a call for a historical date is joined against that date's odds.
    odds = df_hits_odds.rename(columns={'player_name': 'batting_name'})
    # assign() returns a new frame, so the caller's odds frame is left untouched.
    odds = odds.assign(over_prob=odds["over_odds"].apply(model.common.odds_to_probability))

    join_keys = ['game_id', 'batting_name']
    df_prediction_odds = (
        df_prediction[live_bet_columns + ['game_id']]
        .set_index(join_keys)
        .join(odds.set_index(join_keys), lsuffix='', rsuffix='_odds')
        .reset_index()
    )
    # Keep only lines below 1.0. Predictions with no matching odds row have a NaN
    # over_line after the left join and are dropped by the same comparison.
    return df_prediction_odds[df_prediction_odds.over_line < 1.0]

def get_df_confident_prediction_odds(df_prediction_odds, score_threshold = 0.70):
    """Return the highest-scoring row per batter among sufficiently confident predictions.

    Rows with ``prediction_score`` below ``score_threshold`` are discarded, the rest are
    ordered by ``prediction_score`` descending, and only the first row for each
    ``batting_name`` is kept. The result is restricted to the summary columns listed
    below; the input frame is not modified.
    """
    summary_columns = [
        'game_date', 'team_away', 'team_home', 'batting_name',
        "batting_hit_recorded", "prediction_score", "theo_odds",
        'over_prob', 'over_odds', 'over_line',
    ]

    is_confident = df_prediction_odds["prediction_score"] >= score_threshold
    ranked = df_prediction_odds[is_confident].sort_values(by = "prediction_score", ascending = False)
    # After the descending sort, the first row seen for a batter is their best score.
    best_per_batter = ranked.drop_duplicates("batting_name")
    return best_per_batter[summary_columns]

def get_df_advantageous_prediction_odds(df_prediction_odds, prediction_diff_threshold = 0.05, score_threshold = 0.60):
    """Return, per batter, the row where the model most exceeds the odds-implied probability.

    ``prediction_diff`` is ``prediction_score - over_prob``. For each ``batting_name``
    only the row with the largest ``prediction_diff`` is kept; that row is returned
    only if its ``prediction_score`` is at least ``score_threshold`` and its
    ``prediction_diff`` is at least ``prediction_diff_threshold``. The input frame is
    not modified.
    """
    summary_columns = [
        'game_date', 'team_away', 'team_home', 'batting_name', "prediction_diff",
        "batting_hit_recorded", "prediction_score", "theo_odds",
        'over_prob', 'over_odds', 'over_line',
    ]

    # Work on a copy so the caller's frame does not gain the prediction_diff column.
    with_diff = df_prediction_odds.copy()
    with_diff['prediction_diff'] = with_diff['prediction_score'] - with_diff['over_prob']

    # De-duplicate BEFORE thresholding: each batter is judged on their single best-diff row.
    best_per_batter = with_diff.sort_values(by = "prediction_diff", ascending = False).drop_duplicates("batting_name")

    score_ok = best_per_batter["prediction_score"] >= score_threshold
    diff_ok = best_per_batter["prediction_diff"] >= prediction_diff_threshold
    return best_per_batter[score_ok & diff_ok][summary_columns]

In [158]:
# Score today's matchups, then join the scores with today's hits odds.
df_live_prediction = get_df_prediction_odd(
    df_live_game_matchup,
    regression_model,
)
df_live_prediction_hits_odds = get_df_prediction_hits_odds(
    df_live_prediction,
    df_odds_hits,
)

In [159]:
# Today's confident picks: best row per batter with prediction_score >= 0.70.
df_live_confident_prediction_odds = get_df_confident_prediction_odds(
    df_live_prediction_hits_odds,
    score_threshold=0.70,
)
df_live_confident_prediction_odds

Unnamed: 0,game_date,team_away,team_home,batting_name,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
55,2023-05-30,Minnesota Twins,Houston Astros,Carlos Correa,0,0,0.7889,-374,0.672131,-205,0.5
187,2023-05-30,Kansas City Royals,St. Louis Cardinals,Paul Goldschmidt,0,0,0.7387,-283,0.733333,-275,0.5
363,2023-05-30,Cincinnati Reds,Boston Red Sox,Jonathan India,0,0,0.7326,-274,0.705882,-240,0.5
312,2023-05-30,Texas Rangers,Detroit Tigers,Miguel Cabrera,0,0,0.7258,-265,0.666667,-200,0.5
358,2023-05-30,Milwaukee Brewers,Toronto Blue Jays,Whit Merrifield,0,0,0.7246,-263,0.722222,-260,0.5
151,2023-05-30,Los Angeles Angels,Chicago White Sox,Eloy Jimenez,0,0,0.7241,-262,0.74026,-285,0.5
292,2023-05-30,Texas Rangers,Detroit Tigers,Adolis Garcia,0,0,0.7237,-262,0.733333,-275,0.5
106,2023-05-30,Atlanta Braves,Oakland Athletics,Austin Riley,0,0,0.7211,-259,0.672131,-205,0.5
383,2023-05-30,Cleveland Guardians,Baltimore Orioles,Andres Gimenez,0,0,0.7209,-258,0.701493,-235,0.5
68,2023-05-30,Minnesota Twins,Houston Astros,Jose Altuve,0,0,0.7128,-248,0.677419,-210,0.5


In [160]:
# Today's advantageous picks: prediction_score >= 0.60 and at least 0.05 above the
# probability implied by the over odds.
df_live_advantageous_prediction_odds = get_df_advantageous_prediction_odds(
    df_live_prediction_hits_odds,
    prediction_diff_threshold=0.05,
    score_threshold=0.60,
)
df_live_advantageous_prediction_odds

Unnamed: 0,game_date,team_away,team_home,batting_name,prediction_diff,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
56,2023-05-30,Minnesota Twins,Houston Astros,Carlos Correa,0.116769,0,0,0.7889,-374,0.672131,-205,0.5
54,2023-05-30,Minnesota Twins,Houston Astros,Byron Buxton,0.082358,0,0,0.705,-239,0.622642,-165,0.5
73,2023-05-30,Minnesota Twins,Houston Astros,Michael A. Taylor,0.079045,0,0,0.6245,-166,0.545455,-120,0.5
313,2023-05-30,Texas Rangers,Detroit Tigers,Miguel Cabrera,0.059133,0,0,0.7258,-265,0.666667,-200,0.5


In [143]:
# Same pipeline for the 2023-05-29 snapshot: score the matchups, then join with that date's odds frame.
df_live_prediction_20230529 = get_df_prediction_odd(
    df_live_game_matchup_20230529,
    regression_model,
)
df_live_prediction_hits_odds_20230529 = get_df_prediction_hits_odds(
    df_live_prediction_20230529,
    df_odds_hits_20230529,
)

In [144]:
# Confident picks for 2023-05-29: best row per batter with prediction_score >= 0.70.
df_live_confident_prediction_odds_20230529 = get_df_confident_prediction_odds(
    df_live_prediction_hits_odds_20230529,
    score_threshold=0.70,
)
df_live_confident_prediction_odds_20230529

Unnamed: 0,game_date,team_away,team_home,batting_name,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
61,2023-05-29,Cleveland Guardians,Baltimore Orioles,Ryan McKenna,0,0,0.7817,-358,0.649123,-185,0.5
5,2023-05-29,Minnesota Twins,Houston Astros,Jose Altuve,0,0,0.7475,-296,0.722222,-260,0.5
157,2023-05-29,Texas Rangers,Detroit Tigers,Adolis Garcia,0,0,0.7387,-283,0.69697,-230,0.5
3,2023-05-29,Minnesota Twins,Houston Astros,Jose Abreu,0,0,0.737,-280,0.722222,-260,0.5
113,2023-05-29,Colorado Rockies,Arizona Diamondbacks,Kris Bryant,0,0,0.7365,-280,0.71831,-255,0.5
176,2023-05-29,Texas Rangers,Detroit Tigers,Marcus Semien,1,1,0.7352,-278,0.72973,-270,0.5
161,2023-05-29,Texas Rangers,Detroit Tigers,Corey Seager,1,1,0.7281,-268,0.722222,-260,0.5
36,2023-05-29,Los Angeles Angels,Chicago White Sox,Mickey Moniak,0,0,0.7262,-265,0.574468,-135,0.5
165,2023-05-29,Texas Rangers,Detroit Tigers,Javier Baez,1,1,0.7135,-249,0.733333,-275,0.5
82,2023-05-29,Kansas City Royals,St. Louis Cardinals,Paul Goldschmidt,0,0,0.7073,-242,0.710145,-245,0.5


In [161]:
# Advantageous picks for 2023-05-29: prediction_score >= 0.60 and at least 0.05 above the
# probability implied by the over odds.
df_live_advantageous_prediction_odds_20230529 = get_df_advantageous_prediction_odds(
    df_live_prediction_hits_odds_20230529,
    prediction_diff_threshold=0.05,
    score_threshold=0.60,
)
df_live_advantageous_prediction_odds_20230529

Unnamed: 0,game_date,team_away,team_home,batting_name,prediction_diff,batting_hit_recorded,batting_hit_recorded.1,prediction_score,theo_odds,over_prob,over_odds,over_line
34,2023-05-29,Los Angeles Angels,Chicago White Sox,Matt Thaiss,0.16519,0,0,0.689,-222,0.52381,-110,0.5
35,2023-05-29,Los Angeles Angels,Chicago White Sox,Mickey Moniak,0.151732,0,0,0.7262,-265,0.574468,-135,0.5
61,2023-05-29,Cleveland Guardians,Baltimore Orioles,Ryan McKenna,0.132577,0,0,0.7817,-358,0.649123,-185,0.5
49,2023-05-29,Cleveland Guardians,Baltimore Orioles,Cam Gallagher,0.079245,1,1,0.6247,-166,0.545455,-120,0.5
