In [99]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from scipy.stats import poisson
import datetime as dt
from unidecode import unidecode
from game_log import load_current_line
plt.rcParams['figure.figsize'] = [13, 7]
pd.set_option("display.precision", 3)
pd.set_option('display.max_rows', 5)
pd.set_option('display.min_rows', 5)

In [100]:
today = dt.datetime.today() 
year = today.year
month = today.month
day = today.day
today_str = f'{year}_{month}_{day}'

pp_path = f'Lines/pp/pp_{today_str}.csv'
unabated_path = f'Lines/unabated/unabated_raw_{today_str}.csv'

pp_lines = load_current_line(pp_path,'prop_id','time')
unabated_raw = load_current_line(unabated_path,'prop_id','time')
data = pd.read_csv('data_2024.csv')
data['date'] = pd.to_datetime(data['date'])

In [101]:
for i in range(1,10):
    try:
        current = dt.datetime.today()- dt.timedelta(days=i)
        year = current.year
        month = current.month
        day = current.day 
        current_str = f'{year}_{month}_{day}'
        unabated_path = f'Lines/unabated/unabated_raw_{current_str}.csv'
        new = load_current_line(unabated_path,'prop_id','time')
        unabated_raw = pd.concat((unabated_raw,new))
    except: print(current_str)

2023_10_29
2023_10_28
2023_10_25


In [102]:
from datetime import datetime
from pytz import timezone

# Function to format the date
def format_date(date_str):
    est = timezone('US/Eastern')
    date_obj = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S%z').astimezone(est)
    return date_obj.strftime('%Y-%m-%d')

# Apply the function to the entire column
unabated_raw['date'] = unabated_raw['event_time'].apply(format_date)
unabated_raw

Unnamed: 0,player,player_id,points,price,side,stat,book,league_id,event_time,prob,opp,Team,time,prop_id,date
0,Tua Tagovailoa,35383,2.5,-175.0,under,Rushing Attempts,BetMGM,1,2023-11-05 08:30:00-06:00,0.636,KC,MIA,2023-11-02 13:49:00,4535314d-1,2023-11-05
1,Isaiah Stewart,242076,13.5,-135.0,under,PTS+AST,PrizePicks,3,2023-11-02 19:00:00-05:00,0.574,NOP,DET,2023-11-02 13:49:00,8d6d7eb2-d,2023-11-02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22655,Max Strus,42894,9.5,-109.0,over,PTS,Parx,3,2023-10-25 18:30:00-05:00,0.522,BKN,CLE,2023-10-24 17:28:00,43018467-0,2023-10-25
22656,Emmanuel Rivera,39278,0.5,-135.0,under,Total Bases,PrizePicks,5,2023-10-24 19:07:00-05:00,0.574,PHI,ARI,2023-10-24 17:28:00,5cd3987a-0,2023-10-24


In [103]:
keepers = ['player', 'G', 'date', 'age', 'team', 'H/A', 'Opp', 'W/L', 'GS', 'MP','pos', 'season', 'KM']
melted = data.melt(id_vars=keepers, var_name='stat', value_name='stat_value')

In [104]:
unabated_raw['date'] = pd.to_datetime(melted['date'])

In [105]:
check = melted[['player','date','stat_value','stat']].merge(unabated_raw,on=['player','date','stat'])
check = check.loc[check.points!=0.0]
check = check[check.duplicated(subset=['player', 'stat','book','time'], keep=False) & (check['side'] != check['side'].shift())]

In [106]:
mask_over = check.points < check.stat_value
mask_under = check.points > check.stat_value
mask_push = check.points == check.stat_value

In [113]:

result_over = check[mask_over].groupby(['book','stat']).size().reset_index(name='overs')
result_under = check[mask_under].groupby(['book','stat']).size().reset_index(name='unders')
result_push = check[mask_push].groupby(['book','stat']).size().reset_index(name='push')
# Merge the two results based on the 'book' column
result = result_over.merge(result_under, on=['book','stat'], how='outer')
result = result.merge(result_push, on=['book','stat'], how='outer')
# Fill missing values with 0
result.fillna(0, inplace=True)
pd.set_option('display.max_rows', 20)
pd.set_option('display.min_rows', 20)
result['over_rate'] = (result.overs)/(result.overs+result.unders)
result.sort_values(by='overs')

Unnamed: 0,book,stat,overs,unders,push,over_rate
38,FanDuel,BLK,26,18,0.0,0.591
64,PrizePicks,3P,29,46,0.0,0.387
32,DraftKings,BLK,41,58,0.0,0.414
60,PointsBet,BLK,44,88,0.0,0.333
81,Unibet,BLK,45,65,0.0,0.409
40,FanDuel,STL,49,51,0.0,0.490
50,Parx,BLK,49,55,0.0,0.471
20,BetRivers,BLK,51,51,0.0,0.500
71,Sugarhouse,BLK,52,68,0.0,0.433
44,Four Winds,BLK,53,48,0.0,0.525


In [122]:
import numpy as np

# Assuming you have a DataFrame named 'check' with columns 'book', 'stat', 'points', and 'stat_value'
mse_by_book = check.groupby(['book', 'stat']).apply(lambda group: mean_squared_error(group['points'], group['stat_value'])).reset_index()
mse_by_book.columns = ['book', 'stat', 'error']

# Calculate the variance of the 'points' column
variance_points = check['points'].var()

# Scale the MSE by the variance

average_error_by_stat = mse_by_book.groupby('stat')['error'].mean().reset_index()
average_error_by_stat.columns = ['stat', 'average_error']

# Merge the average errors back into the original DataFrame
mse_by_book = mse_by_book.merge(average_error_by_stat, on='stat', how='left')

# Scale the 'error' column by the 'average_error'
mse_by_book['scaled_error'] = mse_by_book['error'] / mse_by_book['average_error']

# Display the resulting DataFrame with scaled errors
mse_by_book

In [130]:
mse_by_book.groupby('book')['scaled_error'].mean().sort_values()

book
BetMGM              0.836
Bet365              0.861
Caesars             0.887
PointsBet           0.924
888sports           0.945
Underdog Fantasy    0.973
DraftKings          0.974
Sugarhouse          1.017
Unibet              1.035
PrizePicks          1.036
Pinnacle            1.042
BetRivers           1.067
Parx                1.081
Four Winds          1.081
FanDuel             1.252
Name: scaled_error, dtype: float64