In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from scipy.stats import poisson
import datetime as dt
from unidecode import unidecode
#from game_log import load_current_line
from glob import glob
plt.rcParams['figure.figsize'] = [13, 7]
pd.set_option("display.precision", 3)
pd.set_option('display.max_rows', 10)
pd.set_option('display.min_rows', 10)

In [2]:
def load_current_line(path,key,time,date_col) -> pd.DataFrame:
    df = pd.read_csv(path,index_col=0)

    if key in df.columns and time in df.columns:
        df = df.sort_values(by=time,ascending=False)
        return df.loc[df[time]== df[time].max()].reset_index(drop=True)
    else:
        return df

In [3]:
pp_files = glob(f'./Lines//pp/*')
pp_lines = pd.DataFrame()
for file in pp_files:
    new = load_current_line(file,'prop_id','time','time')
    if pp_lines.empty:
        pp_lines = new
    else:
        pp_lines = pd.concat((pp_lines,new))

unabated_files = glob(f'./Lines//unabated/unabated_raw*')
unabated_raw = pd.DataFrame()
for file in unabated_files:
        new = load_current_line(file,'prop_id','time',['event_time'])
        if unabated_raw.empty:
            unabated_raw = new
        else:
            unabated_raw = pd.concat((unabated_raw,new))


In [4]:
today = dt.datetime.today() 
year = today.year
month = today.month
day = today.day
today_str = f'{year}_{month}_{day}'


pdata = pd.read_csv('game_logs/pdata.csv')
pdata['season'] = pdata['date'].apply(lambda x: x.split('-')[0])
data = pd.read_csv('game_logs/data_2024.csv')
data23 = pd.read_csv('game_logs/data_2023.csv')
data = pd.concat((pdata,data,data23))
data= data.sort_values(by=['player','date']).reset_index(drop=True)
data.sample(3)

Unnamed: 0,player,G,date,series,team,H/A,opp,G#,W/L,GS,...,TOV,PF,PTS,GmSc,+/-,pos,KM,season,age,Opp
704,Al Horford,56,2023-03-17,,BOS,0,POR,,14,1,...,0,3,12,16.7,5.0,C,15,2023,36-287,
34598,Trey Murphy III,21,2022-12-04,,NOP,1,DEN,,15,1,...,0,4,12,11.7,-4.0,SF,15,2023,22-169,
23195,Luguentz Dort,22,2022-11-30,,OKC,1,SAS,,8,1,...,2,2,23,17.4,11.0,SF,15,2023,23-225,


In [5]:
pd.to_datetime("2023-10-29 12:00:00-05:00")

Timestamp('2023-10-29 12:00:00-0500', tz='UTC-05:00')

In [13]:
unabated_raw['event_time'] = pd.to_datetime(unabated_raw['event_time'],format="mixed")

In [15]:
from datetime import datetime
from pytz import timezone
#unabated_raw['event_time'] = pd.to_datetime(unabated_raw['event_time'],errors='coerce')
# Function to format the date
def format_date(date_obj):
    est = timezone('US/Eastern')
    return date_obj.strftime('%Y-%m-%d')

# Apply the function to the entire column
unabated_raw['date'] = unabated_raw['event_time'].apply(format_date)
unabated_raw

Unnamed: 0,player,player_id,points,price,side,stat,book,league_id,event_time,prob,opp,Team,time,prop_id,Prob,Opp,date
3,Dallas Goedert,35761,41.5,-135.0,over,Receiving Yards,PrizePicks,1,2023-10-15 20:25:00,0.574,NYJ,PHI,,,,,2023-10-15
7,Dallas Goedert,35761,41.5,-135.0,under,Receiving Yards,PrizePicks,1,2023-10-15 20:25:00,0.574,NYJ,PHI,,,,,2023-10-15
3,DeVonta Smith,35599,63.5,-135.0,over,Receiving Yards,PrizePicks,1,2023-10-15 20:25:00,0.574,NYJ,PHI,,,,,2023-10-15
7,DeVonta Smith,35599,63.5,-135.0,under,Receiving Yards,PrizePicks,1,2023-10-15 20:25:00,0.574,NYJ,PHI,,,,,2023-10-15
7,Zach Wilson,34241,189.5,-135.0,over,Passing Yards,Underdog Fantasy,1,2023-10-15 20:25:00,0.574,NYJ,PHI,,,,,2023-10-15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23117,Joel Embiid,43292,11.5,-125.0,over,TRB,PointsBet,3,2023-11-06 18:00:00-06:00,0.556,PHI,WAS,2023-11-06 11:16:00,54b6981d-0,,,2023-11-06
23118,Joel Embiid,43292,40.5,-135.0,under,PTS+TRB,Underdog Fantasy,3,2023-11-06 18:00:00-06:00,0.574,PHI,WAS,2023-11-06 11:16:00,b29332af-b,,,2023-11-06
23119,Joel Embiid,43292,16.5,-135.0,under,TRB+AST,Underdog Fantasy,3,2023-11-06 18:00:00-06:00,0.574,PHI,WAS,2023-11-06 11:16:00,f345d24b-5,,,2023-11-06
23120,Joel Embiid,43292,5.5,102.0,over,AST,FanDuel,3,2023-11-06 18:00:00-06:00,0.495,PHI,WAS,2023-11-06 11:16:00,c23cdfc9-1,,,2023-11-06


In [16]:
keepers = ['player', 'G', 'date', 'age', 'team', 'H/A', 'Opp', 'W/L', 'GS', 'MP','pos', 'season', 'KM']
melted = data.melt(id_vars=keepers, var_name='stat', value_name='stat_value')

In [17]:
unabated_raw['date'] = pd.to_datetime(melted['date'])

In [18]:
melted['date'] = pd.to_datetime(melted['date'])

In [19]:
check = melted[['player','date','stat_value','stat']].merge(unabated_raw,on=['player','date','stat'])
check = check.loc[check.points!=0.0]
check = check[check.duplicated(subset=['player', 'stat','book','time'], keep=False) & (check['side'] != check['side'].shift())]

In [20]:
mask_over = check.points < check.stat_value
mask_under = check.points > check.stat_value
mask_push = check.points == check.stat_value

In [21]:

result_over = check[mask_over].groupby(['book']).size().reset_index(name='overs')
result_under = check[mask_under].groupby(['book']).size().reset_index(name='unders')
result_push = check[mask_push].groupby(['book']).size().reset_index(name='push')
# Merge the two results based on the 'book' column
result = result_over.merge(result_under, on=['book'], how='outer')
result = result.merge(result_push, on=['book'], how='outer')
# Fill missing values with 0
result.fillna(0, inplace=True)
pd.set_option('display.max_rows', 20)
pd.set_option('display.min_rows', 20)
result['over_rate'] = (result.overs)/(result.overs+result.unders)
result.sort_values(by='overs')

Unnamed: 0,book,overs,unders,push,over_rate
9,Pinnacle,136,144,0.0,0.486
11,PrizePicks,156,159,10.0,0.495
14,Unibet,192,194,0.0,0.497
10,PointsBet,207,252,0.0,0.451
12,Sugarhouse,212,199,0.0,0.516
8,Parx,215,211,0.0,0.505
7,Four Winds,220,188,0.0,0.539
3,BetRivers,221,212,0.0,0.51
0,888sports,226,254,0.0,0.471
13,Underdog Fantasy,261,253,18.0,0.508


In [None]:
import numpy as np

# Assuming you have a DataFrame named 'check' with columns 'book', 'stat', 'points', and 'stat_value'
mse_by_book = check.groupby(['book', 'stat']).apply(lambda group: mean_squared_error(group['points'], group['stat_value'])).reset_index()
mse_by_book.columns = ['book', 'stat', 'error']

# Calculate the variance of the 'points' column
variance_points = check['points'].var()

# Scale the MSE by the variance

average_error_by_stat = mse_by_book.groupby('stat')['error'].mean().reset_index()
average_error_by_stat.columns = ['stat', 'average_error']

# Merge the average errors back into the original DataFrame
mse_by_book = mse_by_book.merge(average_error_by_stat, on='stat', how='left')

# Scale the 'error' column by the 'average_error'
mse_by_book['scaled_error'] = mse_by_book['error'] / mse_by_book['average_error']

# Display the resulting DataFrame with scaled errors
mse_by_book

In [None]:
mse_by_book.groupby('book')['scaled_error'].mean().sort_values()

book
BetMGM              0.836
Bet365              0.861
Caesars             0.887
PointsBet           0.924
888sports           0.945
Underdog Fantasy    0.973
DraftKings          0.974
Sugarhouse          1.017
Unibet              1.035
PrizePicks          1.036
Pinnacle            1.042
BetRivers           1.067
Parx                1.081
Four Winds          1.081
FanDuel             1.252
Name: scaled_error, dtype: float64

In [47]:
pp_nba = pp_lines.loc[(pp_lines.league==7) | (pp_lines.league_id==7)]

In [53]:
melted['date'] = melted['date'].astype(str)

In [57]:
melted.sample(2)

Unnamed: 0,player,G,date,age,team,H/A,Opp,W/L,GS,MP,pos,season,KM,stat,stat_value
512232,Wendell Moore Jr.,15,2022-12-21,21-094,MIN,1,,-5,0,4.7,SG,2023,15,DRB,1
462146,Lamar Stevens,16,2022-12-14,25-158,CLE,0,,15,1,38.417,SF,2023,15,ORB,0


In [58]:
pp_nba.sample(2)

Unnamed: 0,player,team,line,stat,league,date,time,event_time,prop_id,opp,league_id,league_name,pp_player_id
2127,LeBron James,LAL,17.0,FGA,,2023-11-06,2023-11-06 11:16:00,2023-11-06 18:40:00-06:00,d6aab2d3-4,MIA,7.0,NBA,58309.0
2874,Cade Cunningham + LaMelo Ball,CHA/DET,14.0,Assists (Combo),7.0,2023-10-27,2023-10-27 16:15:00,2023-10-27 18:00:00-05:00,9d9f4086-f,,,,


In [61]:
check = melted[['player','date','stat_value','stat']].merge(pp_nba,on=['player','date','stat'])
check.head(3)

Unnamed: 0,player,date,stat_value,stat,team,line,league,time,event_time,prop_id,opp,league_id,league_name,pp_player_id
0,Aaron Gordon,2023-10-27,11,FGA,MEM,10.0,7.0,2023-10-27 16:15:00,2023-10-27 18:10:00-05:00,4b2464f6-4,,,,
1,Alperen Sengun,2023-10-27,18,FGA,SAS,11.5,7.0,2023-10-27 16:15:00,2023-10-27 19:10:00-05:00,0d32e27f-4,,,,
2,Andrew Wiggins,2023-10-24,12,FGA,PHX,14.5,7.0,2023-10-24 17:29:00,2023-10-24 21:10:00-05:00,b75e74ab-2,,,,


In [65]:
print(len(check[check.stat_value > check.line]),'overs')
print(len(check[check.stat_value == check.line]),'push')
print(len(check[check.stat_value < check.line]),'unders')

1173 overs
69 push
1217 unders
