In [1]:
import torch
import seaborn as sea
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from game_log import load_current_line, GameLog
plt.rcParams['figure.figsize'] = [15, 8]
pd.set_option("display.precision", 3)

In [2]:
pd.set_option('display.max_rows', 10)
pd.set_option('display.min_rows', 10)
torch.set_printoptions(sci_mode=False)

In [13]:
opp_stats = pd.read_csv('reference_data/opponent_stats.csv',index_col=0)
schedule = pd.read_csv('reference_data/schedule.csv',index_col=0)
KM_vals = pd.read_csv('reference_data/KM_vals.csv',index_col=0)

today = dt.datetime.today()
year = today.year
month = today.month
day = today.day
today_str = f'{year}_{month}_{day}'


pp_path = f'Lines/pp/pp_{today_str}.csv'
unabated_path = f'Lines/unabated/unabated_{today_str}.csv'

pp_lines = load_current_line(pp_path,'prop_id','time')
unabated = load_current_line(unabated_path,'prop_id','time')

In [5]:
pdata = pd.read_csv('game_logs/pdata.csv')
pdata['season'] = pdata['date'].apply(lambda x: x.split('-')[0])
data = pd.read_csv('game_logs/data_2024.csv')
data23 = pd.read_csv('game_logs/data_2023.csv')
data = pd.concat((pdata,data,data23))
data= data.sort_values(by=['player','date']).reset_index(drop=True)
data.sample(3)

Unnamed: 0,player,G,date,series,team,H/A,opp,G#,W/L,GS,...,TOV,PF,PTS,GmSc,+/-,pos,KM,season,age,Opp
13601,Jaden McDaniels,35,2023-01-02,,MIN,1,DEN,,13,1,...,2,4,21,20.1,13.0,SF,15,2023,22-095,
27644,Orlando Robinson,4,2022-12-17,,MIA,0,SAS,,10,0,...,0,0,2,2.7,0.0,C,15,2023,22-160,
30271,Rudy Gay,42,2023-02-10,,UTA,0,TOR,,6,0,...,2,0,12,8.7,2.0,PF,15,2023,36-177,


In [6]:
gl = GameLog(data)

In [7]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
def pt_corr(player_a_name, player_b_name, team,stat1='PTS',stat2='PTS'):
    team_games = data.loc[data.team==team]
    # extract two columns of interest (i.e. points scored by the two players)
    player_a_dates = set(team_games[team_games['player'] == player_a_name]['date'])
    player_b_dates = set(team_games[team_games['player'] == player_b_name]['date'])
    common_dates = player_a_dates.intersection(player_b_dates)
    stats = team_games[team_games['date'].isin(common_dates)]
    
    player_a_points = stats[stats['player'] == player_a_name][stat1].reset_index(drop=True)
    player_b_points = stats[stats['player'] == player_b_name][stat2].reset_index(drop=True)
    sample = len(player_a_points)
    # calculate the correlation coefficient between the two columns
    corr_coeff = player_a_points.corr(player_b_points)
    # return the correlation coefficient
    return corr_coeff,sample
pt_corr('Stephen Curry','Klay Thompson','GSW')

(0.03776803308315927, 190)

In [8]:
team_list = data.team.unique()

In [9]:
score_avg = data.groupby('player')['PTS'].mean().reset_index().sort_values(by='PTS',ascending=False)
top_scorers = score_avg.player.unique()[0:125]

In [38]:
unabated.columns

Index(['prop_id', 'player', 'player_id', 'line', 'stat', 'league_id',
       'event_time', 'opp', 'Team', 'count', 'over_prob', 'under_prob', 'pred',
       'time'],
      dtype='object')

In [41]:
nba = unabated.loc[unabated.league_id==3]
eligible_players =nba.player.unique()

In [11]:
team2players = {}
for team in team_list:
    team_games = data.loc[data.team==team]
    players = team_games.loc[team_games.GS==1].player.unique()
    team2players[team]=players

In [59]:
stats = ['PTS','AST','TRB']
def get_correlations(team_list,player_list,stat1_list,stat2_list):
    correlate = []
    for team in team_list:
        for p in team2players[team]:
            if p in player_list:
                for p2 in team2players[team]:
                    if (p != p2) and (p2 in player_list):
                        for stat1 in stat1_list:
                            for stat2 in stat2_list:
                                corr,sample = pt_corr(p,p2,team,stat1,stat2)
                                correlate.append([p,stat1,p2,stat2,corr,team,sample])  
                                corr,sample = pt_corr(p2,p,team,stat2,stat1)
                                correlate.append([p,stat1,p2,stat2,corr,team,sample])
    all_corr = pd.DataFrame(correlate,columns=['player_1','stat_1','player_2','stat_2','correlation','team','sample']).sort_values(by='correlation').dropna()
    all_corr = all_corr.loc[all_corr['sample']>10]
    return all_corr
all_corr = get_correlations(team_list,eligible_players,stats,stats)

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)


In [81]:
nba = pp_lines.loc[pp_lines.league_id==7]
nba_small = nba[['player','stat','line']]
nba_small.head(2)

Unnamed: 0,player,stat,line
1,Michael Porter Jr.,3PA,5.5
22,Nikola Jokic,AST,9.0


In [82]:
temp = all_corr.merge(nba_small,left_on=['player_1','stat_1'],right_on=['player','stat']).drop(['player','stat'],axis=1)
decisions = temp.merge(nba_small,left_on=['player_2','stat_2'],right_on=['player','stat'],suffixes=['_p1','_p2']).drop(['player','stat'],axis=1)
decisions

Unnamed: 0,player_1,stat_1,player_2,stat_2,correlation,team,sample,line_p1,line_p2
0,Mikal Bridges,PTS,Ben Simmons,PTS,-0.569,BKN,13,21.5,7.5
1,Spencer Dinwiddie,AST,Ben Simmons,PTS,-0.603,BKN,15,5.5,7.5
2,Mikal Bridges,TRB,Ben Simmons,PTS,-0.496,BKN,13,5.0,7.5
3,Spencer Dinwiddie,PTS,Ben Simmons,PTS,0.199,BKN,15,15.5,7.5
4,Cam Thomas,PTS,Ben Simmons,PTS,-0.139,BKN,31,23.5,7.5
...,...,...,...,...,...,...,...,...,...
1955,Kyle Kuzma,PTS,Daniel Gafford,TRB,-0.068,WAS,64,21.5,6.0
1956,Deni Avdija,PTS,Daniel Gafford,TRB,0.102,WAS,75,10.5,6.0
1957,Deni Avdija,PTS,Daniel Gafford,TRB,0.102,WAS,75,10.5,6.0
1958,Deni Avdija,TRB,Daniel Gafford,TRB,0.042,WAS,75,6.0,6.0


In [92]:
unabated_small = unabated[['player', 'line', 'stat','over_prob', 'pred']]

In [110]:
temp = decisions.merge(unabated_small,left_on=['player_1','stat_1','line_p1'],right_on=['player','stat','line']).drop(['player','stat','line'],axis=1)
final = temp.merge(unabated_small,left_on=['player_2','stat_2','line_p2'],right_on=['player','stat','line'],suffixes=['_p1','_p2']).drop(['player','stat','line'],axis=1)
final

Unnamed: 0,player_1,stat_1,player_2,stat_2,correlation,team,sample,line_p1,line_p2,over_prob_p1,pred_p1,over_prob_p2,pred_p2
0,Mikal Bridges,PTS,Ben Simmons,PTS,-0.569,BKN,13,21.5,7.5,0.497,21.628,0.514,7.764
1,Spencer Dinwiddie,AST,Ben Simmons,PTS,-0.603,BKN,15,5.5,7.5,0.494,5.633,0.514,7.764
2,Mikal Bridges,TRB,Ben Simmons,PTS,-0.496,BKN,13,5.0,7.5,0.500,4.931,0.514,7.764
3,Spencer Dinwiddie,PTS,Ben Simmons,PTS,0.199,BKN,15,15.5,7.5,0.491,15.474,0.514,7.764
4,Cam Thomas,PTS,Ben Simmons,PTS,-0.139,BKN,31,23.5,7.5,0.494,23.595,0.514,7.764
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1955,Deni Avdija,PTS,Daniel Gafford,TRB,0.102,WAS,75,10.5,6.0,0.473,10.427,0.500,6.253
1956,Deni Avdija,TRB,Daniel Gafford,TRB,0.042,WAS,75,6.0,6.0,0.500,6.345,0.500,6.253
1957,Deni Avdija,TRB,Daniel Gafford,TRB,0.042,WAS,75,6.0,6.0,0.500,6.345,0.500,6.253
1958,Kyle Kuzma,PTS,Daniel Gafford,TRB,-0.068,WAS,64,21.5,6.0,0.484,21.446,0.500,6.253


In [117]:
final = final[['player_1', 'stat_1','over_prob_p1', 'player_2', 'stat_2','over_prob_p2', 'correlation', 'team',
       'sample', 'line_p1', 'line_p2']]

In [118]:
both_over = final.loc[(final.over_prob_p1 > 0.51) & (final.over_prob_p2 >0.51) & (final.correlation < -0.1)].sort_values(by='correlation').drop_duplicates().head(10)

In [124]:
final.loc[((final.over_prob_p1 - 0.50) * (final.over_prob_p2 - 0.5) < 0) & (final.correlation < -0.15)].sort_values(by='over_prob_p1').drop_duplicates().head(10)

Unnamed: 0,player_1,stat_1,over_prob_p1,player_2,stat_2,over_prob_p2,correlation,team,sample,line_p1,line_p2
1455,Draymond Green,AST,0.487,Stephen Curry,AST,0.512,-0.203,GSW,193,6.5,4.5
562,Tyler Herro,PTS,0.488,Kyle Lowry,AST,0.507,-0.24,MIA,59,24.5,4.5
563,Tyler Herro,PTS,0.488,Kyle Lowry,AST,0.507,-0.24,MIA,59,24.5,4.5
336,Dorian Finney-Smith,PTS,0.49,Luka Doncic,PTS,0.511,-0.261,DAL,35,10.5,29.5
1551,Damian Lillard,PTS,0.491,Jerami Grant,PTS,0.514,-0.194,POR,50,24.5,20.5
1550,Damian Lillard,PTS,0.491,Jerami Grant,PTS,0.514,-0.194,POR,50,24.5,20.5
1,Spencer Dinwiddie,AST,0.494,Ben Simmons,PTS,0.514,-0.603,BKN,15,5.5,7.5
0,Mikal Bridges,PTS,0.497,Ben Simmons,PTS,0.514,-0.569,BKN,13,21.5,7.5
749,Tyler Herro,AST,0.497,Jimmy Butler,TRB,0.504,-0.197,MIA,97,3.5,5.5
1640,Khris Middleton,PTS,0.497,Bobby Portis,PTS,0.527,-0.201,MIL,53,8.5,8.5


In [135]:
final.loc[(final.player_1=='Draymond Green') & (final.stat_1 =='AST')].sort_values(by='correlation')

Unnamed: 0,player_1,stat_1,over_prob_p1,player_2,stat_2,over_prob_p2,correlation,team,sample,line_p1,line_p2
1455,Draymond Green,AST,0.487,Stephen Curry,AST,0.512,-0.203,GSW,193,6.5,4.5
1454,Draymond Green,AST,0.487,Stephen Curry,AST,0.512,-0.203,GSW,193,6.5,4.5
1110,Draymond Green,AST,0.487,Jonathan Kuminga,TRB,0.502,-0.121,GSW,79,6.5,3.5
1111,Draymond Green,AST,0.487,Jonathan Kuminga,TRB,0.502,-0.121,GSW,79,6.5,3.5
1375,Draymond Green,AST,0.487,Kevon Looney,TRB,0.489,-0.120,GSW,143,6.5,10.5
...,...,...,...,...,...,...,...,...,...,...,...
1242,Draymond Green,AST,0.487,Stephen Curry,TRB,0.500,0.119,GSW,193,6.5,5.0
1348,Draymond Green,AST,0.487,Stephen Curry,PTS,0.511,0.141,GSW,193,6.5,28.5
1349,Draymond Green,AST,0.487,Stephen Curry,PTS,0.511,0.141,GSW,193,6.5,28.5
1402,Draymond Green,AST,0.487,Klay Thompson,PTS,0.488,0.141,GSW,213,6.5,18.5


In [23]:
all_corr.reset_index(drop=True).drop_duplicates(inplace=True)
all_corr.loc[(all_corr.Sample > 20) & (all_corr.P1.isin(player_bets)) &(all_corr.P2.isin(player_bets))].tail(10)

Unnamed: 0,Team,Sample,P1,P2,Stat1,Corr,stat2
13634,MIA,57,Jimmy Butler,Kyle Lowry,AST,0.377,AST
13635,MIA,57,Kyle Lowry,Jimmy Butler,AST,0.377,AST
13592,MIA,107,Bam Adebayo,Tyler Herro,PTS,0.384,AST
13593,MIA,107,Tyler Herro,Bam Adebayo,AST,0.384,AST
13830,MIA,107,Tyler Herro,Bam Adebayo,AST,0.384,PTS
13831,MIA,107,Bam Adebayo,Tyler Herro,PTS,0.384,PTS
16383,NYK,75,Jalen Brunson,Julius Randle,AST,0.384,AST
16382,NYK,75,Julius Randle,Jalen Brunson,PTS,0.384,AST
16314,NYK,75,Jalen Brunson,Julius Randle,AST,0.384,PTS
16315,NYK,75,Julius Randle,Jalen Brunson,PTS,0.384,PTS


In [32]:
unabated.loc[(unabated.player=='Jimmy Butler') & (unabated.stat == 'PTS')]

Unnamed: 0,prop_id,player,player_id,line,stat,league_id,event_time,opp,Team,count,over_prob,under_prob,pred,time
1147,fc707179-a,Jimmy Butler,43429,19.5,PTS,3,2023-11-03 19:00:00-05:00,MIA,WAS,13,0.509,0.491,19.798,2023-11-03 14:29:00
1148,fc707179-a,Jimmy Butler,43429,20.0,PTS,3,2023-11-03 19:00:00-05:00,MIA,WAS,1,0.5,0.5,19.798,2023-11-03 14:29:00
