In [1]:
import torch
import seaborn as sea
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from game_log import load_current_line, GameLog
plt.rcParams['figure.figsize'] = [15, 8]
pd.set_option("display.precision", 3)

In [2]:
pd.set_option('display.max_rows', 10)
pd.set_option('display.min_rows', 10)
torch.set_printoptions(sci_mode=False)

In [3]:
opp_stats = pd.read_csv('reference_data/opponent_stats.csv',index_col=0)
schedule = pd.read_csv('reference_data/schedule.csv',index_col=0)
KM_vals = pd.read_csv('reference_data/KM_vals.csv',index_col=0)

today = dt.datetime.today()
year = today.year
month = today.month
day = today.day
today_str = f'{year}_{month}_{day}'


pp_path = f'Lines/pp/pp_{today_str}.csv'
unabated_path = f'Lines/unabated/unabated_{today_str}.csv'

pp_lines = load_current_line(pp_path,'prop_id','time')
unabated = load_current_line(unabated_path,'prop_id','time')

In [4]:
pdata = pd.read_csv('game_logs/pdata.csv')
pdata['season'] = pdata['date'].apply(lambda x: x.split('-')[0])
data = pd.read_csv('game_logs/data_2024.csv')
data23 = pd.read_csv('game_logs/data_2023.csv')
data = pd.concat((pdata,data,data23))
data= data.sort_values(by=['player','date']).reset_index(drop=True)
data.sample(3)

Unnamed: 0,player,G,date,series,team,H/A,opp,G#,W/L,GS,...,BLK,TOV,PF,PTS,GmSc,+/-,pos,KM,season,age
22716,Kyrie Irving,58,2023-04-01,,DAL,0,MIA,,-7,1,...,0,1,3,23,19.4,4.0,PG-SG,15,2023,31-009
29965,Precious Achiuwa,9,2022-11-04,,TOR,0,DAL,,-1,0,...,1,2,1,15,13.6,2.0,C,15,2023,23-046
12972,Isaiah Mobley,7,2022-11-28,,CLE,0,TOR,,-12,0,...,0,0,0,2,2.6,7.0,PF,15,2023,23-065


In [5]:
gl = GameLog(data)

In [6]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
def pt_corr(player_a_name, player_b_name, team,stat1='PTS',stat2='PTS'):
    team_games = data.loc[data.team==team]
    # extract two columns of interest (i.e. points scored by the two players)
    player_a_dates = set(team_games[team_games['player'] == player_a_name]['date'])
    player_b_dates = set(team_games[team_games['player'] == player_b_name]['date'])
    common_dates = player_a_dates.intersection(player_b_dates)
    stats = team_games[team_games['date'].isin(common_dates)]
    
    player_a_points = stats[stats['player'] == player_a_name][stat1].reset_index(drop=True)
    player_b_points = stats[stats['player'] == player_b_name][stat2].reset_index(drop=True)
    sample = len(player_a_points)
    # calculate the correlation coefficient between the two columns
    corr_coeff = player_a_points.corr(player_b_points)
    # return the correlation coefficient
    return corr_coeff,sample
pt_corr('Stephen Curry','Klay Thompson','GSW')

(0.037518115549773925, 194)

In [7]:
nba = unabated.loc[unabated.league_id==3]
team_list = nba.team.unique()
eligible_players =nba.player.unique()

In [8]:
nba.head(2)

Unnamed: 0,prop_id,player,player_id,line,stat,league_id,event_time,opp,team,count,over_prob,under_prob,pred,time
0,001cc313-b,Jordan Poole,43093,2.5,TRB,3,2023-11-10 18:00:00-06:00,CHA,WAS,12,0.493,0.507,2.643,2023-11-10 11:16:00
2,a5b5c6a4-d,Brandon Ingram,42496,6.5,AST,3,2023-11-10 19:00:00-06:00,HOU,NOP,1,0.441,0.559,6.081,2023-11-10 11:16:00


In [9]:
team2players = {}
for team in team_list:
    team_games = nba.loc[nba.team==team]
    players = team_games.player.unique()
    team2players[team]=players

In [10]:
stats = ['PTS','AST','TRB']
def get_correlations(team_list,player_list,stat1_list,stat2_list):
    correlate = []
    for team in nba.team.unique():
        for p in team2players[team]:
            if p in player_list:
                for p2 in team2players[team]:
                    if (p != p2) and (p2 in player_list):
                        for stat1 in stat1_list:
                            for stat2 in stat2_list:
                                corr,sample = pt_corr(p,p2,team,stat1,stat2)
                                correlate.append([p,stat1,p2,stat2,corr,team,sample])  
                                corr,sample = pt_corr(p2,p,team,stat2,stat1)
                                correlate.append([p,stat1,p2,stat2,corr,team,sample])
    all_corr = pd.DataFrame(correlate,columns=['player_1','stat_1','player_2','stat_2','correlation','team','sample']).sort_values(by='correlation').dropna()
    all_corr = all_corr.loc[all_corr['sample']>10]
    return all_corr
all_corr = get_correlations(team_list,eligible_players,stats,stats)

In [11]:
nba = pp_lines.loc[pp_lines.league_id==7]
nba_small = nba[['player','stat','line']]
nba_small.head(2)

Unnamed: 0,player,stat,line
2,Chet Holmgren,PTS+AST,17.5
3,Chet Holmgren,PTS+TRB,23.0


In [12]:
temp = all_corr.merge(nba_small,left_on=['player_1','stat_1'],right_on=['player','stat']).drop(['player','stat'],axis=1)
decisions = temp.merge(nba_small,left_on=['player_2','stat_2'],right_on=['player','stat'],suffixes=['_p1','_p2']).drop(['player','stat'],axis=1)
decisions

Unnamed: 0,player_1,stat_1,player_2,stat_2,correlation,team,sample,line_p1,line_p2
0,Jalen Duren,PTS,Killian Hayes,PTS,-0.034,DET,70,12.5,11.0
1,Jalen Duren,PTS,Killian Hayes,PTS,-0.034,DET,70,12.5,11.0
2,Jalen Duren,TRB,Killian Hayes,PTS,0.121,DET,70,9.5,11.0
3,Jalen Duren,TRB,Killian Hayes,PTS,0.121,DET,70,9.5,11.0
4,Isaiah Stewart,TRB,Killian Hayes,PTS,0.034,DET,56,6.5,11.0
...,...,...,...,...,...,...,...,...,...
1863,Lauri Markkanen,PTS,Jordan Clarkson,TRB,0.088,UTA,64,24.5,4.0
1864,Lauri Markkanen,TRB,Jordan Clarkson,PTS,0.179,UTA,64,9.0,19.5
1865,Lauri Markkanen,TRB,Jordan Clarkson,PTS,0.179,UTA,64,9.0,19.5
1866,Lauri Markkanen,PTS,Jordan Clarkson,PTS,0.105,UTA,64,24.5,19.5


In [13]:
unabated_small = unabated[['player', 'line', 'stat','over_prob', 'pred']]

In [14]:
temp = decisions.merge(unabated_small,left_on=['player_1','stat_1','line_p1'],right_on=['player','stat','line']).drop(['player','stat','line'],axis=1)
final = temp.merge(unabated_small,left_on=['player_2','stat_2','line_p2'],right_on=['player','stat','line'],suffixes=['_p1','_p2']).drop(['player','stat','line'],axis=1).drop_duplicates().reset_index(drop=True)
final

Unnamed: 0,player_1,stat_1,player_2,stat_2,correlation,team,sample,line_p1,line_p2,over_prob_p1,pred_p1,over_prob_p2,pred_p2
0,Jalen Duren,PTS,Killian Hayes,PTS,-0.034,DET,70,12.5,11.0,0.503,12.695,0.500,11.426
1,Jalen Duren,PTS,Killian Hayes,PTS,-0.034,DET,70,12.5,11.0,0.503,12.695,0.500,11.426
2,Jalen Duren,TRB,Killian Hayes,PTS,0.121,DET,70,9.5,11.0,0.515,9.786,0.500,11.426
3,Isaiah Stewart,TRB,Killian Hayes,PTS,0.034,DET,56,6.5,11.0,0.502,6.682,0.500,11.426
4,Cade Cunningham,AST,Killian Hayes,PTS,-0.059,DET,21,6.5,11.0,0.481,6.545,0.500,11.426
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1293,Lauri Markkanen,TRB,Jordan Clarkson,TRB,0.115,UTA,64,9.0,4.0,0.500,9.330,0.500,3.865
1294,Lauri Markkanen,PTS,Jordan Clarkson,TRB,0.088,UTA,64,24.5,4.0,0.503,24.708,0.500,3.865
1295,Lauri Markkanen,TRB,Jordan Clarkson,PTS,0.179,UTA,64,9.0,19.5,0.500,9.330,0.485,19.236
1296,Lauri Markkanen,PTS,Jordan Clarkson,PTS,0.105,UTA,64,24.5,19.5,0.503,24.708,0.485,19.236


In [15]:
final = final[['player_1', 'stat_1','over_prob_p1', 'player_2', 'stat_2','over_prob_p2', 'correlation', 'team',
       'sample', 'line_p1', 'line_p2']]

In [16]:
both_over = final.loc[(final.over_prob_p1 > 0.51) & (final.over_prob_p2 >0.51) & (final.correlation > 0.1)].sort_values(by='correlation').head(10)
both_over

Unnamed: 0,player_1,stat_1,over_prob_p1,player_2,stat_2,over_prob_p2,correlation,team,sample,line_p1,line_p2
388,Ivica Zubac,TRB,0.535,Norman Powell,PTS,0.518,0.115,LAC,69,8.5,10.5
424,Norman Powell,PTS,0.518,Ivica Zubac,TRB,0.535,0.115,LAC,69,10.5,8.5
995,Shai Gilgeous-Alexander,AST,0.534,Jalen Williams,PTS,0.511,0.118,OKC,71,5.5,15.5
1057,Jalen Williams,PTS,0.511,Shai Gilgeous-Alexander,AST,0.534,0.118,OKC,71,15.5,5.5
1102,Joel Embiid,AST,0.531,Tobias Harris,PTS,0.512,0.129,PHI,106,4.5,16.5
1122,Tobias Harris,PTS,0.512,Joel Embiid,AST,0.531,0.129,PHI,106,16.5,4.5
1015,Shai Gilgeous-Alexander,AST,0.534,Luguentz Dort,PTS,0.514,0.16,OKC,68,5.5,12.5
1052,Luguentz Dort,PTS,0.514,Shai Gilgeous-Alexander,AST,0.534,0.16,OKC,68,12.5,5.5
1016,Shai Gilgeous-Alexander,AST,0.534,Luguentz Dort,PTS,0.514,0.16,OKC,68,5.5,12.5
1053,Luguentz Dort,PTS,0.514,Shai Gilgeous-Alexander,AST,0.534,0.16,OKC,68,12.5,5.5


In [17]:
both_under = final.loc[(final.over_prob_p1 < 0.49) & (final.over_prob_p2 < 0.49) & (final.correlation > 0.1)].sort_values(by='correlation').head(10)
both_under

Unnamed: 0,player_1,stat_1,over_prob_p1,player_2,stat_2,over_prob_p2,correlation,team,sample,line_p1,line_p2
706,Daniel Gafford,TRB,0.476,Deni Avdija,PTS,0.481,0.105,WAS,78,6.5,12.5
714,Deni Avdija,PTS,0.481,Daniel Gafford,TRB,0.476,0.105,WAS,78,12.5,6.5
432,Karl-Anthony Towns,PTS,0.489,Mike Conley,TRB,0.481,0.132,MIN,20,21.5,2.5
553,Mike Conley,TRB,0.481,Karl-Anthony Towns,PTS,0.489,0.132,MIN,20,2.5,21.5
1213,Jeremy Sochan,AST,0.484,Zach Collins,AST,0.479,0.136,SAS,53,4.5,3.5
1243,Zach Collins,AST,0.479,Jeremy Sochan,AST,0.484,0.136,SAS,53,3.5,4.5
1236,Jeremy Sochan,AST,0.484,Zach Collins,PTS,0.486,0.14,SAS,53,4.5,12.5
1246,Zach Collins,PTS,0.486,Jeremy Sochan,AST,0.484,0.14,SAS,53,12.5,4.5
1237,Jeremy Sochan,AST,0.484,Zach Collins,PTS,0.486,0.14,SAS,53,4.5,12.5
1247,Zach Collins,PTS,0.486,Jeremy Sochan,AST,0.484,0.14,SAS,53,12.5,4.5


In [18]:
final.loc[((final.over_prob_p1 - 0.50) * (final.over_prob_p2 - 0.5) > 0) & (final.correlation > 0.15)].sort_values(by='over_prob_p1').head(10)

Unnamed: 0,player_1,stat_1,over_prob_p1,player_2,stat_2,over_prob_p2,correlation,team,sample,line_p1,line_p2
326,Kawhi Leonard,TRB,0.481,Russell Westbrook,PTS,0.494,0.193,LAC,27,6.5,14.5
376,Kawhi Leonard,TRB,0.481,Paul George,PTS,0.489,0.163,LAC,45,6.5,22.5
488,Mike Conley,TRB,0.481,Rudy Gobert,TRB,0.492,0.215,MIN,35,2.5,12.5
260,Domantas Sabonis,AST,0.484,Keegan Murray,PTS,0.497,0.308,SAC,88,7.5,15.5
329,Kawhi Leonard,PTS,0.485,Russell Westbrook,PTS,0.494,0.246,LAC,27,23.5,14.5
378,Kawhi Leonard,PTS,0.485,Paul George,PTS,0.489,0.392,LAC,45,23.5,22.5
348,Paul George,PTS,0.489,Kawhi Leonard,PTS,0.485,0.392,LAC,45,22.5,23.5
367,Paul George,PTS,0.489,Kawhi Leonard,TRB,0.481,0.163,LAC,45,22.5,6.5
439,Rudy Gobert,TRB,0.492,Mike Conley,TRB,0.481,0.215,MIN,35,12.5,2.5
93,Dorian Finney-Smith,PTS,0.494,Royce O'Neale,PTS,0.495,0.231,BKN,36,12.5,10.5
