In [1]:
import torch
import seaborn as sea
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from game_log import load_current_line, GameLog
plt.rcParams['figure.figsize'] = [15, 8]
pd.set_option("display.precision", 3)

In [2]:
pd.set_option('display.max_rows', 10)
pd.set_option('display.min_rows', 10)
torch.set_printoptions(sci_mode=False)

In [3]:
opp_stats = pd.read_csv('reference_data/opponent_stats.csv',index_col=0)
schedule = pd.read_csv('reference_data/schedule.csv',index_col=0)
KM_vals = pd.read_csv('reference_data/KM_vals.csv',index_col=0)

today = dt.datetime.today()
year = today.year
month = today.month
day = today.day
today_str = f'{year}_{month}_{day}'


pp_path = f'Lines/pp/pp_{today_str}.csv'
unabated_path = f'Lines/unabated/unabated_{today_str}.csv'

pp_lines = load_current_line(pp_path,'prop_id','time')
unabated = load_current_line(unabated_path,'prop_id','time')

In [4]:
pdata = pd.read_csv('game_logs/pdata.csv')
pdata['season'] = pdata['date'].apply(lambda x: x.split('-')[0])
data = pd.read_csv('game_logs/data_2024.csv')
data23 = pd.read_csv('game_logs/data_2023.csv')
data = pd.concat((pdata,data,data23))
data= data.sort_values(by=['player','date']).reset_index(drop=True)
data.sample(3)

Unnamed: 0,player,G,date,series,team,H/A,opp,G#,W/L,GS,...,BLK,TOV,PF,PTS,GmSc,+/-,pos,KM,season,age
3796,Caleb Houstan,38,2023-03-07,,ORL,1,MIL,,-11,0,...,0,0,0,3,1.5,5.0,SF,15,2023,20-057
21482,Khris Middleton,20,2021-07-11,FIN,MIL,1,PHX,3.0,20,1,...,0,2,3,18,15.1,8.0,SF,15,2021,
20193,Kentavious Caldwell-Pope,3,2016-04-22,EC1,DET,1,CLE,3.0,-10,1,...,0,1,1,18,15.2,-10.0,SG,15,2016,


In [5]:
gl = GameLog(data)

In [6]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
def pt_corr(player_a_name, player_b_name, team,stat1='PTS',stat2='PTS'):
    team_games = data.loc[data.team==team]
    # extract two columns of interest (i.e. points scored by the two players)
    player_a_dates = set(team_games[team_games['player'] == player_a_name]['date'])
    player_b_dates = set(team_games[team_games['player'] == player_b_name]['date'])
    common_dates = player_a_dates.intersection(player_b_dates)
    stats = team_games[team_games['date'].isin(common_dates)]
    
    player_a_points = stats[stats['player'] == player_a_name][stat1].reset_index(drop=True)
    player_b_points = stats[stats['player'] == player_b_name][stat2].reset_index(drop=True)
    sample = len(player_a_points)
    # calculate the correlation coefficient between the two columns
    corr_coeff = player_a_points.corr(player_b_points)
    # return the correlation coefficient
    return corr_coeff,sample
pt_corr('Stephen Curry','Klay Thompson','GSW')

(0.036091901629984, 193)

In [11]:
nba = unabated.loc[unabated.league_id==3]
team_list = nba.team.unique()
eligible_players =nba.player.unique()

In [12]:
nba.head(2)

Unnamed: 0,prop_id,player,player_id,line,stat,league_id,event_time,opp,team,count,over_prob,under_prob,pred,time
0,001cb870-f,Cade Cunningham,242200,35.5,PTS+TRB+AST,3,2023-11-08 19:00:00-06:00,MIL,DET,13,0.507,0.493,35.832,2023-11-08 08:58:00
1,a9361fa3-d,Brandon Miller,417246,1.5,3P,3,2023-11-08 18:00:00-06:00,WAS,CHA,7,0.543,0.457,1.817,2023-11-08 08:58:00


In [14]:
team2players = {}
for team in team_list:
    team_games = nba.loc[nba.team==team]
    players = team_games.player.unique()
    team2players[team]=players

In [15]:
stats = ['PTS','AST','TRB']
def get_correlations(team_list,player_list,stat1_list,stat2_list):
    correlate = []
    for team in nba.team.unique():
        for p in team2players[team]:
            if p in player_list:
                for p2 in team2players[team]:
                    if (p != p2) and (p2 in player_list):
                        for stat1 in stat1_list:
                            for stat2 in stat2_list:
                                corr,sample = pt_corr(p,p2,team,stat1,stat2)
                                correlate.append([p,stat1,p2,stat2,corr,team,sample])  
                                corr,sample = pt_corr(p2,p,team,stat2,stat1)
                                correlate.append([p,stat1,p2,stat2,corr,team,sample])
    all_corr = pd.DataFrame(correlate,columns=['player_1','stat_1','player_2','stat_2','correlation','team','sample']).sort_values(by='correlation').dropna()
    all_corr = all_corr.loc[all_corr['sample']>10]
    return all_corr
all_corr = get_correlations(team_list,eligible_players,stats,stats)

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)


In [16]:
nba = pp_lines.loc[pp_lines.league_id==7]
nba_small = nba[['player','stat','line']]
nba_small.head(2)

Unnamed: 0,player,stat,line
2,DeMar DeRozan,BLK+STL,1.5
3,DeMar DeRozan,PTS+TRB,24.5


In [17]:
temp = all_corr.merge(nba_small,left_on=['player_1','stat_1'],right_on=['player','stat']).drop(['player','stat'],axis=1)
decisions = temp.merge(nba_small,left_on=['player_2','stat_2'],right_on=['player','stat'],suffixes=['_p1','_p2']).drop(['player','stat'],axis=1)
decisions

Unnamed: 0,player_1,stat_1,player_2,stat_2,correlation,team,sample,line_p1,line_p2
0,Ben Simmons,TRB,Spencer Dinwiddie,AST,-0.625,BKN,17,9.0,5.5
1,Mikal Bridges,PTS,Spencer Dinwiddie,AST,0.083,BKN,85,22.5,5.5
2,Mikal Bridges,PTS,Spencer Dinwiddie,AST,0.083,BKN,85,22.5,5.5
3,Ben Simmons,AST,Spencer Dinwiddie,AST,-0.200,BKN,17,6.0,5.5
4,Mikal Bridges,TRB,Spencer Dinwiddie,AST,-0.197,BKN,85,4.5,5.5
...,...,...,...,...,...,...,...,...,...
2711,Jalen Green,PTS,Alperen Sengun,TRB,0.145,HOU,76,21.5,9.5
2712,Jalen Green,PTS,Alperen Sengun,PTS,0.166,HOU,76,21.5,15.5
2713,Jalen Green,PTS,Alperen Sengun,PTS,0.166,HOU,76,21.5,15.5
2714,Jalen Green,PTS,Alperen Sengun,AST,0.192,HOU,76,21.5,5.5


In [18]:
unabated_small = unabated[['player', 'line', 'stat','over_prob', 'pred']]

In [23]:
temp = decisions.merge(unabated_small,left_on=['player_1','stat_1','line_p1'],right_on=['player','stat','line']).drop(['player','stat','line'],axis=1)
final = temp.merge(unabated_small,left_on=['player_2','stat_2','line_p2'],right_on=['player','stat','line'],suffixes=['_p1','_p2']).drop(['player','stat','line'],axis=1).drop_duplicates().reset_index(drop=True)
final

Unnamed: 0,player_1,stat_1,player_2,stat_2,correlation,team,sample,line_p1,line_p2,over_prob_p1,pred_p1,over_prob_p2,pred_p2
0,Ben Simmons,TRB,Spencer Dinwiddie,AST,-0.625,BKN,17,9.0,5.5,0.500,9.369,0.494,5.630
1,Mikal Bridges,PTS,Spencer Dinwiddie,AST,0.083,BKN,85,22.5,5.5,0.496,22.623,0.494,5.630
2,Ben Simmons,AST,Spencer Dinwiddie,AST,-0.200,BKN,17,6.0,5.5,0.500,6.328,0.494,5.630
3,Mikal Bridges,TRB,Spencer Dinwiddie,AST,-0.197,BKN,85,4.5,5.5,0.500,4.672,0.494,5.630
4,Dorian Finney-Smith,PTS,Spencer Dinwiddie,AST,-0.065,BKN,35,11.5,5.5,0.514,11.785,0.494,5.630
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1859,Desmond Bane,AST,Ziaire Williams,TRB,0.086,MEM,46,4.5,5.0,0.528,4.821,0.500,4.956
1860,Jalen Green,PTS,Alperen Sengun,TRB,0.145,HOU,76,21.5,9.5,0.508,21.765,0.523,9.842
1861,Jalen Green,PTS,Alperen Sengun,PTS,0.166,HOU,76,21.5,15.5,0.508,21.765,0.509,15.757
1862,Jalen Green,PTS,Alperen Sengun,AST,0.192,HOU,76,21.5,5.5,0.508,21.765,0.532,5.880


In [24]:
final = final[['player_1', 'stat_1','over_prob_p1', 'player_2', 'stat_2','over_prob_p2', 'correlation', 'team',
       'sample', 'line_p1', 'line_p2']]

In [30]:
both_over = final.loc[(final.over_prob_p1 > 0.51) & (final.over_prob_p2 >0.51) & (final.correlation > 0.1)].sort_values(by='correlation').head(10)
both_over

Unnamed: 0,player_1,stat_1,over_prob_p1,player_2,stat_2,over_prob_p2,correlation,team,sample,line_p1,line_p2
1631,Jordan Clarkson,PTS,0.516,Lauri Markkanen,PTS,0.518,0.113,UTA,63,16.5,23.5
1689,Lauri Markkanen,PTS,0.518,Jordan Clarkson,PTS,0.516,0.113,UTA,63,23.5,16.5
865,Zach LaVine,TRB,0.526,DeMar DeRozan,PTS,0.512,0.139,CHI,78,3.5,20.5
891,DeMar DeRozan,PTS,0.512,Zach LaVine,TRB,0.526,0.139,CHI,78,20.5,3.5
866,Zach LaVine,TRB,0.526,DeMar DeRozan,PTS,0.512,0.139,CHI,78,3.5,20.5
892,DeMar DeRozan,PTS,0.512,Zach LaVine,TRB,0.526,0.139,CHI,78,20.5,3.5
1508,Luguentz Dort,PTS,0.517,Shai Gilgeous-Alexander,PTS,0.517,0.177,OKC,67,11.5,28.5
1519,Shai Gilgeous-Alexander,PTS,0.517,Luguentz Dort,PTS,0.517,0.177,OKC,67,28.5,11.5
920,Evan Mobley,TRB,0.511,Darius Garland,AST,0.517,0.232,CLE,76,9.5,6.5
951,Darius Garland,AST,0.517,Evan Mobley,TRB,0.511,0.232,CLE,76,6.5,9.5


In [31]:
both_under = final.loc[(final.over_prob_p1 < 0.49) & (final.over_prob_p2 < 0.49) & (final.correlation > 0.1)].sort_values(by='correlation').head(10)
both_under

Unnamed: 0,player_1,stat_1,over_prob_p1,player_2,stat_2,over_prob_p2,correlation,team,sample,line_p1,line_p2
713,Austin Reaves,AST,0.481,Anthony Davis,AST,0.484,0.115,LAL,62,4.5,2.5
773,Anthony Davis,AST,0.484,Austin Reaves,AST,0.481,0.115,LAL,62,2.5,4.5
1029,Bam Adebayo,PTS,0.482,Kyle Lowry,TRB,0.462,0.115,MIA,71,20.5,3.5
1073,Kyle Lowry,TRB,0.462,Bam Adebayo,PTS,0.482,0.115,MIA,71,3.5,20.5
1030,Bam Adebayo,PTS,0.482,Kyle Lowry,TRB,0.462,0.115,MIA,71,20.5,3.5
1074,Kyle Lowry,TRB,0.462,Bam Adebayo,PTS,0.482,0.115,MIA,71,3.5,20.5
1319,De'Anthony Melton,PTS,0.482,Tyrese Maxey,TRB,0.47,0.118,PHI,68,8.5,3.5
1296,Tyrese Maxey,TRB,0.47,De'Anthony Melton,PTS,0.482,0.118,PHI,68,3.5,8.5
1320,De'Anthony Melton,PTS,0.482,Tyrese Maxey,TRB,0.47,0.118,PHI,68,8.5,3.5
1297,Tyrese Maxey,TRB,0.47,De'Anthony Melton,PTS,0.482,0.118,PHI,68,3.5,8.5


In [27]:
final.loc[((final.over_prob_p1 - 0.50) * (final.over_prob_p2 - 0.5) > 0) & (final.correlation > 0.15)].sort_values(by='over_prob_p1').head(10)

Unnamed: 0,player_1,stat_1,over_prob_p1,player_2,stat_2,over_prob_p2,correlation,team,sample,line_p1,line_p2
1256,RJ Barrett,AST,0.479,Mitchell Robinson,PTS,0.481,0.182,NYK,60,2.5,7.5
1162,Jalen Brunson,AST,0.479,RJ Barrett,PTS,0.499,0.157,NYK,71,5.5,19.5
843,Zach LaVine,AST,0.48,Coby White,PTS,0.489,0.352,CHI,78,3.5,12.5
1134,Bennedict Mathurin,TRB,0.48,Tyrese Haliburton,PTS,0.494,0.157,IND,62,3.5,23.5
1125,Bennedict Mathurin,TRB,0.48,Myles Turner,PTS,0.496,0.182,IND,67,3.5,16.5
1126,Bennedict Mathurin,TRB,0.48,Myles Turner,PTS,0.496,0.182,IND,67,3.5,16.5
1215,Mitchell Robinson,PTS,0.481,RJ Barrett,AST,0.479,0.182,NYK,60,7.5,2.5
1623,Talen Horton-Tucker,AST,0.488,Walker Kessler,TRB,0.489,0.198,UTA,68,5.5,8.5
1625,Talen Horton-Tucker,AST,0.488,Walker Kessler,TRB,0.489,0.198,UTA,68,5.5,8.5
1647,Talen Horton-Tucker,AST,0.488,Lauri Markkanen,TRB,0.498,0.157,UTA,61,5.5,8.5
