In [1]:
import pandas as pd
from utils.betting import (
	get_pp_close,
)
pd.set_option("display.precision", 3)

In [2]:
pp = get_pp_close()
pp = pp.loc[pp.league_name == 'NFL'].copy(deep=True).reset_index(drop=True)

In [3]:
games = pd.DataFrame()
for year in range(2000,2025):
    temp = pd.read_csv(f'../reference_data/NFL/game_logs_{year}.csv',index_col=0,parse_dates=['date'])
    games = pd.concat((games, temp))
ref2pp = {
    'GNB':'GB',
    'JAX':'JAC',
    'KAN':'KC',
    'LVR':'LV',
    'NOR':'NO',
    'NWE':'NE',
    'SFO':'SF',     
    'TAM':'TB',
}

games['team'] = games['team'].apply(lambda x: ref2pp.get(x,x))
players = pd.read_csv('../reference_data/NFL/player_info.csv',index_col=0)

games['Rush+Rec TDs'] = games['Rec TDs'] + games['Rush TDs']
games['Completion Percentage'] = 100 * (games['Pass Completions'] / games['Pass Attempts'])
games['Pass+Rush Yds'] = games['Pass Yards'] + games['Rush Yards']
games['Rush+Rec Yds'] = games['Receiving Yards'] + games['Rush Yards']
games[['XPM','FG']] = games[['XPM','FG']].astype(float)
games['Kicking Points'] = games['XPM'] + (3 * games['FG'])
games['Fantasy Score'] = ((games['Pass Yards'] * 0.04) + (games['Pass TDs'] * 4) + (2 * -games['INT']) + (games['Rush Yards'] * 0.1) + (games['Receiving Yards'] * 0.1) + #missing 2 pt conv
                    (games['Receptions']) + (-games['Fumbles Lost']) + (games['Rush+Rec TDs'] * 6))

In [4]:
players.loc[(players.PosRank < 90) & (players.GS > 1)]

Unnamed: 0,Rk,player,team,FantPos,age,G,GS,Cmp,Att,Yds,...,FDPt,VBD,PosRank,OvRank,year,round,pick,Pos,college,draft_year
0,1,Marshall Faulk,STL,RB,27,14,14.0,0.0,0.0,0.0,...,419.4,228.0,1,1.0,2000,1.0,2.0,RB,San Diego St.,1994.0
1,2,Edgerrin James,IND,RB,22,16,16.0,0.0,0.0,0.0,...,363.8,181.0,2,2.0,2000,1.0,4.0,RB,Miami (FL),1999.0
2,3,Jeff Garcia,SFO,QB,30,16,16.0,355.0,561.0,4278.0,...,350.5,149.0,1,3.0,2000,,,,,2000.0
3,4,Daunte Culpepper,MIN,QB,23,16,16.0,297.0,474.0,3937.0,...,354.5,147.0,2,4.0,2000,1.0,11.0,QB,Central Florida,1999.0
4,5,Eddie George,TEN,RB,27,16,16.0,0.0,0.0,0.0,...,309.2,133.0,3,5.0,2000,1.0,14.0,RB,Ohio St.,1996.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15449,363,MyCole Pruitt,PIT,TE,32,2,2.0,0.0,0.0,0.0,...,1.4,,71,,2024,5.0,143.0,TE,Southern Illinois,2015.0
15452,366,Durham Smythe,MIA,TE,29,4,2.0,0.0,0.0,0.0,...,2.1,,70,,2024,4.0,123.0,TE,Notre Dame,2018.0
15455,368,Adam Trautman,DEN,TE,27,4,4.0,0.0,0.0,0.0,...,1.8,,68,,2024,3.0,105.0,TE,Dayton,2020.0
15483,396,Gerald Everett,CHI,TE,30,4,3.0,0.0,0.0,0.0,...,1.5,,85,,2024,2.0,44.0,TE,South Alabama,2017.0


In [5]:
players['FantPos'].value_counts()
k_players = games.loc[games.pos=='K']

In [6]:
top_players = players.loc[(players.PosRank < 90) & (players.GS > 1)]
games = games.merge(top_players[['player','FantPos']]).drop_duplicates()

In [7]:
games = pd.concat((games,k_players))

In [8]:
games['FantPos']= games['FantPos'].fillna(games['pos'])

In [9]:
games.columns

Index(['player', 'team', 'Pass Completions', 'Pass Attempts', 'Pass Yards',
       'Pass TDs', 'INT', 'Sacks', 'Yards Sacked', 'Longest Pass',
       'Passing Rate', 'Rush Attempts', 'Rush Yards', 'Rush TDs',
       'Longest Rush', 'Rec Targets', 'Receptions', 'Receiving Yards',
       'Rec TDs', 'Longest Reception', 'Fumbles', 'Fumbles Lost', 'XPM', 'XPA',
       'FG', 'FGA', 'date', 'opp', 'home', 'roof', 'surface', 'weather',
       'game_line', 'favored', 'total', 'Kicking Points', 'season', 'pos',
       'snaps', 'pct', 'Rush+Rec TDs', 'Completion Percentage',
       'Pass+Rush Yds', 'Rush+Rec Yds', 'Fantasy Score', 'FantPos'],
      dtype='object')

In [10]:
common_cols = [ 'team', 'opp', 'home', 'roof', 'surface', 'weather', 'game_line', 'favored', 'total',
       'season',  'date']
qb_cols =     ['player', 'Pass Completions', 'Pass Attempts', 'Pass Yards',
       'Pass TDs', 'INT', 'Sacks', 'Yards Sacked', 'Longest Pass',
       'Passing Rate',  'snaps', 'pct', 'pos', 'FantPos',]
wr_cols =     ['player', 'Rec Targets', 'Receptions', 'Receiving Yards', 'Rush+Rec Yds', 
       'Rec TDs','Rush+Rec TDs', 'Longest Reception', 'Fantasy Score',  'snaps', 'pct', 'pos', 'FantPos']
rb_cols =     ['player', 'Rush Attempts', 'Rush Yards', 'Rush TDs', 'Rush+Rec Yds', 
       'Longest Rush', 'Rec Targets', 'Receptions', 'Receiving Yards',
       'Rec TDs','Rush+Rec TDs', 'Longest Reception', 'Fumbles', 'Fumbles Lost', 'Fantasy Score', 'snaps', 'pct',
        'pos', 'FantPos']

In [11]:
common_cols = [ 'team', 'opp', 'home', 'roof', 'surface', 'weather', 'game_line', 'favored', 'total',
       'season',  'date']
qb_cols =     ['player', 'Pass Completions', 'Pass Attempts', 'Pass Yards',
       'Pass TDs', 'INT', 'Sacks', 'Yards Sacked', 'Longest Pass',
       'Passing Rate',  'snaps', 'pct', 'pos', 'FantPos',]
wr_cols =     ['player', 'Rec Targets', 'Receptions', 'Receiving Yards', 'Rush+Rec Yds', 
       'Rec TDs','Rush+Rec TDs', 'Longest Reception', 'Fantasy Score',  'snaps', 'pct', 'pos', 'FantPos']
rb_cols =     ['player', 'Rush Attempts', 'Rush Yards', 'Rush TDs', 'Rush+Rec Yds', 
       'Longest Rush', 'Rec Targets', 'Receptions', 'Receiving Yards',
       'Rec TDs','Rush+Rec TDs', 'Longest Reception', 'Fumbles', 'Fumbles Lost', 'Fantasy Score', 'snaps', 'pct',
        'pos', 'FantPos']

k_cols = ['player', 'XPM','Kicking Points', 'Fantasy Score', 'FG','snaps', 'pct', 'pos', 'FantPos']

small = games.loc[games['date'] > '2014-09-01']

qb = small.loc[small.FantPos=='QB'][
    qb_cols + common_cols
]
wr = small.loc[small.FantPos=='WR'][
    wr_cols + common_cols
]
rb = small.loc[small.FantPos=='RB'][
    rb_cols + common_cols
]
te = small.loc[small.FantPos=='TE'][
    wr_cols + common_cols
]
k = small.loc[small.FantPos=='K'][
    k_cols + common_cols
]

In [12]:
context_cols = ['player', 'snaps','pct', 'FantPos','pos'] + common_cols

pairs = pd.DataFrame()
for pair in [(qb,wr), (qb,rb), (qb,te), (wr,wr),(wr,rb),(wr,te),(rb,rb),(rb,te), (qb,k), (wr,k), (rb,k)]:
    temp1 = pair[0].melt(id_vars=context_cols)
    temp2 = pair[1].melt(id_vars=context_cols)
    temp3 = temp1.merge(temp2,on=common_cols)
    pairs=pd.concat((pairs, temp3))
pairs = pairs.loc[pairs.player_x!=pairs.player_y].drop_duplicates()

In [13]:
print(len(pairs))
pairs.head(5)

22355141


Unnamed: 0,player_x,snaps_x,pct_x,FantPos_x,pos_x,team,opp,home,roof,surface,...,date,variable_x,value_x,player_y,snaps_y,pct_y,FantPos_y,pos_y,variable_y,value_y
0,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Andre Caldwell,41.0,55%,WR,WR,Rec Targets,5.0
1,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Demaryius Thomas,67.0,89%,WR,WR,Rec Targets,11.0
2,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Emmanuel Sanders,70.0,93%,WR,WR,Rec Targets,9.0
3,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Andre Caldwell,41.0,55%,WR,WR,Receptions,2.0
4,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Demaryius Thomas,67.0,89%,WR,WR,Receptions,4.0


In [14]:
small = pairs
small
#16263501 
#8639003  
#8639003
#2401432 

Unnamed: 0,player_x,snaps_x,pct_x,FantPos_x,pos_x,team,opp,home,roof,surface,...,date,variable_x,value_x,player_y,snaps_y,pct_y,FantPos_y,pos_y,variable_y,value_y
0,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Andre Caldwell,41.0,55%,WR,WR,Rec Targets,5.0
1,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Demaryius Thomas,67.0,89%,WR,WR,Rec Targets,11.0
2,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Emmanuel Sanders,70.0,93%,WR,WR,Rec Targets,9.0
3,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Andre Caldwell,41.0,55%,WR,WR,Receptions,2.0
4,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Demaryius Thomas,67.0,89%,WR,WR,Receptions,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
704083,De'Von Achane,44.0,73%,RB,RB,MIA,TEN,1,outdoors,grass,...,2024-09-30,Fumbles Lost,0.0,Jason Sanders,0.0,0%,K,K,FG,2.0
704084,De'Von Achane,44.0,73%,RB,RB,MIA,TEN,1,outdoors,grass,...,2024-09-30,Fantasy Score,5.9,Jason Sanders,0.0,0%,K,K,XPM,
704085,De'Von Achane,44.0,73%,RB,RB,MIA,TEN,1,outdoors,grass,...,2024-09-30,Fantasy Score,5.9,Jason Sanders,0.0,0%,K,K,Kicking Points,
704086,De'Von Achane,44.0,73%,RB,RB,MIA,TEN,1,outdoors,grass,...,2024-09-30,Fantasy Score,5.9,Jason Sanders,0.0,0%,K,K,Fantasy Score,


In [15]:
pos_corr= small.groupby(['FantPos_x','FantPos_y','variable_x','variable_y'])[['value_x','value_y']].corr()
pos_corr = pos_corr.loc[pos_corr.value_x != 1].reset_index().drop(['value_y' ,'level_4'],axis=1)
pos_corr.sort_values(by='value_x')

Unnamed: 0,FantPos_x,FantPos_y,variable_x,variable_y,value_x
497,RB,RB,Rush Attempts,Rush Attempts,-0.310
31,QB,K,Sacks,XPM,-0.268
35,QB,K,Yards Sacked,XPM,-0.252
323,RB,K,Longest Rush,Fantasy Score,-0.242
525,RB,RB,Rush Yards,Rush Attempts,-0.240
...,...,...,...,...,...
272,QB,WR,Pass TDs,Rush+Rec TDs,0.304
268,QB,WR,Pass TDs,Rec TDs,0.313
21,QB,K,Pass Yards,Fantasy Score,0.344
19,QB,K,Pass TDs,XPM,0.410


In [16]:
corr = small.groupby(['player_x','player_y', 'FantPos_x','FantPos_y','variable_x','variable_y'])[['value_x','value_y']].corr()
corr

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,value_x,value_y
player_x,player_y,FantPos_x,FantPos_y,variable_x,variable_y,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A.J. Brown,Adam Humphries,WR,WR,Fantasy Score,Fantasy Score,value_x,1.000,-0.195
A.J. Brown,Adam Humphries,WR,WR,Fantasy Score,Fantasy Score,value_y,-0.195,1.000
A.J. Brown,Adam Humphries,WR,WR,Fantasy Score,Longest Reception,value_x,1.000,-0.085
A.J. Brown,Adam Humphries,WR,WR,Fantasy Score,Longest Reception,value_y,-0.085,1.000
A.J. Brown,Adam Humphries,WR,WR,Fantasy Score,Rec TDs,value_x,1.000,-0.044
...,...,...,...,...,...,...,...,...
Zonovan Knight,Tyler Conklin,RB,TE,Rush+Rec Yds,Receptions,value_y,-0.003,1.000
Zonovan Knight,Tyler Conklin,RB,TE,Rush+Rec Yds,Rush+Rec TDs,value_x,1.000,
Zonovan Knight,Tyler Conklin,RB,TE,Rush+Rec Yds,Rush+Rec TDs,value_y,,
Zonovan Knight,Tyler Conklin,RB,TE,Rush+Rec Yds,Rush+Rec Yds,value_x,1.000,0.012


In [17]:
gp = small.groupby(['player_x','player_y','FantPos_x','FantPos_y','team', 'variable_x','variable_y'])['date'].count().reset_index()
gp.columns = ['player_x','player_y', 'FantPos_x','FantPos_y','team','variable_x','variable_y', 'gp' ]
gp = gp[['player_x','player_y','gp']].drop_duplicates()
gp.sort_values(by='gp')

Unnamed: 0,player_x,player_y,gp
1496056,Melvin Gordon,Kenyan Drake,1
578464,Darrius Heyward-Bey,Ben Tate,1
1754450,Robert Foster,Jeremy Sprinkle,1
1600958,Nick Foles,Albert Wilson,1
1874032,Scott Miller,Ryan Griffin,1
...,...,...,...
1650496,Patrick Mahomes,Harrison Butker,114
1652602,Patrick Mahomes,Travis Kelce,116
19706,Aaron Rodgers,Davante Adams,117
584400,Davante Adams,Mason Crosby,123


In [18]:
corr = corr.reset_index()
corr2 = corr.loc[corr.value_x != 1]
corr2 = corr2.merge(gp)
corr2.loc[corr2.gp > 5].sort_values(by='value_x')

Unnamed: 0,player_x,player_y,FantPos_x,FantPos_y,variable_x,variable_y,level_6,value_x,value_y,gp
1854168,Mike Glennon,Graham Gano,QB,K,Pass Completions,XPM,value_y,-1.0,1.0,6
2256285,Shane Vereen,Brandon Bolden,RB,RB,Receptions,Longest Reception,value_y,-1.0,1.0,10
2540959,Victor Cruz,Andre Williams,WR,RB,Fantasy Score,Longest Reception,value_y,-1.0,1.0,6
755326,DeMarco Murray,Anthony Fasano,RB,TE,Fumbles,Receptions,value_y,-1.0,1.0,9
210637,Benny Cunningham,Adam Shaheen,RB,TE,Rush Attempts,Rush+Rec TDs,value_y,-1.0,1.0,8
...,...,...,...,...,...,...,...,...,...,...
2607746,Zonovan Knight,Tyler Conklin,RB,TE,Rush Yards,Rush+Rec TDs,value_y,,,7
2607750,Zonovan Knight,Tyler Conklin,RB,TE,Rush+Rec TDs,Rec TDs,value_y,,,7
2607754,Zonovan Knight,Tyler Conklin,RB,TE,Rush+Rec TDs,Rush+Rec TDs,value_y,,,7
2607758,Zonovan Knight,Tyler Conklin,RB,TE,Rush+Rec Yds,Rec TDs,value_y,,,7


In [19]:
corr2.columns

Index(['player_x', 'player_y', 'FantPos_x', 'FantPos_y', 'variable_x',
       'variable_y', 'level_6', 'value_x', 'value_y', 'gp'],
      dtype='object')

In [20]:
out = corr2.merge(pos_corr[['FantPos_x', 'FantPos_y', 'variable_x',
       'variable_y', 'value_x']], on=['FantPos_x', 'FantPos_y', 'variable_x',
       'variable_y'], suffixes=['','_pos']).drop(['level_6','value_y'],axis=1)
out

Unnamed: 0,player_x,player_y,FantPos_x,FantPos_y,variable_x,variable_y,value_x,gp,value_x_pos
0,A.J. Brown,Adam Humphries,WR,WR,Fantasy Score,Fantasy Score,-0.195,18,-0.049
1,A.J. Brown,Chester Rogers,WR,WR,Fantasy Score,Fantasy Score,-0.451,13,-0.049
2,A.J. Brown,Corey Davis,WR,WR,Fantasy Score,Fantasy Score,0.057,31,-0.049
3,A.J. Brown,DeVonta Smith,WR,WR,Fantasy Score,Fantasy Score,-0.028,38,-0.049
4,A.J. Brown,Delanie Walker,WR,WR,Fantasy Score,Fantasy Score,-0.162,6,-0.049
...,...,...,...,...,...,...,...,...,...
2607759,Zonovan Knight,Brock Wright,RB,TE,Rush+Rec Yds,Rush+Rec Yds,,1,-0.025
2607760,Zonovan Knight,C.J. Uzomah,RB,TE,Rush+Rec Yds,Rush+Rec Yds,-0.435,6,-0.025
2607761,Zonovan Knight,Jeremy Ruckert,RB,TE,Rush+Rec Yds,Rush+Rec Yds,,1,-0.025
2607762,Zonovan Knight,Jeremy Ruckert,RB,TE,Rush+Rec Yds,Rush+Rec Yds,,1,-0.025


In [21]:
out['C'] = ((20 * out['value_x_pos']) + (out['gp'] * out['value_x'])) / (20 + out['gp'])
out

Unnamed: 0,player_x,player_y,FantPos_x,FantPos_y,variable_x,variable_y,value_x,gp,value_x_pos,C
0,A.J. Brown,Adam Humphries,WR,WR,Fantasy Score,Fantasy Score,-0.195,18,-0.049,-0.118
1,A.J. Brown,Chester Rogers,WR,WR,Fantasy Score,Fantasy Score,-0.451,13,-0.049,-0.207
2,A.J. Brown,Corey Davis,WR,WR,Fantasy Score,Fantasy Score,0.057,31,-0.049,0.015
3,A.J. Brown,DeVonta Smith,WR,WR,Fantasy Score,Fantasy Score,-0.028,38,-0.049,-0.035
4,A.J. Brown,Delanie Walker,WR,WR,Fantasy Score,Fantasy Score,-0.162,6,-0.049,-0.075
...,...,...,...,...,...,...,...,...,...,...
2607759,Zonovan Knight,Brock Wright,RB,TE,Rush+Rec Yds,Rush+Rec Yds,,1,-0.025,
2607760,Zonovan Knight,C.J. Uzomah,RB,TE,Rush+Rec Yds,Rush+Rec Yds,-0.435,6,-0.025,-0.120
2607761,Zonovan Knight,Jeremy Ruckert,RB,TE,Rush+Rec Yds,Rush+Rec Yds,,1,-0.025,
2607762,Zonovan Knight,Jeremy Ruckert,RB,TE,Rush+Rec Yds,Rush+Rec Yds,,1,-0.025,


In [22]:
out.columns = ['player_1', 'player_2', 'FantPos_1', 'FantPos_2', 'stat_1',
       'stat_2', 'corr', 'gp', 'pos_corr', 'w_corr']

In [23]:
out

Unnamed: 0,player_1,player_2,FantPos_1,FantPos_2,stat_1,stat_2,corr,gp,pos_corr,w_corr
0,A.J. Brown,Adam Humphries,WR,WR,Fantasy Score,Fantasy Score,-0.195,18,-0.049,-0.118
1,A.J. Brown,Chester Rogers,WR,WR,Fantasy Score,Fantasy Score,-0.451,13,-0.049,-0.207
2,A.J. Brown,Corey Davis,WR,WR,Fantasy Score,Fantasy Score,0.057,31,-0.049,0.015
3,A.J. Brown,DeVonta Smith,WR,WR,Fantasy Score,Fantasy Score,-0.028,38,-0.049,-0.035
4,A.J. Brown,Delanie Walker,WR,WR,Fantasy Score,Fantasy Score,-0.162,6,-0.049,-0.075
...,...,...,...,...,...,...,...,...,...,...
2607759,Zonovan Knight,Brock Wright,RB,TE,Rush+Rec Yds,Rush+Rec Yds,,1,-0.025,
2607760,Zonovan Knight,C.J. Uzomah,RB,TE,Rush+Rec Yds,Rush+Rec Yds,-0.435,6,-0.025,-0.120
2607761,Zonovan Knight,Jeremy Ruckert,RB,TE,Rush+Rec Yds,Rush+Rec Yds,,1,-0.025,
2607762,Zonovan Knight,Jeremy Ruckert,RB,TE,Rush+Rec Yds,Rush+Rec Yds,,1,-0.025,


In [24]:
out.sort_values(by='w_corr').dropna().to_csv('../reference_data/NFL/player_corr.csv')

In [25]:
out.loc[(out.player_1 =='David Montgomery') & (out.player_2=='Jahmyr Gibbs')].sort_values(by='w_corr').dropna().head(15)

Unnamed: 0,player_1,player_2,FantPos_1,FantPos_2,stat_1,stat_2,corr,gp,pos_corr,w_corr
2050810,David Montgomery,Jahmyr Gibbs,RB,RB,Rush TDs,Longest Rush,-0.564,20,-0.026,-0.295
2125184,David Montgomery,Jahmyr Gibbs,RB,RB,Rush+Rec TDs,Longest Rush,-0.564,20,-0.025,-0.295
2026727,David Montgomery,Jahmyr Gibbs,RB,RB,Rush Attempts,Rush Attempts,-0.215,20,-0.31,-0.262
2005271,David Montgomery,Jahmyr Gibbs,RB,RB,Rush Attempts,Fantasy Score,-0.304,20,-0.21,-0.257
2113845,David Montgomery,Jahmyr Gibbs,RB,RB,Rush+Rec TDs,Fantasy Score,-0.424,20,-0.088,-0.256
2038806,David Montgomery,Jahmyr Gibbs,RB,RB,Rush TDs,Fantasy Score,-0.424,20,-0.085,-0.254
2029103,David Montgomery,Jahmyr Gibbs,RB,RB,Rush Attempts,Rush TDs,-0.387,20,-0.116,-0.251
2147855,David Montgomery,Jahmyr Gibbs,RB,RB,Rush+Rec TDs,Rush+Rec TDs,-0.423,20,-0.064,-0.244
2074811,David Montgomery,Jahmyr Gibbs,RB,RB,Rush TDs,Rush+Rec TDs,-0.423,20,-0.062,-0.242
1672158,David Montgomery,Jahmyr Gibbs,RB,RB,Fantasy Score,Longest Rush,-0.387,20,-0.093,-0.24


In [26]:
pd.set_option('display.float_format', '{:.3f}'.format)

In [42]:
pos_corr.loc[pos_corr.variable_x == "INT"].sort_values(by='value_x',ascending=False).head(40)

Unnamed: 0,FantPos_x,FantPos_y,variable_x,variable_y,value_x
1,QB,K,INT,Fantasy Score,0.259
237,QB,WR,INT,Rec Targets,0.084
42,QB,RB,INT,Rec Targets,0.07
165,QB,TE,INT,Rec Targets,0.061
44,QB,RB,INT,Receptions,0.049
43,QB,RB,INT,Receiving Yards,0.024
167,QB,TE,INT,Receptions,0.022
38,QB,RB,INT,Fumbles Lost,0.017
239,QB,WR,INT,Receptions,0.016
37,QB,RB,INT,Fumbles,0.011


In [45]:
pos_corr.loc[pos_corr.variable_y == "Rec Targets"].sort_values(by='value_x',ascending=False).head(30)

Unnamed: 0,FantPos_x,FantPos_y,variable_x,variable_y,value_x
253,QB,WR,Pass Attempts,Rec Targets,0.199
84,QB,RB,Pass Completions,Rec Targets,0.183
70,QB,RB,Pass Attempts,Rec Targets,0.178
261,QB,WR,Pass Completions,Rec Targets,0.174
277,QB,WR,Pass Yards,Rec Targets,0.152
181,QB,TE,Pass Attempts,Rec Targets,0.149
189,QB,TE,Pass Completions,Rec Targets,0.139
112,QB,RB,Pass Yards,Rec Targets,0.112
205,QB,TE,Pass Yards,Rec Targets,0.11
237,QB,WR,INT,Rec Targets,0.084


In [29]:
current = players.loc[players.year==2024,['player','FantPos']]
nfl = pp.loc[(pp.event_time > '2024-10-10') & (pp.alt_line=='standard')]
nfl = nfl[['player','team','line','stat','opp','event_time']].merge(current)
pp_pairs = nfl.merge(nfl,on=['team','opp', 'event_time'])
pp_pairs = pp_pairs.loc[pp_pairs.player_x!=pp_pairs.player_y]
pp_pairs

Unnamed: 0,player_x,team,line_x,stat_x,opp,event_time,FantPos_x,player_y,line_y,stat_y,FantPos_y
10,Kenneth Walker III,SEA,3.000,Receptions,SF,2024-10-10 18:00:00-06:00,RB,Geno Smith,0.500,INT,QB
11,Kenneth Walker III,SEA,3.000,Receptions,SF,2024-10-10 18:00:00-06:00,RB,Geno Smith,13.500,Rush Yards,QB
12,Kenneth Walker III,SEA,3.000,Receptions,SF,2024-10-10 18:00:00-06:00,RB,Geno Smith,9.500,Longest Rush,QB
13,Kenneth Walker III,SEA,3.000,Receptions,SF,2024-10-10 18:00:00-06:00,RB,Geno Smith,3.000,Rush Attempts,QB
14,Kenneth Walker III,SEA,3.000,Receptions,SF,2024-10-10 18:00:00-06:00,RB,Geno Smith,23.500,Pass Completions,QB
...,...,...,...,...,...,...,...,...,...,...,...
51927,Aaron Jones,MIN,0.500,Rush+Rec TDs,DET,2024-10-20 11:00:00-06:00,RB,Sam Darnold,17.500,Fantasy Score,QB
51928,Aaron Jones,MIN,0.500,Rush+Rec TDs,DET,2024-10-20 11:00:00-06:00,RB,Johnny Mundt,11.500,Longest Reception,TE
51929,Aaron Jones,MIN,0.500,Rush+Rec TDs,DET,2024-10-20 11:00:00-06:00,RB,Johnny Mundt,17.500,Receiving Yards,TE
51930,Aaron Jones,MIN,0.500,Rush+Rec TDs,DET,2024-10-20 11:00:00-06:00,RB,Johnny Mundt,2.000,Receptions,TE


In [46]:
p1 = 'Bijan Robinson'
p2 = 'Tyler Allgeier'
s1 = 'Rush Attempts'
s2 = 'Rush Attempts'
out.loc[(out.player_1==p1)&
        (out.player_2==p2)&
        (out.stat_1==s1)&
        (out.stat_2==s2)]

Unnamed: 0,player_1,player_2,FantPos_1,FantPos_2,stat_1,stat_2,corr,gp,pos_corr,w_corr
2026225,Bijan Robinson,Tyler Allgeier,RB,RB,Rush Attempts,Rush Attempts,-0.327,23,-0.31,-0.319


In [31]:
pp_pairs.loc[(pp_pairs.player_x==p1)&
        (pp_pairs.player_y==p2)&
        (pp_pairs.stat_x==s1)&
        (pp_pairs.stat_y==s2)]

Unnamed: 0,player_x,team,line_x,stat_x,opp,event_time,FantPos_x,player_y,line_y,stat_y,FantPos_y


In [32]:
pos_corr.loc[pos_corr.FantPos_x =='WR'].sort_values(by='value_x').head(60)

Unnamed: 0,FantPos_x,FantPos_y,variable_x,variable_y,value_x
941,WR,WR,Rush+Rec Yds,Rush+Rec Yds,-0.062
937,WR,WR,Rush+Rec Yds,Rec Targets,-0.061
909,WR,WR,Rec Targets,Rush+Rec Yds,-0.061
917,WR,WR,Receiving Yards,Rush+Rec Yds,-0.06
938,WR,WR,Rush+Rec Yds,Receiving Yards,-0.06
881,WR,WR,Fantasy Score,Rec Targets,-0.06
902,WR,WR,Rec Targets,Fantasy Score,-0.06
694,WR,K,Rush+Rec TDs,FG,-0.059
914,WR,WR,Receiving Yards,Receiving Yards,-0.058
678,WR,K,Rec TDs,FG,-0.058


In [33]:
small.head(5)

Unnamed: 0,player_x,snaps_x,pct_x,FantPos_x,pos_x,team,opp,home,roof,surface,...,date,variable_x,value_x,player_y,snaps_y,pct_y,FantPos_y,pos_y,variable_y,value_y
0,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Andre Caldwell,41.0,55%,WR,WR,Rec Targets,5.0
1,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Demaryius Thomas,67.0,89%,WR,WR,Rec Targets,11.0
2,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Emmanuel Sanders,70.0,93%,WR,WR,Rec Targets,9.0
3,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Andre Caldwell,41.0,55%,WR,WR,Receptions,2.0
4,Peyton Manning,75.0,100%,QB,QB,DEN,IND,1,outdoors,grass,...,2014-09-07,Pass Completions,22.0,Demaryius Thomas,67.0,89%,WR,WR,Receptions,4.0


In [34]:
import pingouin as pg

def compute_partial_corr(group):
    return pg.partial_corr(data=group, x='value_x', y='value_y', covar='total')

# Apply the partial correlation computation for each player position
partial_corr_by_position = small.groupby(['FantPos_x','FantPos_y','variable_x','variable_y']).apply(compute_partial_corr)

In [35]:
partial_corr_by_position.sort_values(by='r')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,n,r,CI95%,p-val
FantPos_x,FantPos_y,variable_x,variable_y,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
QB,K,Yards Sacked,Fantasy Score,pearson,12,-0.392,"[-0.8, 0.27]",0.233
RB,RB,Rush Attempts,Rush Attempts,pearson,19208,-0.311,"[-0.32, -0.3]",0.000
QB,K,Sacks,XPM,pearson,5743,-0.263,"[-0.29, -0.24]",0.000
QB,K,Yards Sacked,XPM,pearson,5743,-0.248,"[-0.27, -0.22]",0.000
RB,RB,Rush Attempts,Rush Yards,pearson,19208,-0.240,"[-0.25, -0.23]",0.000
...,...,...,...,...,...,...,...,...
QB,WR,Pass TDs,Rush+Rec TDs,pearson,21562,0.298,"[0.29, 0.31]",0.000
QB,WR,Pass TDs,Rec TDs,pearson,21562,0.307,"[0.3, 0.32]",0.000
QB,K,Pass Yards,Fantasy Score,pearson,12,0.367,"[-0.3, 0.79]",0.267
QB,K,Pass TDs,XPM,pearson,5743,0.394,"[0.37, 0.42]",0.000


In [36]:
import statsmodels.api as sm

# X variables (Player B's stat and control variables)
X = small[['Player_B_stat', 'other_stat1', 'other_stat2']]
X = sm.add_constant(X)  # Adds a constant term to the model
y = small['Player_A_stat']  # Dependent variable (Player A's stat)

# Fit the regression model
model = sm.OLS(y, X).fit()
print(model.summary())

KeyError: "None of [Index(['Player_B_stat', 'other_stat1', 'other_stat2'], dtype='object')] are in the [columns]"

In [72]:
common_cols

['team',
 'opp',
 'home',
 'roof',
 'surface',
 'weather',
 'line',
 'favored',
 'total',
 'season',
 'date']

In [74]:
small.iloc[0]

player_x                                              Tom Brady
snaps_x                                                    71.0
pct_x                                                       89%
FantPos_x                                                    QB
pos_x                                                        QB
team                                                         NE
opp                                                         CLE
home                                                          0
roof                                                   outdoors
surface                                                   grass
weather       58 degrees, relative humidity 58%, wind 13 mph...
line                                                      -10.0
favored                                                      NE
total                                                      47.5
season                                                     2016
date                                    

In [79]:
games[['team','opp','favored','line','total', 'date']]

Unnamed: 0,team,opp,favored,line,total,date
0,CHI,MIN,MIN,-4.5,46.5,2000-09-03
1,CHI,TAM,TB,-7.0,36.5,2000-09-10
2,CHI,NYG,CHI,-2.0,42.0,2000-09-17
3,CHI,DET,CHI,-1.0,37.5,2000-09-24
4,CHI,GNB,GB,-5.5,41.0,2000-10-01
...,...,...,...,...,...,...
712902,CIN,KAN,KC,-6.0,48.0,2024-09-15
712903,CIN,WAS,CIN,-7.5,46.5,2024-09-23
712904,CIN,CAR,CIN,-4.5,47.0,2024-09-29
712905,HOU,CHI,HOU,-6.0,45.0,2024-09-15
