In [1]:
import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

from joblib import dump, load

import numpy as np

In [3]:
# Read the modelling table (first CSV column becomes the index) and keep only
# the rows whose position is wide receiver.
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1.loc[xl1['pos'] == 'WR']

In [4]:
# Define features and labels

# Per-game stat names, in the column order shared by the labels and by every
# suffixed block of feature columns below.
stat_names = (
    ['passA', 'passC', 'passY', 'passT', 'passI', 'pass2']
    + ['rushA', 'rushY', 'rushT', 'rush2']
    + ['recC', 'recY', 'recT', 'rec2']
    + ['fum']
    + ['XPA', 'XPM', 'FGA', 'FGM', 'FG50']
    + ['defSack', 'defI', 'defSaf', 'defFum', 'defBlk', 'defT']
    + ['defPtsAgainst', 'defPassYAgainst', 'defRushYAgainst', 'defYdsAgainst']
)

# The `def*` stats are the only ones that also exist with an `_opp` suffix.
def_stat_names = [name for name in stat_names if name.startswith('def')]

# Feature columns: week/age, then the stats with `_curr`, `_prior1` and
# `_prior2` suffixes (each with its gamesPlayed counter), then the `_opp`
# defence columns, then the two categorical columns.
feature_names = (
    ['week', 'age']
    + [f'{name}_curr' for name in stat_names]
    + ['gamesPlayed_curr', 'gamesPlayed_prior1']
    + [f'{name}_prior1' for name in stat_names]
    + ['gamesPlayed_prior2']
    + [f'{name}_prior2' for name in stat_names]
    + [f'{name}_curr_opp' for name in def_stat_names]
    + [f'{name}_prior1_opp' for name in def_stat_names]
    + ['pos', 'posRank']
)

# Labels: the un-suffixed stat columns.
y = xl1[stat_names]

# Features: everything listed above, categorical columns still un-encoded.
X = xl1[feature_names]

# Identifying columns kept separately from the model inputs.
droppedCols = xl1[['season', 'week', 'team', 'player', 'age', 'pos', 'posRank', 'opponent']]

In [5]:
# One-hot encode the two categorical feature columns; each is replaced by one
# indicator column per distinct value, appended after the remaining columns.
categorical_features = ['pos', 'posRank']
X = pd.get_dummies(X, columns=categorical_features)
X

Unnamed: 0,week,age,passA_curr,passC_curr,passY_curr,passT_curr,passI_curr,pass2_curr,rushA_curr,rushY_curr,...,defBlk_prior1_opp,defT_prior1_opp,defPtsAgainst_prior1_opp,defPassYAgainst_prior1_opp,defRushYAgainst_prior1_opp,defYdsAgainst_prior1_opp,pos_WR,posRank_WR1,posRank_WR2,posRank_WR3
20290,1.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.0625,0.0625,25.0000,255.1250,121.2500,376.3750,1,1,0,0
20291,1.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.0000,0.1250,21.1250,201.3125,132.0000,333.3125,1,1,0,0
20292,1.0,21.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.1875,0.4375,27.1250,232.1250,139.0000,371.1250,1,1,0,0
20293,1.0,29.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.0625,0.0625,22.9375,222.0625,117.7500,339.8125,1,1,0,0
20294,1.0,31.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.1250,0.1250,19.5625,249.5000,95.6250,345.1250,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38603,17.0,22.0,0.0,0.0,0.0,0.0,0.0,0.0,0.428571,1.785714,...,0.0000,0.1875,20.5625,191.7500,112.8125,304.5625,1,0,1,0
38604,17.0,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.1250,0.0000,29.6875,258.8125,134.4375,393.2500,1,0,1,0
38605,17.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.0000,0.1250,23.0625,221.1875,112.8125,334.0000,1,1,0,0
38606,17.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.153846,...,0.0000,0.2500,23.4375,232.8750,119.6250,352.5000,1,1,0,0


In [6]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=1)
X_train, X_test, y_train, y_test = split_parts


In [7]:
# Fit a multi-output random forest on the training split. max_depth is the
# hyperparameter being tuned here; the other settings are held fixed.
forest_params = dict(
    n_estimators=100,
    max_depth=19,
    min_samples_split=64,
    random_state=1,
)
regressor = RandomForestRegressor(**forest_params)
regressor.fit(X_train, y_train)

# Print the score on the training split first, then on the held-out split.
for features, targets in ((X_train, y_train), (X_test, y_test)):
    print(regressor.score(features, targets))

0.6710288882007978
0.5572068723241955


In [30]:
# Read the rows to score and keep only complete (no missing values) wide
# receiver rows ranked WR1-WR3. The index is reset so it is a clean 0..n-1
# range that later index-based merges can rely on.
xl2 = pd.read_csv('data_cleaned/toPredict.csv', index_col=0)
is_ranked_wr = xl2['posRank'].isin(['WR1', 'WR2', 'WR3']) & (xl2['pos'] == 'WR')
xl2 = xl2.loc[is_ranked_wr].dropna().reset_index(drop=True)

# Per-game stat names, in the order used by every suffixed column block.
pred_stat_names = [
    'passA', 'passC', 'passY', 'passT', 'passI', 'pass2',
    'rushA', 'rushY', 'rushT', 'rush2',
    'recC', 'recY', 'recT', 'rec2',
    'fum',
    'XPA', 'XPM', 'FGA', 'FGM', 'FG50',
    'defSack', 'defI', 'defSaf', 'defFum', 'defBlk', 'defT',
    'defPtsAgainst', 'defPassYAgainst', 'defRushYAgainst', 'defYdsAgainst',
]
# Only the `def*` stats have `_opp` counterparts.
pred_def_stat_names = [name for name in pred_stat_names if name.startswith('def')]

pred_feature_names = (
    ['week', 'age', 'pos', 'posRank']
    + [f'{name}_curr' for name in pred_stat_names]
    + ['gamesPlayed_curr', 'gamesPlayed_prior1']
    + [f'{name}_prior1' for name in pred_stat_names]
    + ['gamesPlayed_prior2']
    + [f'{name}_prior2' for name in pred_stat_names]
    + [f'{name}_curr_opp' for name in pred_def_stat_names]
    + [f'{name}_prior1_opp' for name in pred_def_stat_names]
)

# NOTE: this rebinds `X`, which previously held the training features;
# re-running the train/test split cell after this one needs the training
# cells to be re-run first.
X = xl2[pred_feature_names]

# Identifying columns, kept aside so predictions can be labelled afterwards.
header = xl2[[
    'season',
    'week',
    'team',
    'player',
    'age',
    'KR',
    'PR',
    'RES',
    'pos',
    'posRank',
    'opponent'
]]

# Encode categorical features
X = pd.get_dummies(X, columns=['pos', 'posRank'])

# Align the encoded frame to the exact columns (and order) the model was
# fitted on. If a category level is absent from this file its indicator
# column would otherwise be missing and predict() would fail; an absent level
# means "no row has it", so it is filled with 0.
X = X.reindex(columns=X_train.columns, fill_value=0)



In [31]:
# Predict every target stat for the rows in `X`.
# Column labels are taken from the training labels `y` (the frame the model
# was fitted against) rather than a second hand-typed list, so the two cannot
# drift apart. The index is taken from `X` so the index-based merges with
# `header` further down match row for row.
y_pred = pd.DataFrame(regressor.predict(X), index=X.index, columns=y.columns)
y_pred

Unnamed: 0,passA,passC,passY,passT,passI,pass2,rushA,rushY,rushT,rush2,...,defSack,defI,defSaf,defFum,defBlk,defT,defPtsAgainst,defPassYAgainst,defRushYAgainst,defYdsAgainst
0,0.002109,0.001068,0.025776,0.000168,0.000039,0.0,0.128840,0.752714,0.001097,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.001662,0.001204,0.017874,0.000124,0.000039,0.0,0.136380,0.741279,0.000783,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.001934,0.000791,0.014548,0.000042,0.000039,0.0,0.125140,0.671090,0.002448,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.002776,0.001335,0.019975,0.000056,0.000039,0.0,0.128760,0.665689,0.001160,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.002877,0.001796,0.023863,0.000080,0.000013,0.0,0.112063,0.649814,0.000642,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5435,0.002529,0.001723,0.024254,0.000137,0.000039,0.0,0.123636,0.772844,0.000989,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5436,0.002699,0.002291,0.026353,0.000095,0.000013,0.0,0.144010,0.870204,0.001461,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5437,0.002091,0.001708,0.019931,0.000095,0.000039,0.0,0.135858,0.783268,0.001127,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5438,0.002877,0.001692,0.024484,0.000129,0.000039,0.0,0.134403,0.712056,0.001882,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
# Calculate FANTASY scores

# Points awarded per unit of each column of `a_pred` (built below), matched
# purely by position: the predicted stat columns in `y_pred` order, followed
# by the two binned defence columns `defPtsBin` and `defYdsBin`.
multiplier = [
    0,0,.04,4,-2,2,.1,.1,6,2,.25,.1,6,2,-2,0,1,0,3,5,1,2,2,2,1.5,6,0,0,0,0,1,1
]

# Bin edges and the points attached to each bin (one label per interval).
binList_defPts = [-5,0,6,13,17,21,27,34,45,59,99]
binList_defYds = [0,274,324,375,425,999]

ptList_defPts = [10,8,7,5,3,2,0,-1,-3,-5]
ptList_defYds = [5,2,0,-2,-5]

# Convert predicted points/yards allowed into bin points. Values outside the
# outermost edges become NaN and are skipped by the sum below.
y_pred['defPtsBin'] = pd.cut(y_pred['defPtsAgainst'], bins=binList_defPts, include_lowest=True, labels=ptList_defPts)
y_pred['defYdsBin'] = pd.cut(y_pred['defYdsAgainst'], bins=binList_defYds, include_lowest=True, labels=ptList_defYds)

# Bring in `pos` (row-aligned on the index) so the bin points can be zeroed
# for every row that is not a defence.
a_pred = header.merge(y_pred, left_index=True, right_index=True)

not_defense = a_pred['pos'] != 'DF'
a_pred.loc[not_defense, 'defPtsBin'] = 0
a_pred.loc[not_defense, 'defYdsBin'] = 0

# Drop the identifying columns again, leaving only the scored columns.
a_pred = a_pred.drop(columns=header.columns)

# The weights are positional, so a column-count mismatch would silently be a
# mis-scoring risk; fail loudly with a clear message instead.
if len(multiplier) != a_pred.shape[1]:
    raise ValueError(
        f"multiplier has {len(multiplier)} weights but a_pred has "
        f"{a_pred.shape[1]} columns"
    )

def multer(row):
    """Multiply `row` element-wise by the module-level `multiplier` weights.

    `row` may be a single row (Series) or a whole frame; for a frame the
    weights are applied along the columns.
    """
    return row.multiply(multiplier)

# Vectorised scoring: cast to float (the bin columns are categorical), weight
# every column at once, then sum across columns for one score per row.
c = multer(a_pred.astype(float)).sum(axis=1)
c = c.to_frame(name='pred')

In [46]:
# Weekly values: attach each predicted score to its identifying columns,
# matching rows on the shared index.
weeklyPred = pd.merge(header, c, left_index=True, right_index=True)
weeklyPred

Unnamed: 0,season,week,team,player,age,KR,PR,RES,pos,posRank,opponent,pred
0,2022,1.0,NEP,MATT SLATER,36.0,NO,NO,NO,WR,WR3,MIA,1.913872
1,2022,2.0,NEP,MATT SLATER,36.0,NO,NO,NO,WR,WR3,PIT,1.940325
2,2022,4.0,NEP,MATT SLATER,36.0,NO,NO,NO,WR,WR3,GBP,1.846282
3,2022,6.0,NEP,MATT SLATER,36.0,NO,NO,NO,WR,WR3,CLE,1.844826
4,2022,8.0,NEP,MATT SLATER,36.0,NO,NO,NO,WR,WR3,NYJ,2.113664
...,...,...,...,...,...,...,...,...,...,...,...,...
5435,2022,9.0,ATL,JARED BERNHARDT,24.0,NO,NO,NO,WR,WR3,LAC,1.627897
5436,2022,11.0,ATL,JARED BERNHARDT,24.0,NO,NO,NO,WR,WR3,CHI,1.564502
5437,2022,13.0,ATL,JARED BERNHARDT,24.0,NO,NO,NO,WR,WR3,PIT,1.618206
5438,2022,17.0,ATL,JARED BERNHARDT,24.0,NO,NO,NO,WR,WR3,ARI,1.555638


In [43]:
# Take the week-1 rows and scale their prediction by 17.
# NOTE(review): presumably a 17-week season projection from the week-1
# estimate — confirm the intended multiplier.
# `.copy()` makes `times17` an independent frame, so the assignment below
# modifies it directly instead of a slice of `weeklyPred` (which is what
# triggers pandas' SettingWithCopyWarning).
times17 = weeklyPred.loc[weeklyPred['week'] == 1].copy()
times17['pred'] = times17['pred'] * 17
times17.sort_values(by='pred', ascending=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,season,week,team,player,age,KR,PR,RES,pos,posRank,opponent,pred
1028,2022,1.0,LAR,COOPER KUPP,29.0,NO,PR2,NO,WR,WR1,BUF,222.536105
3664,2022,1.0,CIN,JAMARR CHASE,22.0,NO,NO,NO,WR,WR1,PIT,217.739622
2932,2022,1.0,MIN,JUSTIN JEFFERSON,23.0,NO,NO,NO,WR,WR1,GBP,202.510597
842,2022,1.0,MIA,TYREEK HILL,28.0,NO,PR1,NO,WR,WR1,NEP,196.395979
2414,2022,1.0,PIT,DIONTAE JOHNSON,26.0,NO,NO,NO,WR,WR1,CIN,190.681746
...,...,...,...,...,...,...,...,...,...,...,...,...
1198,2022,1.0,CHI,DAVID MOORE,27.0,NO,NO,RES,WR,WR3,SFO,22.334789
1997,2022,1.0,ATL,CAMERON BATSON,26.0,KR2,NO,NO,WR,WR3,NOS,21.265881
2771,2022,1.0,PIT,GUNNER OLSZEWSKI,25.0,KR1,PR1,NO,WR,WR3,CIN,21.196540
3969,2022,1.0,SEA,DWAYNE ESKRIDGE,25.0,NO,NO,NO,WR,WR3,DEN,20.437514


In [47]:
# Sum the weekly predictions per player name and list the totals from
# highest to lowest.
player_totals = weeklyPred.groupby('player')['pred'].sum()
player_totals.sort_values(ascending=False)

player
COOPER KUPP         227.640976
JAMARR CHASE        213.364983
JUSTIN JEFFERSON    201.438797
TYREEK HILL         182.410199
CEEDEE LAMB         173.719632
                       ...    
CAMERON BATSON       22.362522
GUNNER OLSZEWSKI     22.114708
DWAYNE ESKRIDGE      21.505398
DEANDRE CARTER       21.252199
GREG DORTCH          19.254401
Name: pred, Length: 320, dtype: float64