In [1]:
import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

from joblib import dump, load

import numpy as np

In [60]:
# Statistics the models predict (the regression targets).
# Order matters: the prediction cell assigns these names positionally to the
# columns of the model output.
_non_def_stats = [
    'passA', 'passC', 'passY', 'passT', 'passI', 'pass2',
    'rushA', 'rushY', 'rushT', 'rush2',
    'recC', 'recY', 'recT', 'rec2',
    'fum',
    'XPA', 'XPM', 'FGA', 'FGM', 'FG50',
]
_def_stats = [
    'defSack', 'defI', 'defSaf', 'defFum', 'defBlk', 'defT',
    'defPtsAgainst', 'defPassYAgainst', 'defRushYAgainst', 'defYdsAgainst',
]

labels = _non_def_stats + _def_stats


def _suffixed(names, suffix):
    """Return a new list with '_<suffix>' appended to every name."""
    return [name + '_' + suffix for name in names]


# Model inputs: every label stat suffixed _curr / _prior1 / _prior2 (each with
# a gamesPlayed column), the `def*` stats suffixed _curr_opp / _prior1_opp,
# and the two categorical columns `pos` and `posRank`.
features = (
    ['week', 'age']
    + _suffixed(labels, 'curr')
    + ['gamesPlayed_curr', 'gamesPlayed_prior1']
    + _suffixed(labels, 'prior1')
    + ['gamesPlayed_prior2']
    + _suffixed(labels, 'prior2')
    + _suffixed(_def_stats, 'curr_opp')
    + _suffixed(_def_stats, 'prior1_opp')
    + ['pos', 'posRank']
)


In [71]:
# Load the modelling table and keep only the rows whose `pos` is 'WR'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1[xl1['pos'] == 'WR']

# Targets, and predictors with the categorical columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit one multi-output forest: it predicts every column of `labels` at once.
# NOTE: `regressor` is reassigned by each position's training cell.
rf_params = dict(n_estimators=100, max_depth=19, min_samples_split=64, random_state=1)
regressor = RandomForestRegressor(**rf_params).fit(X_train, y_train)

# Persist the fitted model
dump(regressor, 'rfmodel_WR1.joblib')

# R^2 on the training rows, then on the held-out rows
train_score = regressor.score(X_train, y_train)
test_score = regressor.score(X_test, y_test)
print(train_score)
print(test_score)

0.6710288882007978
0.5572068723241955


In [72]:
# Load the modelling table and keep only the rows whose `pos` is 'RB'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1[xl1['pos'] == 'RB']

# Targets, and predictors with the categorical columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit one multi-output forest: it predicts every column of `labels` at once.
# NOTE: `regressor` is reassigned by each position's training cell.
rf_params = dict(n_estimators=100, max_depth=19, min_samples_split=64, random_state=1)
regressor = RandomForestRegressor(**rf_params).fit(X_train, y_train)

# Persist the fitted model
dump(regressor, 'rfmodel_RB1.joblib')

# R^2 on the training rows, then on the held-out rows
train_score = regressor.score(X_train, y_train)
test_score = regressor.score(X_test, y_test)
print(train_score)
print(test_score)

0.6352989823361703
0.5914009194459453


In [73]:
# Load the modelling table and keep only the rows whose `pos` is 'QB'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1[xl1['pos'] == 'QB']

# Targets, and predictors with the categorical columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit one multi-output forest: it predicts every column of `labels` at once.
# NOTE: `regressor` is reassigned by each position's training cell.
rf_params = dict(n_estimators=100, max_depth=19, min_samples_split=64, random_state=1)
regressor = RandomForestRegressor(**rf_params).fit(X_train, y_train)

# Persist the fitted model
dump(regressor, 'rfmodel_QB1.joblib')

# R^2 on the training rows, then on the held-out rows
train_score = regressor.score(X_train, y_train)
test_score = regressor.score(X_test, y_test)
print(train_score)
print(test_score)

0.634475389185847
0.6087219280291438


In [74]:
# Load the modelling table and keep only the rows whose `pos` is 'TE'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1[xl1['pos'] == 'TE']

# Targets, and predictors with the categorical columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit one multi-output forest: it predicts every column of `labels` at once.
# NOTE: `regressor` is reassigned by each position's training cell.
rf_params = dict(n_estimators=100, max_depth=19, min_samples_split=64, random_state=1)
regressor = RandomForestRegressor(**rf_params).fit(X_train, y_train)

# Persist the fitted model
dump(regressor, 'rfmodel_TE1.joblib')

# R^2 on the training rows, then on the held-out rows
train_score = regressor.score(X_train, y_train)
test_score = regressor.score(X_test, y_test)
print(train_score)
print(test_score)

0.6125140751655654
0.5275337932458792


In [75]:
# Load the modelling table and keep only the rows whose `pos` is 'PK'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1[xl1['pos'] == 'PK']

# Targets, and predictors with the categorical columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit one multi-output forest: it predicts every column of `labels` at once.
# NOTE: `regressor` is reassigned by each position's training cell.
rf_params = dict(n_estimators=100, max_depth=19, min_samples_split=64, random_state=1)
regressor = RandomForestRegressor(**rf_params).fit(X_train, y_train)

# Persist the fitted model
dump(regressor, 'rfmodel_PK1.joblib')

# R^2 on the training rows, then on the held-out rows
train_score = regressor.score(X_train, y_train)
test_score = regressor.score(X_test, y_test)
print(train_score)
print(test_score)

0.8668317951162946
0.838600598600479


In [76]:
# Load the modelling table and keep only the rows whose `pos` is 'DF'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1[xl1['pos'] == 'DF']

# Targets, and predictors with the categorical columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit one multi-output forest: it predicts every column of `labels` at once.
# NOTE: `regressor` is reassigned by each position's training cell.
rf_params = dict(n_estimators=100, max_depth=19, min_samples_split=64, random_state=1)
regressor = RandomForestRegressor(**rf_params).fit(X_train, y_train)

# Persist the fitted model
dump(regressor, 'rfmodel_DF1.joblib')

# R^2 on the training rows, then on the held-out rows
train_score = regressor.score(X_train, y_train)
test_score = regressor.score(X_test, y_test)
print(train_score)
print(test_score)

0.7094616764404338
0.671480102473799


In [54]:
# Load the rows to predict. Keep rows with pos 'WR' and posRank WR1-WR3,
# drop any row containing a missing value, and renumber the index from 0.
xl2 = pd.read_csv('data_cleaned/toPredict.csv', index_col=0)
keep_rows = xl2['posRank'].isin(['WR1', 'WR2', 'WR3']) & (xl2['pos'] == 'WR')
xl2 = xl2[keep_rows].dropna().reset_index(drop=True)

# Identifying columns, set aside so predictions can be matched back to rows
header_columns = [
    'season',
    'week',
    'team',
    'player',
    'age',
    'KR',
    'PR',
    'RES',
    'pos',
    'posRank',
    'opponent',
]
header = xl2[header_columns]

# Model inputs with the categorical columns one-hot encoded
X2 = pd.get_dummies(xl2[features], columns=['pos', 'posRank'])



In [65]:
# Predict with the WR model that the WR training cell saved to disk, instead of
# the global `regressor`: that name is reassigned by every position's training
# cell, so after a top-to-bottom run it holds the DF model, not the WR one.
# NOTE: joblib.load unpickles arbitrary code; only load model files you created.
wr_regressor = load('rfmodel_WR1.joblib')

# Line the one-hot columns up with those seen during fitting. Columns missing
# from X2 are filled with 0; this is a no-op when the columns already match.
# `feature_names_in_` is only set by newer scikit-learn releases when fitting
# on a DataFrame, hence the getattr fallback to X2 unchanged.
fit_columns = getattr(wr_regressor, 'feature_names_in_', None)
if fit_columns is None:
    X2_model = X2
else:
    X2_model = X2.reindex(columns=list(fit_columns), fill_value=0)

# One column per entry of `labels`, indexed like X2 so later index-based
# merges with `header` line up row for row.
y_pred = pd.DataFrame(wr_regressor.predict(X2_model), columns=labels, index=X2.index)
y_pred

Unnamed: 0,passA,passC,passY,passT,passI,pass2,rushA,rushY,rushT,rush2,...,defSack,defI,defSaf,defFum,defBlk,defT,defPtsAgainst,defPassYAgainst,defRushYAgainst,defYdsAgainst
0,0.002109,0.001068,0.025776,0.000168,0.000039,0.0,0.128840,0.752714,0.001097,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.001662,0.001204,0.017874,0.000124,0.000039,0.0,0.136380,0.741279,0.000783,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.001934,0.000791,0.014548,0.000042,0.000039,0.0,0.125140,0.671090,0.002448,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.002776,0.001335,0.019975,0.000056,0.000039,0.0,0.128760,0.665689,0.001160,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.002877,0.001796,0.023863,0.000080,0.000013,0.0,0.112063,0.649814,0.000642,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5435,0.002529,0.001723,0.024254,0.000137,0.000039,0.0,0.123636,0.772844,0.000989,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5436,0.002699,0.002291,0.026353,0.000095,0.000013,0.0,0.144010,0.870204,0.001461,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5437,0.002091,0.001708,0.019931,0.000095,0.000039,0.0,0.135858,0.783268,0.001127,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5438,0.002877,0.001692,0.024484,0.000129,0.000039,0.0,0.134403,0.712056,0.001882,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [66]:
# Calculate FANTASY scores

# Points per unit of each stat, in column order: the 30 entries of `labels`
# followed by the two binned columns added below (defPtsBin, defYdsBin).
multiplier = [
    0,0,.04,4,-2,2,.1,.1,6,2,.25,.1,6,2,-2,0,1,0,3,5,1,2,2,2,1.5,6,0,0,0,0,1,1
]
# Bin edges for defPtsAgainst / defYdsAgainst ...
binList_defPts = [-5,0,6,13,17,21,27,34,45,59,99]
binList_defYds = [0,274,324,375,425,999]

# ... and the points awarded for landing in each bin (one per bin)
ptList_defPts = [10,8,7,5,3,2,0,-1,-3,-5]
ptList_defYds = [5,2,0,-2,-5]

# Values outside the outermost edges become NaN and are skipped by the sum below.
y_pred['defPtsBin'] = pd.cut(y_pred['defPtsAgainst'], bins=binList_defPts, include_lowest=True, labels=ptList_defPts)
y_pred['defYdsBin'] = pd.cut(y_pred['defYdsAgainst'], bins=binList_defYds, include_lowest=True, labels=ptList_defYds)

a_pred = header.merge(y_pred, left_index=True, right_index=True)

# Only rows with pos 'DF' keep their binned points; 0 is a valid bin label in
# both point lists, so it can be assigned to the categorical columns.
a_pred.loc[a_pred['pos']!='DF', 'defPtsBin'] = 0
a_pred.loc[a_pred['pos']!='DF', 'defYdsBin'] = 0

a_pred = a_pred.drop(columns=['week','season','team','player','age','pos','KR','PR','RES','posRank','opponent'])

def multer(row):
    """Multiply one row of stats element-wise by `multiplier`.

    Kept so that any other cell calling it keeps working; the totals below are
    computed with a single vectorized expression instead of row-wise `apply`.
    """
    return row.multiply(multiplier)

# Key the weights by stat name so they cannot drift out of step with the
# column order; pd.Series raises if `multiplier` has the wrong length.
stat_weights = pd.Series(multiplier, index=list(labels) + ['defPtsBin', 'defYdsBin'])

c = (
    a_pred[stat_weights.index]           # KeyError if an expected stat column is missing
    .astype(float)                       # the binned columns are categorical
    .mul(stat_weights, axis='columns')   # aligned on column name
    .sum(axis=1)                         # NaN entries are skipped, as before
    .to_frame('pred')
)

In [68]:
# Attach each row's predicted points to its identifying columns (index-on-index join)
weeklyPred = pd.merge(header, c, how='inner', left_index=True, right_index=True)


In [70]:
# Sum the predictions per player, rank highest first, then look up one player
player_totals = weeklyPred.groupby('player')['pred'].sum()
ranked_totals = player_totals.sort_values(ascending=False)
ranked_totals['COOPER KUPP']

227.6409759968924