In [3]:
import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

from joblib import dump, load

import numpy as np

In [4]:
# Target (label) columns. The order here is the column order of the label
# frame built from this list, so it must not be changed casually.
labels = [
    'passA', 'passC', 'passY', 'passT', 'passI', 'pass2',
    'rushA', 'rushY', 'rushT', 'rush2',
    'recC', 'recY', 'recT', 'rec2',
    'fum',
    'XPA', 'XPM', 'FGA', 'FGM', 'FG50',
    'defSack', 'defI', 'defSaf', 'defFum', 'defBlk', 'defT',
    'defPtsAgainst', 'defPassYAgainst', 'defRushYAgainst', 'defYdsAgainst',
]

# The labels whose name starts with 'def'; only these have '*_opp' feature
# variants.
_def_stats = [stat for stat in labels if stat.startswith('def')]

# Feature columns, assembled from the label names plus a suffix:
#   <stat>_curr, <stat>_prior1, <stat>_prior2 for every label, and
#   <stat>_curr_opp, <stat>_prior1_opp for the 'def*' labels only.
# Note the gamesPlayed_* columns: the '_curr' one follows its stat group,
# while the '_prior1' / '_prior2' ones precede theirs. The resulting order is
# exactly the order of the original hand-written list.
features = (
    ['week', 'age']
    + [stat + '_curr' for stat in labels]
    + ['gamesPlayed_curr']
    + ['gamesPlayed_prior1']
    + [stat + '_prior1' for stat in labels]
    + ['gamesPlayed_prior2']
    + [stat + '_prior2' for stat in labels]
    + [stat + '_curr_opp' for stat in _def_stats]
    + [stat + '_prior1_opp' for stat in _def_stats]
    + ['pos', 'posRank']
)


In [5]:
# Load the model source table and keep only the rows whose pos is 'WR'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1.loc[xl1['pos'] == 'WR']

# Targets are the label columns; inputs are the feature columns with the
# categorical 'pos' and 'posRank' columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed so the split repeats)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit a random forest on the training split (all label columns at once)
regressor = RandomForestRegressor(
    n_estimators=100,
    max_depth=19,
    min_samples_split=64,
    random_state=1,
)
regressor.fit(X_train, y_train)

# Save the fitted model to disk with joblib
dump(regressor, 'rfmodel_WR1.joblib')

# Print the R^2 score on the training split, then on the validation split
print(
    regressor.score(X_train, y_train),
    regressor.score(X_test, y_test),
    sep='\n',
)

0.6714331884575875
0.5148071364089047


In [6]:
# Load the model source table and keep only the rows whose pos is 'RB'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1.loc[xl1['pos'] == 'RB']

# Targets are the label columns; inputs are the feature columns with the
# categorical 'pos' and 'posRank' columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed so the split repeats)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit a random forest on the training split (all label columns at once)
regressor = RandomForestRegressor(
    n_estimators=100,
    max_depth=19,
    min_samples_split=64,
    random_state=1,
)
regressor.fit(X_train, y_train)

# Save the fitted model to disk with joblib
dump(regressor, 'rfmodel_RB1.joblib')

# Print the R^2 score on the training split, then on the validation split
print(
    regressor.score(X_train, y_train),
    regressor.score(X_test, y_test),
    sep='\n',
)

0.6365824859098983
0.5938182852619155


In [7]:
# Load the model source table and keep only the rows whose pos is 'QB'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1.loc[xl1['pos'] == 'QB']

# Targets are the label columns; inputs are the feature columns with the
# categorical 'pos' and 'posRank' columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed so the split repeats)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit a random forest on the training split (all label columns at once)
regressor = RandomForestRegressor(
    n_estimators=100,
    max_depth=19,
    min_samples_split=64,
    random_state=1,
)
regressor.fit(X_train, y_train)

# Save the fitted model to disk with joblib
dump(regressor, 'rfmodel_QB1.joblib')

# Print the R^2 score on the training split, then on the validation split
print(
    regressor.score(X_train, y_train),
    regressor.score(X_test, y_test),
    sep='\n',
)

0.6358042340619297
0.6087602375137522


In [8]:
# Load the model source table and keep only the rows whose pos is 'TE'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1.loc[xl1['pos'] == 'TE']

# Targets are the label columns; inputs are the feature columns with the
# categorical 'pos' and 'posRank' columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed so the split repeats)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit a random forest on the training split (all label columns at once)
regressor = RandomForestRegressor(
    n_estimators=100,
    max_depth=19,
    min_samples_split=64,
    random_state=1,
)
regressor.fit(X_train, y_train)

# Save the fitted model to disk with joblib
dump(regressor, 'rfmodel_TE1.joblib')

# Print the R^2 score on the training split, then on the validation split
print(
    regressor.score(X_train, y_train),
    regressor.score(X_test, y_test),
    sep='\n',
)

0.6112585420213847
0.5156633673843108


In [9]:
# Load the model source table and keep only the rows whose pos is 'PK'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1.loc[xl1['pos'] == 'PK']

# Targets are the label columns; inputs are the feature columns with the
# categorical 'pos' and 'posRank' columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed so the split repeats)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit a random forest on the training split (all label columns at once)
regressor = RandomForestRegressor(
    n_estimators=100,
    max_depth=19,
    min_samples_split=64,
    random_state=1,
)
regressor.fit(X_train, y_train)

# Save the fitted model to disk with joblib
dump(regressor, 'rfmodel_PK1.joblib')

# Print the R^2 score on the training split, then on the validation split
print(
    regressor.score(X_train, y_train),
    regressor.score(X_test, y_test),
    sep='\n',
)

0.8668317951162946
0.838600598600479


In [10]:
# Load the model source table and keep only the rows whose pos is 'DF'
xl1 = pd.read_csv('data_cleaned/modelSource.csv', index_col=0)
xl1 = xl1.loc[xl1['pos'] == 'DF']

# Targets are the label columns; inputs are the feature columns with the
# categorical 'pos' and 'posRank' columns one-hot encoded
y = xl1[labels]
X = pd.get_dummies(xl1[features], columns=['pos', 'posRank'])

# Hold out 20% of the rows for validation (fixed seed so the split repeats)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit a random forest on the training split (all label columns at once)
regressor = RandomForestRegressor(
    n_estimators=100,
    max_depth=19,
    min_samples_split=64,
    random_state=1,
)
regressor.fit(X_train, y_train)

# Save the fitted model to disk with joblib
dump(regressor, 'rfmodel_DF1.joblib')

# Print the R^2 score on the training split, then on the validation split
print(
    regressor.score(X_train, y_train),
    regressor.score(X_test, y_test),
    sep='\n',
)

0.7094616764404338
0.671480102473799
