In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso

### Load in Dataset

In [None]:
# Read the combined, all-position table from disk
df = pd.read_csv('data/master_df.csv')

# Carve the master table into one frame per value of the 'Position' column
qbs, rbs, wrs, tes = (
    df[df['Position'] == code] for code in ('QB', 'RB', 'WR', 'TE')
)

# Model Development/Exploration

## Regressions

### QBs

In [None]:
# Build the QB modelling table, then carve out the train/test split.
# An earlier iteration (left out here) selected passing/rushing columns directly from
# `qbs`, dropped NaNs, and experimented with StandardScaler on the whole frame; the
# pre-built CSVs below are used instead and no scaling is applied in this cell.
qb_reduced_df = pd.read_csv('data/qb_std_stats.csv', index_col=0)
qb_combine_df = pd.read_csv('data/qb_combine_stats.csv', index_col=0)

# NOTE(review): no join keys are passed, so the join relies on pandas' default key
# selection — confirm the two files share exactly the intended key columns.
qb_stats_df = pd.merge(qb_reduced_df, qb_combine_df)

# Target is the 'Fantasy_PPR' column; every remaining column is used as a feature
y = qb_stats_df['Fantasy_PPR']
x = qb_stats_df.drop(columns='Fantasy_PPR')

# Hold out 20% of the rows for testing; fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)




In [None]:
# Ordinary Least Squares: fit on the training split and predict QB Fantasy Points
print('OLS Linear Regression Model')
ols_reg = LinearRegression()
model_ols = ols_reg.fit(x_train,y_train)
y_preds_ols = model_ols.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_ols))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_ols))
# Print the fitted coefficient for each feature column
cdf = pd.DataFrame(model_ols.coef_, x.columns, columns=['OLS Coefficients'])
print(cdf)

# Plot actual vs. predicted target against a single feature.
# The feature is picked by position; its name is looked up so the axis is labelled.
plot_col = 0
plot_feature = x_test.columns[plot_col]
fig, ax = plt.subplots()
ax.scatter(x_test.iloc[:, plot_col].values, y_test, color="black", label="Actual")
ax.plot(x_test.iloc[:, plot_col].values, y_preds_ols, '.', color="blue", label="OLS prediction")
ax.set_xlabel(plot_feature)
ax.set_ylabel('Fantasy_PPR')
ax.set_title('QB: OLS predictions vs. actual (test split)')
ax.legend()

plt.show()

- QB:
    - ...

In [None]:
# Ridge regression (alpha=0.5) fitted on the current train split
print('Ridge Linear Regression Model')
ridge_reg = Ridge(alpha=0.5).fit(x_train, y_train)
y_preds_ridge = ridge_reg.predict(x_test)

# Test-split error (MSE) and goodness of fit (R^2; 1 is a perfect prediction)
print(f"Mean squared error: {mean_squared_error(y_test, y_preds_ridge):.2f}")
print(f"Coefficient of determination: {r2_score(y_test, y_preds_ridge):.2f}")

# One fitted coefficient per feature column, indexed by feature name
cdf = pd.DataFrame(
    ridge_reg.coef_,
    index=x.columns,
    columns=['Ridge Reg Coefficients'],
)
print(cdf)

In [None]:
# Lasso regression (alpha=0.5) fitted on the current train split
print('Lasso Linear Regression Model')
lasso_reg = Lasso(alpha=0.5)
lasso_reg.fit(x_train, y_train)
y_preds_lasso = lasso_reg.predict(x_test)

# Report the test-split mean squared error
print("Mean squared error: {:.2f}".format(mean_squared_error(y_test, y_preds_lasso)))
# Report R^2 on the test split (1 is a perfect prediction)
print("Coefficient of determination: {:.2f}".format(r2_score(y_test, y_preds_lasso)))

# Tabulate the fitted coefficients against their feature names
cdf = pd.DataFrame(
    lasso_reg.coef_,
    index=x.columns,
    columns=['Lasso Reg Coefficients'],
)
print(cdf)

In [None]:
# OLS again, this time through make_pipeline so StandardScaler runs before the regression.
# Reuses the x/y train/test split already in the notebook namespace (an older variant
# that rebuilt the split from `qbs` inside this cell was commented out and is omitted).
pipe = make_pipeline(StandardScaler(), LinearRegression())

# Fit the scaler and the regression together on the training split
pipe.fit(x_train, y_train)

# R^2 of the fitted pipeline on the held-out split
print("The R-squared Score was: %s" % pipe.score(x_test, y_test))
print()

# Coefficients come from the final pipeline step, so they apply to the scaled features
final_step = pipe.steps[-1][1]
cdf = pd.DataFrame(final_step.coef_, index=x.columns, columns=['Coefficients'])
print(cdf)

# No plotting calls are made in this cell (earlier attempts were commented out);
# plt.show() is kept as in the original cell.
plt.show()

In [None]:
# Support Vector Regression (Regression Version of Support Vector Machine)
# NOTE: svm is imported here because the notebook's top import cell does not include it.
from sklearn import svm

# SVR is not scale invariant, so the features are standardised first.
# Putting the scaler in a pipeline means it is fit on the training split only
# and then applied unchanged to the test split.
svr = make_pipeline(StandardScaler(), svm.SVR())
svr.fit(x_train,y_train)
y_preds_svr = svr.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_svr))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_svr))

### RBs

In [None]:
# Read the two RB tables (first CSV column becomes the index) and join them
rb_reduced_df = pd.read_csv('data/rb_std_stats.csv', index_col=0)
rb_combine_df = pd.read_csv('data/rb_combine_stats.csv', index_col=0)
# NOTE(review): join keys are left to pandas' default selection — confirm they are as intended
rb_stats_df = pd.merge(rb_reduced_df, rb_combine_df)

# 'Fantasy_PPR' is the regression target; all other columns are features
y = rb_stats_df['Fantasy_PPR']
x = rb_stats_df.drop(columns='Fantasy_PPR')

# 80/20 train/test split with a fixed seed
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)

In [None]:
# Ordinary Least Squares: fit on the training split and predict RB Fantasy Points
print('OLS Linear Regression Model')
ols_reg = LinearRegression()
model_ols = ols_reg.fit(x_train,y_train)
y_preds_ols = model_ols.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_ols))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_ols))
# Print the fitted coefficient for each feature column
cdf = pd.DataFrame(model_ols.coef_, x.columns, columns=['OLS Coefficients'])
print(cdf)

# Plot actual vs. predicted target against a single feature.
# The feature is picked by position; its name is looked up so the axis is labelled.
plot_col = 14
plot_feature = x_test.columns[plot_col]
fig, ax = plt.subplots()
ax.scatter(x_test.iloc[:, plot_col].values, y_test, color="black", label="Actual")
ax.plot(x_test.iloc[:, plot_col].values, y_preds_ols, '.', color="blue", label="OLS prediction")
ax.set_xlabel(plot_feature)
ax.set_ylabel('Fantasy_PPR')
ax.set_title('RB: OLS predictions vs. actual (test split)')
ax.legend()

plt.show()

- RB:
    - Age = 28 or younger
    - 1D = The higher the better, but at least 40
    - Rushing Lng = At least 25
    - Rushing Y/A = At least 4.0
    - Ctch% = Greater than 75
    - Receiving Lng = Greater than 15
    - Receiving Y/Tgt = 5-7
    - Ht = 5'10
    - 40yd = 4.6 or less
    - 3Cone = 7.0 or less
    - Shuttle = 4.25 or less
    

### WRs

In [None]:
# Read the two WR tables (first CSV column becomes the index) and join them
wr_reduced_df = pd.read_csv('data/wr_std_stats.csv', index_col=0)
wr_combine_df = pd.read_csv('data/wr_combine_stats.csv', index_col=0)
# NOTE(review): join keys are left to pandas' default selection — confirm they are as intended
wr_stats_df = pd.merge(wr_reduced_df, wr_combine_df)

# Features are every column except the 'Fantasy_PPR' target
x = wr_stats_df.drop(columns=['Fantasy_PPR'])
y = wr_stats_df['Fantasy_PPR']

# 80/20 train/test split with a fixed seed
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)

In [None]:
# Ordinary Least Squares: fit on the training split and predict WR Fantasy Points
print('OLS Linear Regression Model')
ols_reg = LinearRegression()
model_ols = ols_reg.fit(x_train,y_train)
y_preds_ols = model_ols.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_ols))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_ols))
# Print the fitted coefficient for each feature column
cdf = pd.DataFrame(model_ols.coef_, x.columns, columns=['OLS Coefficients'])
print(cdf)

# Plot actual vs. predicted target against a single feature.
# The feature is picked by position; its name is looked up so the axis is labelled.
plot_col = 11
plot_feature = x_test.columns[plot_col]
fig, ax = plt.subplots()
ax.scatter(x_test.iloc[:, plot_col].values, y_test, color="black", label="Actual")
ax.plot(x_test.iloc[:, plot_col].values, y_preds_ols, '.', color="blue", label="OLS prediction")
ax.set_xlabel(plot_feature)
ax.set_ylabel('Fantasy_PPR')
ax.set_title('WR: OLS predictions vs. actual (test split)')
ax.legend()

plt.show()

- WR:
    - Age = Less than 30
    - Ctch% = Greater than 50%
    - Lng = Greater than 50 yds
    - Y/Tgt = Greater than 6.5
    - Ht = 5'10 or higher
    - Wt = 200lbs or higher
    - 40yd = 4.6 or less
    - Shuttle = 4.3 or less

### TEs

In [None]:
# Read the two TE tables (first CSV column becomes the index) and join them
te_reduced_df = pd.read_csv('data/te_std_stats.csv', index_col=0)
te_combine_df = pd.read_csv('data/te_combine_stats.csv', index_col=0)
# NOTE(review): join keys are left to pandas' default selection — confirm they are as intended
te_stats_df = pd.merge(te_reduced_df, te_combine_df)

# Separate the 'Fantasy_PPR' target from the feature columns
y = te_stats_df['Fantasy_PPR']
x = te_stats_df.drop(columns='Fantasy_PPR')

# 80/20 train/test split with a fixed seed
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)

In [None]:
# Ordinary Least Squares: fit on the training split and predict TE Fantasy Points
print('OLS Linear Regression Model')
ols_reg = LinearRegression()
model_ols = ols_reg.fit(x_train,y_train)
y_preds_ols = model_ols.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_ols))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_ols))
# Print the fitted coefficient for each feature column
cdf = pd.DataFrame(model_ols.coef_, x.columns, columns=['OLS Coefficients'])
print(cdf)

# Plot actual vs. predicted target against a single feature.
# The feature is picked by position; its name is looked up so the axis is labelled.
plot_col = 1
plot_feature = x_test.columns[plot_col]
fig, ax = plt.subplots()
ax.scatter(x_test.iloc[:, plot_col].values, y_test, color="black", label="Actual")
ax.plot(x_test.iloc[:, plot_col].values, y_preds_ols, '.', color="blue", label="OLS prediction")
ax.set_xlabel(plot_feature)
ax.set_ylabel('Fantasy_PPR')
ax.set_title('TE: OLS predictions vs. actual (test split)')
ax.legend()

plt.show()

### For choosing players, OLS suggests...
- QB:
- RB:
- WR:
- TE:
    - Height = 6.5
    - Ctch% = Greater than 60%
    - Lng = Greater than 30 yds
    - Weight = 240 to 260
    - Age = Any less than 35
    - 40 = 4.6 to 4.8
    - Vert = 30 to 36
    - 3Cone = Less than 7.2
    - Shuttle = Less than 4.5