In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso

### Load in Dataset

In [None]:
# Master Dataset
df = pd.read_csv('data/master_df.csv')

# Separate Datasets by Position
qbs = df[df['Position'] == 'QB']
rbs = df[df['Position'] == 'RB']
wrs = df[df['Position'] == 'WR']
tes = df[df['Position'] == 'TE']

# Model Development/Exploration

## Regressions

### QBs

In [None]:
# Prep Dataset for Regressions with train_test_split
# To define the input and output feature
#qb_reduced_df = qbs[['Passing_TD%','Passing_Int%','Passing_1D','Passing_Lng','Passing_Y/A','Passing_AY/A','Passing_Y/C','Passing_Sk','Passing_Sk%','Passing_NY/A','Passing_ANY/A','Rushing_1D','Rushing_Lng','Rushing_Y/A_y','Fantasy_PPR']]
#qb_reduced_df = qb_reduced_df.dropna()
qb_reduced_df = pd.read_csv('data/qb_std_stats.csv', index_col=0)
qb_combine_df = pd.read_csv('data/qb_combine_stats.csv', index_col=0)
qb_stats_df = qb_reduced_df.merge(qb_combine_df)
x = qb_stats_df.drop('Fantasy_PPR', axis=1)
y = qb_stats_df['Fantasy_PPR']
### If want to scale data
#col_names = qb_reduced_df.columns
#scaler = StandardScaler()
#qb_reduced_normed = scaler.fit_transform(qb_reduced_df)
#qb_reduced_df_normed = pd.DataFrame(qb_reduced_normed, columns=col_names)
#x = qb_reduced_df_normed.drop('Fantasy_PPR', axis=1)
##x = np.column_stack([np.ones(len(x)),x.iloc[:,0].values.reshape(1,-1)[0]])
#y = qb_reduced_df_normed['Fantasy_PPR']
##y = y.values.reshape(1,-1)

#x_normed = scaler.fit_transform(x)
#y_normed = scaler.fit_transform(y)

# train and test split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.20,random_state=42)




In [None]:
# Ordinary Least Squares using using fit and predict QB Fantasy Points
print('OLS Linear Regression Model')
ols_reg = LinearRegression()
model_ols = ols_reg.fit(x_train,y_train)
y_preds_ols = model_ols.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_ols))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_ols))
# Print Importance Coefficients
cdf = pd.DataFrame(model_ols.coef_, x.columns, columns=['OLS Coefficients'])
print(cdf)

# Plot outputs
plt.scatter(x_test.iloc[:,22].values, y_test, color="black")
plt.plot(x_test.iloc[:,22].values, y_preds_ols, '.', color="blue")

plt.show()

- QB:
    - Int% == Less than 3%
    - Lng == Greater than 50
    - Y/A == Greater than 7
    - AY/A == Greater than 7
    - Y/C == Greater than 11
    - Sk% == Less than 7
    - NY/A == Greater than 6.5
    - ANY/A == Greater than 5.5
    - Vertical == Greater than 31
    - 3Cone = Less than 7.2
    

In [None]:
# List of QBs with that fit optimal Fantasy QB Criteria
qbs_best_projected = qbs[(qbs['Passing_Int%'] < 3) & (qbs['Passing_Lng'] > 50) & (qbs['Passing_Y/A'] > 7) & (qbs['Passing_AY/A'] > 7) & (qbs['Passing_Y/C'] > 11) & (qbs['Passing_Sk%'] < 7) & (qbs['Passing_NY/A'] > 6.5) & (qbs['Passing_ANY/A'] > 5.5) & (qbs['Vertical'] > 31) & (qbs['3Cone'] < 7.2)]

"""
top_5_qbs = qbs_best_projected[qbs_best_projected['Year'] >= 2019]
top_5_qbs = top_5_qbs.sort_values(by=['Rank'])
top_5_qbs = top_5_qbs['Player'].unique()
top_5_qbs = top_5_qbs[0:5]

with open("top_5_qbs.txt", "w") as output:
    output.write(str(top_5_qbs))
"""

top_qbs = qbs_best_projected[(qbs_best_projected['Year'] >= 2017) & (qbs_best_projected['Fantasy_PosRank'] <= 10)]
top_qbs_agg = top_qbs.groupby('Player').agg({'Fantasy_PosRank': ['mean', 'std'], 'Year': 'count', 'Rank': 'mean'}).reset_index()
#top_tes_agg.sort_values([('Year','count'),('Rank','mean')],ascending=False)
top_qbs_agg_sorted = top_qbs_agg.sort_values([('Fantasy_PosRank','mean')])

with open("top_qbs_agg_sorted.txt", "w") as output:
    output.write(str(top_qbs_agg_sorted))

In [None]:
# Ridge regression
print('Ridge Linear Regression Model')
ridge_reg = Ridge(alpha=0.5)
ridge_reg.fit(x_train,y_train)
y_preds_ridge = ridge_reg.predict(x_test)
# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_ridge))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_ridge))
# Print Importance Coefficients
cdf = pd.DataFrame(ridge_reg.coef_, x.columns, columns=['Ridge Reg Coefficients'])
print(cdf)

In [None]:
# Lasso regression
print('Lasso Linear Regression Model')
lasso_reg = Lasso(alpha=0.5)
lasso_reg.fit(x_train,y_train)
y_preds_lasso = lasso_reg.predict(x_test)
# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_lasso))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_lasso))
# Print Importance Coefficients
cdf = pd.DataFrame(lasso_reg.coef_, x.columns, columns=['Lasso Reg Coefficients'])
print(cdf)

In [None]:
# Ordinary Least Squares using makepipeline
#qb_reduced_df = qbs[['Passing_TD%','Passing_Int%','Passing_1D','Passing_Lng','Passing_Y/A','Passing_AY/A','Passing_Y/C','Passing_Sk','Passing_Sk%','Passing_NY/A','Passing_ANY/A','Rushing_1D','Rushing_Lng','Rushing_Y/A_y','Fantasy_PPR']]
#qb_reduced_df = qb_reduced_df.dropna()
#x = qb_reduced_df.drop('Fantasy_PPR', axis=1)
#y = qb_reduced_df['Fantasy_PPR']
# train and test split
#x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.20,random_state=42)
# Make pipeline
pipe = make_pipeline(StandardScaler(), LinearRegression())
# Fit Model
pipe.fit(x_train,y_train)
# Print Score
print("The R-squared Score was: "+str(pipe.score(x_test,y_test)))
print('')
# Print Importance Coefficients
cdf = pd.DataFrame(pipe.named_steps['linearregression'].coef_, x.columns, columns=['Coefficients'])
print(cdf)

# Plot outputs
#plt.scatter(x_test[:,np.newaxis], y_test, color="black")
#plt.plot(x_test.iloc[:,np.newaxis].values, y_preds_ols, color="blue", linewidth=3)

plt.show()

In [None]:
# Support Vector Regression (Regression Version of Support Vector Machine)
from sklearn import svm
print('Support Vector Regression Model')
svr = svm.SVR()
svr.fit(x_train,y_train)
y_preds_svr = svr.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_svr))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_svr))

In [None]:
# Random Forest Regressor using using fit and predict QB Fantasy Points
from sklearn.ensemble import RandomForestRegressor
print('Random Forest Regression Model')
rf_reg = RandomForestRegressor()
model_rf = rf_reg.fit(x_train,y_train)
y_preds_rf = model_rf.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_rf))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_rf))
# Print Importance Coefficients
cdf = pd.DataFrame(model_rf.feature_importances_, x.columns, columns=['RF Coefficients'])
print(cdf)

# Plot outputs
plt.scatter(x_test.iloc[:,0].values, y_test, color="black")
plt.plot(x_test.iloc[:,0].values, y_preds_rf, '.', color="blue")

plt.show()

In [None]:
# Test model with no 1ds and sacks
qb_reduced_df = pd.read_csv('data/qb_std_stats_no_1ds_or_sks.csv', index_col=0)
qb_combine_df = pd.read_csv('data/qb_combine_stats.csv', index_col=0)
qb_stats_df = qb_reduced_df.merge(qb_combine_df)
x = qb_stats_df.drop('Fantasy_PPR', axis=1)
y = qb_stats_df['Fantasy_PPR']

# train and test split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.20,random_state=42)

# Random Forest Regressor using using fit and predict QB Fantasy Points
from sklearn.ensemble import RandomForestRegressor
print('Random Forest Regression Model')
rf_reg = RandomForestRegressor()
model_rf = rf_reg.fit(x_train,y_train)
y_preds_rf = model_rf.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_rf))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_rf))
# Print Importance Coefficients
cdf = pd.DataFrame(model_rf.feature_importances_, x.columns, columns=['RF Coefficients'])
print(cdf)

# Plot outputs
plt.scatter(x_test.iloc[:,0].values, y_test, color="black")
plt.plot(x_test.iloc[:,0].values, y_preds_rf, '.', color="blue")

plt.show()


In [None]:
# Predicting against full dataset
y_preds_rf_fullset = model_rf.predict(x)

# Create new dataframe for projections
player_point_proj = pd.DataFrame({'Age': x['Age'], 'Passing_TD%': x['Passing_TD%'], 'Model_Projection_Points': y_preds_rf_fullset, 'Actual_Points': y})
# Merge player names and info back in
player_point_proj_wnames = player_point_proj.merge(qbs[['Player', 'Age', 'Position', 'Year', 'Passing_TD%', 'Fantasy_PPR']], how='inner', left_on=['Age', 'Passing_TD%', 'Actual_Points'], right_on=['Age', 'Passing_TD%', 'Fantasy_PPR'])
player_point_proj_wnames = player_point_proj_wnames.drop(columns=['Fantasy_PPR'])

# Calculate Model vs Actual Delta
player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames['Model_Projection_Points'] - player_point_proj_wnames['Actual_Points']

# Prep Dataframe for csv output
player_point_proj_wnames = player_point_proj_wnames.sort_values(by='Model_Projection_Points', ascending=False)
player_point_proj_wnames = player_point_proj_wnames[['Player', 'Position', 'Age', 'Year', 'Model_Projection_Points', 'Actual_Points', 'Model_v_Actual_Delta']]

# Save dataframes
years = player_point_proj_wnames['Year'].unique()
position = player_point_proj_wnames['Position'][0]
for year in years:
    df = player_point_proj_wnames[player_point_proj_wnames["Year"] == year]
    filename = 'projections/random_forest/'+str(position)+'/_'+str(year)+'_projections.csv'
    df.to_csv(filename)

In [None]:
# plotting projections vs actuals
plt.hist(player_point_proj_wnames['Model_Projection_Points'], 
         label='Model_Projection_Points')
  
plt.hist(player_point_proj_wnames['Actual_Points'], 
         label='Actual_Points')
  
plt.legend(loc='upper right')
plt.title('Overlapping')
plt.show()

In [None]:
# plotting model deltas
plt.hist(player_point_proj_wnames['Model_v_Actual_Delta'], 
         label='Model_v_Actual_Delta')
  
plt.legend(loc='upper right')
plt.show()

### RBs

In [None]:
# Load in RB data
rb_reduced_df = pd.read_csv('data/rb_std_stats.csv', index_col=0)
rb_combine_df = pd.read_csv('data/rb_combine_stats.csv', index_col=0)
rb_stats_df = rb_reduced_df.merge(rb_combine_df)
x = rb_stats_df.drop('Fantasy_PPR', axis=1)
y = rb_stats_df['Fantasy_PPR']

# train and test split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.20,random_state=42)

In [None]:
# Ordinary Least Squares using using fit and predict RB Fantasy Points
print('OLS Linear Regression Model')
ols_reg = LinearRegression()
model_ols = ols_reg.fit(x_train,y_train)
y_preds_ols = model_ols.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_ols))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_ols))
# Print Importance Coefficients
cdf = pd.DataFrame(model_ols.coef_, x.columns, columns=['OLS Coefficients'])
print(cdf)

# Plot outputs
plt.scatter(x_test.iloc[:,13].values, y_test, color="black")
plt.plot(x_test.iloc[:,13].values, y_preds_ols, '.', color="blue")

plt.show()

- RB:
    - Age = 28 or younger
    - 1D = The higher the better, but at least 40
    - Rushing Lng = At least 25
    - Rushing Y/A = At least 4.0
    - Receiving Ctch% = Greater than 75
    - Receiving Lng = Greater than 15
    - Receiving Y/Tgt = Greater than 5
    - Ht = 5'10 or more
    - 40yd = 4.6 or less
    - 3Cone = 7.0 or less
    - Shuttle = 4.25 or less
    

In [None]:
# List of RBs with that fit optimal Fantasy RB Criteria
rbs['Age_in_2022'] = 2022 - rbs['Year'] + rbs['Age']
rbs_best_projected = rbs[(rbs['Age_in_2022'] <= 28) & (rbs['Rushing_1D'] > 40) & (rbs['Rushing_Lng'] > 25) & (rbs['Rushing_Y/A_x'] > 4) & (rbs['Receiving_Ctch%'] > 75) & (rbs['Receiving_Lng'] > 15) & (rbs['Receiving_Y/Tgt'] > 5) & (rbs['Ht'] > 5.8) & (rbs['40yd'] < 4.6) & (rbs['Shuttle'] < 4.25)]

"""
top_10_rbs = rbs_best_projected[rbs_best_projected['Year'] >= 2019]
top_10_rbs = top_10_rbs.sort_values(by=['Rank'])
top_10_rbs = top_10_rbs['Player'].unique()
top_10_rbs = top_10_rbs[0:10]

with open("top_10_rbs.txt", "w") as output:
    output.write(str(top_10_rbs))
"""

top_rbs = rbs_best_projected[(rbs_best_projected['Year'] >= 2017) & (rbs_best_projected['Fantasy_PosRank'] <= 30)]
top_rbs_agg = top_rbs.groupby('Player').agg({'Fantasy_PosRank': ['mean', 'std'], 'Year': 'count', 'Rank': 'mean'}).reset_index()
#top_tes_agg.sort_values([('Year','count'),('Rank','mean')],ascending=False)
top_rbs_agg_sorted = top_rbs_agg.sort_values([('Fantasy_PosRank','mean')])

with open("top_rbs_agg_sorted.txt", "w") as output:
    output.write(str(top_rbs_agg_sorted))


In [None]:
# Test model with no 1ds and sacks
rb_reduced_df = pd.read_csv('data/rb_std_stats_no_1ds.csv', index_col=0)
rb_combine_df = pd.read_csv('data/rb_combine_stats.csv', index_col=0)
rb_stats_df = rb_reduced_df.merge(rb_combine_df)
x = rb_stats_df.drop('Fantasy_PPR', axis=1)
y = rb_stats_df['Fantasy_PPR']

# train and test split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.20,random_state=42)

# Random Forest Regressor using using fit and predict QB Fantasy Points
from sklearn.ensemble import RandomForestRegressor
print('Random Forest Regression Model')
rf_reg = RandomForestRegressor()
model_rf = rf_reg.fit(x_train,y_train)
y_preds_rf = model_rf.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_rf))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_rf))
# Print Importance Coefficients
cdf = pd.DataFrame(model_rf.feature_importances_, x.columns, columns=['RF Coefficients'])
print(cdf)

# Plot outputs
plt.scatter(x_test.iloc[:,0].values, y_test, color="black")
plt.plot(x_test.iloc[:,0].values, y_preds_rf, '.', color="blue")
plt.show()

In [None]:
x.columns

In [None]:
# Predicting against full dataset
y_preds_rf_fullset = model_rf.predict(x)

# Create new dataframe for projections
player_point_proj = pd.DataFrame({'Age': x['Age'], 'Rushing_Lng': x['Rushing_Lng'], 'Model_Projection_Points': y_preds_rf_fullset, 'Actual_Points': y})
# Merge player names and info back in
player_point_proj_wnames = player_point_proj.merge(rbs[['Player', 'Age', 'Position', 'Year', 'Rushing_Lng', 'Fantasy_PPR']], how='inner', left_on=['Age', 'Rushing_Lng', 'Actual_Points'], right_on=['Age', 'Rushing_Lng', 'Fantasy_PPR'])
player_point_proj_wnames = player_point_proj_wnames.drop(columns=['Fantasy_PPR'])

# Calculate Model vs Actual Delta
player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames['Model_Projection_Points'] - player_point_proj_wnames['Actual_Points']

# Prep Dataframe for csv output
player_point_proj_wnames = player_point_proj_wnames.sort_values(by='Model_Projection_Points', ascending=False)
player_point_proj_wnames = player_point_proj_wnames[['Player', 'Position', 'Age', 'Year', 'Model_Projection_Points', 'Actual_Points', 'Model_v_Actual_Delta']]

# Save dataframes
years = player_point_proj_wnames['Year'].unique()
position = player_point_proj_wnames['Position'][0]
for year in years:
    df = player_point_proj_wnames[player_point_proj_wnames["Year"] == year]
    filename = 'projections/random_forest/'+str(position)+'/_'+str(year)+'_projections.csv'
    df.to_csv(filename)

### WRs

In [None]:
# Load in WR data
wr_reduced_df = pd.read_csv('data/wr_std_stats.csv', index_col=0)
wr_combine_df = pd.read_csv('data/wr_combine_stats.csv', index_col=0)
wr_stats_df = wr_reduced_df.merge(wr_combine_df)
x = wr_stats_df.drop('Fantasy_PPR', axis=1)
y = wr_stats_df['Fantasy_PPR']

# train and test split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.20,random_state=42)

In [None]:
# Ordinary Least Squares using using fit and predict WR Fantasy Points
print('OLS Linear Regression Model')
ols_reg = LinearRegression()
model_ols = ols_reg.fit(x_train,y_train)
y_preds_ols = model_ols.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_ols))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_ols))
# Print Importance Coefficients
cdf = pd.DataFrame(model_ols.coef_, x.columns, columns=['OLS Coefficients'])
print(cdf)

# Plot outputs
plt.scatter(x_test.iloc[:,11].values, y_test, color="black")
plt.plot(x_test.iloc[:,11].values, y_preds_ols, '.', color="blue")

plt.show()

- WR:
    - Age = Less than 30
    - Ctch% = Greater than 50%
    - Lng = Greater than 50 yds
    - Y/Tgt = Greater than 6.5
    - Ht = 5'10 or higher
    - Wt = 200lbs or higher
    - 40yd = 4.6 or less
    - Shutte = 4.3 or less

In [None]:
# List of WRs with that fit optimal Fantasy WR Criteria
wrs['Age_in_2022'] = 2022 - wrs['Year'] + wrs['Age']
wrs_best_projected = wrs[(wrs['Age_in_2022'] <= 30) & (wrs['Receiving_Ctch%'] > 50) & (wrs['Receiving_Lng'] > 50) & (wrs['Receiving_Y/Tgt'] > 6.5) & (wrs['Ht'] > 5.8) & (wrs['Wt'] > 200) & (wrs['40yd'] < 4.6) & (wrs['Shuttle'] < 4.3)]
"""
top_wrs = wrs_best_projected[(wrs_best_projected['Year'] >= 2019) & (wrs_best_projected['Fantasy_PosRank'] <= 30)]
top_wrs = top_wrs.sort_values(by=['Rank'])
top_wrs = top_wrs['Player'].unique()
#top_wrs = top_wrs[0:20]

with open("top_wrs.txt", "w") as output:
    output.write(str(top_wrs))
"""

top_wrs = wrs_best_projected[(wrs_best_projected['Year'] >= 2017) & (wrs_best_projected['Fantasy_PosRank'] <= 30)]
top_wrs_agg = top_wrs.groupby('Player').agg({'Fantasy_PosRank': ['mean', 'std'], 'Year': 'count', 'Rank': 'mean'}).reset_index()
#top_tes_agg.sort_values([('Year','count'),('Rank','mean')],ascending=False)
top_wrs_agg_sorted = top_wrs_agg.sort_values([('Fantasy_PosRank','mean')])

with open("top_wrs_agg_sorted.txt", "w") as output:
    output.write(str(top_wrs_agg_sorted))



In [None]:
# Test model
wr_reduced_df = pd.read_csv('data/wr_std_stats.csv', index_col=0)
wr_combine_df = pd.read_csv('data/wr_combine_stats.csv', index_col=0)
wr_stats_df = wr_reduced_df.merge(wr_combine_df)
x = wr_stats_df.drop('Fantasy_PPR', axis=1)
y = wr_stats_df['Fantasy_PPR']

# train and test split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.20,random_state=42)

# Random Forest Regressor using using fit and predict QB Fantasy Points
from sklearn.ensemble import RandomForestRegressor
print('Random Forest Regression Model')
rf_reg = RandomForestRegressor()
model_rf = rf_reg.fit(x_train,y_train)
y_preds_rf = model_rf.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_rf))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_rf))
# Print Importance Coefficients
cdf = pd.DataFrame(model_rf.feature_importances_, x.columns, columns=['RF Coefficients'])
print(cdf)

# Plot outputs
plt.scatter(x_test.iloc[:,0].values, y_test, color="black")
plt.plot(x_test.iloc[:,0].values, y_preds_rf, '.', color="blue")
plt.show()

In [None]:
# Predicting against full dataset
y_preds_rf_fullset = model_rf.predict(x)

# Create new dataframe for projections
player_point_proj = pd.DataFrame({'Age': x['Age'], 'Receiving_Ctch%': x['Receiving_Ctch%'], 'Model_Projection_Points': y_preds_rf_fullset, 'Actual_Points': y})
# Merge player names and info back in
player_point_proj_wnames = player_point_proj.merge(wrs[['Player', 'Age', 'Position', 'Year', 'Receiving_Ctch%', 'Fantasy_PPR']], how='inner', left_on=['Age', 'Receiving_Ctch%', 'Actual_Points'], right_on=['Age', 'Receiving_Ctch%', 'Fantasy_PPR'])
player_point_proj_wnames = player_point_proj_wnames.drop(columns=['Fantasy_PPR'])

# Calculate Model vs Actual Delta
player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames['Model_Projection_Points'] - player_point_proj_wnames['Actual_Points']

# Prep Dataframe for csv output
player_point_proj_wnames = player_point_proj_wnames.sort_values(by='Model_Projection_Points', ascending=False)
player_point_proj_wnames = player_point_proj_wnames[['Player', 'Position', 'Age', 'Year', 'Model_Projection_Points', 'Actual_Points', 'Model_v_Actual_Delta']]

# Save dataframes
years = player_point_proj_wnames['Year'].unique()
position = player_point_proj_wnames['Position'][0]
for year in years:
    df = player_point_proj_wnames[player_point_proj_wnames["Year"] == year]
    filename = 'projections/random_forest/'+str(position)+'/_'+str(year)+'_projections.csv'
    df.to_csv(filename)

### TEs

In [None]:
# Load in TE data
te_reduced_df = pd.read_csv('data/te_std_stats.csv', index_col=0)
te_combine_df = pd.read_csv('data/te_combine_stats.csv', index_col=0)
te_stats_df = te_reduced_df.merge(te_combine_df)
x = te_stats_df.drop('Fantasy_PPR', axis=1)
y = te_stats_df['Fantasy_PPR']

# train and test split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.20,random_state=42)

In [None]:
# Ordinary Least Squares using using fit and predict TE Fantasy Points
print('OLS Linear Regression Model')
ols_reg = LinearRegression()
model_ols = ols_reg.fit(x_train,y_train)
y_preds_ols = model_ols.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_ols))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_ols))
# Print Importance Coefficients
cdf = pd.DataFrame(model_ols.coef_, x.columns, columns=['OLS Coefficients'])
print(cdf)

# Plot outputs
plt.scatter(x_test.iloc[:,4].values, y_test, color="black")
plt.plot(x_test.iloc[:,4].values, y_preds_ols, '.', color="blue")

plt.show()

### For choosing players, OLS suggests...
- TE:
    - Age = Any less than 35
    - Cth% = Greater than 60%
    - Lng = Greater than 30 yds
    - Height = 6'5
    - Weight = 240 to 260
    - 40 = 4.6 to 4.8
    - Vert = 30 to 36
    - 3Cone = Less than 7.2
    - Shuttle = Less than 4.5

In [None]:
# List of WRs with that fit optimal Fantasy WR Criteria
tes['Age_in_2022'] = 2022 - tes['Year'] + tes['Age']
tes_best_projected = tes[(tes['Age_in_2022'] <= 35) & (tes['Receiving_Ctch%'] > 60) & (tes['Receiving_Lng'] > 30) & (tes['Ht'] > 6.3) & (tes['Wt'] > 240) & (tes['40yd'] < 4.8) & (tes['Vertical'] >= 30) & (tes['3Cone'] < 7.2) & (tes['Shuttle'] < 4.5)]

top_tes = tes_best_projected[(tes_best_projected['Year'] >= 2019) & (tes_best_projected['Fantasy_PosRank'] <= 10)]
top_tes = top_tes.sort_values(by=['Rank'])
top_tes = top_tes['Player'].unique()
#top_tes = top_tes[0:20]

with open("top_tes.txt", "w") as output:
    output.write(str(top_tes))

In [None]:
top_tes = tes_best_projected[(tes_best_projected['Year'] >= 2017) & (tes_best_projected['Fantasy_PosRank'] <= 10)]
top_tes_count = top_tes.Player.value_counts()
#top_tes.groupby('Player').agg({'Rank': ['mean', 'std'], 'Year': 'count'})

with open("top_tes_count.txt", "w") as output:
    output.write(str(top_tes_count))

In [None]:
top_tes_agg = top_tes.groupby('Player').agg({'Fantasy_PosRank': ['mean', 'std'], 'Year': 'count', 'Rank': 'mean'}).reset_index()
#top_tes_agg.sort_values([('Year','count'),('Rank','mean')],ascending=False)
top_tes_agg_sorted = top_tes_agg.sort_values([('Fantasy_PosRank','mean')])

with open("top_tes_agg_sorted.txt", "w") as output:
    output.write(str(top_tes_agg_sorted))


In [None]:
# Test model
te_reduced_df = pd.read_csv('data/te_std_stats.csv', index_col=0)
te_combine_df = pd.read_csv('data/te_combine_stats.csv', index_col=0)
te_stats_df = te_reduced_df.merge(te_combine_df)
x = te_stats_df.drop('Fantasy_PPR', axis=1)
y = te_stats_df['Fantasy_PPR']

# train and test split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.20,random_state=42)

# Random Forest Regressor using using fit and predict QB Fantasy Points
from sklearn.ensemble import RandomForestRegressor
print('Random Forest Regression Model')
rf_reg = RandomForestRegressor()
model_rf = rf_reg.fit(x_train,y_train)
y_preds_rf = model_rf.predict(x_test)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_test, y_preds_rf))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_test, y_preds_rf))
# Print Importance Coefficients
cdf = pd.DataFrame(model_rf.feature_importances_, x.columns, columns=['RF Coefficients'])
print(cdf)

# Plot outputs
plt.scatter(x_test.iloc[:,0].values, y_test, color="black")
plt.plot(x_test.iloc[:,0].values, y_preds_rf, '.', color="blue")
plt.show()

In [None]:
# Predicting against full dataset
y_preds_rf_fullset = model_rf.predict(x)

# Create new dataframe for projections
player_point_proj = pd.DataFrame({'Age': x['Age'], 'Receiving_Ctch%': x['Receiving_Ctch%'], 'Model_Projection_Points': y_preds_rf_fullset, 'Actual_Points': y})
# Merge player names and info back in
player_point_proj_wnames = player_point_proj.merge(tes[['Player', 'Age', 'Position', 'Year', 'Receiving_Ctch%', 'Fantasy_PPR']], how='inner', left_on=['Age', 'Receiving_Ctch%', 'Actual_Points'], right_on=['Age', 'Receiving_Ctch%', 'Fantasy_PPR'])
player_point_proj_wnames = player_point_proj_wnames.drop(columns=['Fantasy_PPR'])

# Calculate Model vs Actual Delta
player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames['Model_Projection_Points'] - player_point_proj_wnames['Actual_Points']

# Prep Dataframe for csv output
player_point_proj_wnames = player_point_proj_wnames.sort_values(by='Model_Projection_Points', ascending=False)
player_point_proj_wnames = player_point_proj_wnames[['Player', 'Position', 'Age', 'Year', 'Model_Projection_Points', 'Actual_Points', 'Model_v_Actual_Delta']]

# Save dataframes
years = player_point_proj_wnames['Year'].unique()
position = player_point_proj_wnames['Position'][0]
for year in years:
    df = player_point_proj_wnames[player_point_proj_wnames["Year"] == year]
    filename = 'projections/random_forest/'+str(position)+'/_'+str(year)+'_projections.csv'
    df.to_csv(filename)

### Next Steps
- [x] Reduce redundant data into functions
- [x] Train on 2012-2020 data, test on 2021 data
- [x] Build function that loads in list of object models, tests against various datasets, and stores data on how each model performs
- [x] Do calculations on 3-5 yr stats by Player and join them in for model projections
- [x] Rerun RF models with new included data as test
- [x] Run new data against all desired models, find most desirable for each position, and make projections accordingly
- [] Add in advanced stats from PFR
- [] Add in 2022 List of players and rookies combine results
- [] Compare ratings to current 2022 draft ADP
- [] Find other potential parameters/data to improve models
- [] Consider categorical variable models and project bins of fantasy points (300+, 250-300, etc.)

In [123]:
# Master Dataset
df = pd.read_csv('data/master_df.csv')

# Separate Datasets by Position
qbs = df[df['Position'] == 'QB']

from sklearn.preprocessing import StandardScaler
model = StandardScaler()
qb_reduced_df = qbs[['Year','Age','Passing_TD%','Passing_Int%','Passing_Lng','Passing_Y/A','Passing_AY/A','Passing_Y/C','Passing_Sk%','Passing_NY/A','Passing_ANY/A','Rushing_Lng','Rushing_Y/A_y','Fantasy_PPR']]
#qb_reduced_df = qb_reduced_df.dropna()
print(qb_reduced_df.shape)
qb_reduced_df.replace(0, np.nan, inplace=True)
qb_reduced_df = qb_reduced_df.apply(lambda x: x.fillna(x.median()),axis=0)
model.fit(qb_reduced_df)

#qb_plt = sns.pairplot(qb_reduced_df)
#plt.savefig('plots/qb_pairplot.png')
qb_reduced_df.to_csv('data/qb_std_stats_year_no_1ds_or_sks.csv')


# Test model with no 1ds and sacks
#qb_reduced_df = pd.read_csv('data/qb_std_stats_year_no_1ds_or_sks.csv', index_col=0)

qb_combine_df = pd.read_csv('data/qb_combine_stats.csv', index_col=0)
print(qb_combine_df.shape)
qb_stats_df = qb_reduced_df.merge(qb_combine_df)
print(qb_combine_df.shape)

# Train and test split for test being 2021 data
qb_2012_2020 = qb_stats_df[qb_stats_df['Year'] < 2021]
print(qb_2012_2020.shape)
qb_2021 = qb_stats_df[qb_stats_df['Year'] == 2021]
print(qb_2021.shape)

x_qb_2012_2020 = qb_2012_2020.drop(['Year','Fantasy_PPR'], axis=1)
y_qb_2012_2020 = qb_2012_2020[['Fantasy_PPR']]
x_qb_2021 = qb_2021.drop(['Year','Fantasy_PPR'], axis=1)
y_qb_2021 = qb_2021[['Fantasy_PPR']]

# Random Forest Regressor using using fit and predict QB Fantasy Points
from sklearn.ensemble import RandomForestRegressor
print('Random Forest Regression Model')
rf_reg = RandomForestRegressor()
model_rf = rf_reg.fit(x_qb_2012_2020,y_qb_2012_2020)
y_qb_preds_rf = model_rf.predict(x_qb_2021)

# The mean squared error
print("Mean squared error: %.2f" % mean_squared_error(y_qb_2021, y_qb_preds_rf))
# The coefficient of determination: 1 is perfect prediction
print("Coefficient of determination: %.2f" % r2_score(y_qb_2021, y_qb_preds_rf))
# Print Importance Coefficients
cdf = pd.DataFrame(model_rf.feature_importances_, x_qb_2012_2020.columns, columns=['RF Coefficients'])
print(cdf)

"""
# Plot outputs
plt.scatter(x_test.iloc[:,0].values, y_test, color="black")
plt.plot(x_test.iloc[:,0].values, y_preds_rf, '.', color="blue")

plt.show()
"""

# Create new dataframe for projections
player_point_proj = pd.DataFrame({'Age': x_qb_2021['Age'], 'Passing_TD%': x_qb_2021['Passing_TD%'], 'Model_Projection_Points': y_qb_preds_rf, 'Actual_Points': y_qb_2021['Fantasy_PPR']})
# Merge player names and info back in
player_point_proj_wnames = player_point_proj.merge(qbs[['Player', 'Age', 'Position', 'Year', 'Passing_TD%', 'Fantasy_PPR']], how='inner', left_on=['Age', 'Passing_TD%', 'Actual_Points'], right_on=['Age', 'Passing_TD%', 'Fantasy_PPR'])
player_point_proj_wnames = player_point_proj_wnames.drop(columns=['Fantasy_PPR'])

# Calculate Model vs Actual Delta
player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames['Model_Projection_Points'] - player_point_proj_wnames['Actual_Points']

# Prep Dataframe for csv output
player_point_proj_wnames = player_point_proj_wnames.sort_values(by='Model_Projection_Points', ascending=False)
player_point_proj_wnames = player_point_proj_wnames[['Player', 'Position', 'Age', 'Year', 'Model_Projection_Points', 'Actual_Points', 'Model_v_Actual_Delta']]

# Save dataframes
years = player_point_proj_wnames['Year'].unique()
position = player_point_proj_wnames['Position'][0]
for year in years:
    df = player_point_proj_wnames[player_point_proj_wnames["Year"] == year]
    filename = 'projections/random_forest/'+str(position)+'/_'+str(year)+'_projections.csv'
    df.to_csv(filename)


(736, 14)
(736, 10)
(736, 10)
(1180, 23)
(158, 23)
Random Forest Regression Model


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qb_reduced_df.replace(0, np.nan, inplace=True)
  model_rf = rf_reg.fit(x_qb_2012_2020,y_qb_2012_2020)


Mean squared error: 2594.80
Coefficient of determination: 0.78
               RF Coefficients
Age                   0.012111
Passing_TD%           0.045799
Passing_Int%          0.038538
Passing_Lng           0.234559
Passing_Y/A           0.009393
Passing_AY/A          0.094212
Passing_Y/C           0.013234
Passing_Sk%           0.012608
Passing_NY/A          0.024340
Passing_ANY/A         0.408079
Rushing_Lng           0.048836
Rushing_Y/A_y         0.011516
Ht                    0.004386
Wt                    0.008107
40yd                  0.004704
Vertical              0.003193
Bench                 0.000515
BroadJump             0.004859
3Cone                 0.007575
Shuttle               0.005538
Combine_Year          0.007898


In [11]:
def predict_PPR(model_obj, position):
    # Master Dataset
    master_df = pd.read_csv('data/master_df.csv')
    join_column = ""

    # Separate Datasets by Position
    df = master_df[master_df['Position'] == position]
    if position == "QB":
        df = df[['Year','Age','Passing_TD%','Passing_Int%','Passing_Lng','Passing_Y/A','Passing_AY/A','Passing_Y/C','Passing_Sk%','Passing_NY/A','Passing_ANY/A','Rushing_Lng','Rushing_Y/A_y','3PriorYear_Avg_PPR','3PriorYear_StdDev_PPR','Fantasy_PPR']]
        join_column = 'Passing_TD%'
    elif position == "RB":
        df = df[['Year','Age','Rushing_Fumbles','Rushing_Lng','Rushing_Y/A_y','Receiving_Ctch%','Receiving_Lng','Receiving_Y/Tgt','3PriorYear_Avg_PPR','3PriorYear_StdDev_PPR','Fantasy_PPR']]
        join_column = 'Rushing_Lng'
    elif (position == "WR") or (position == "TE"):
        df = df[['Year','Age','Receiving_Ctch%','Receiving_Lng','Receiving_Y/Tgt','3PriorYear_Avg_PPR','3PriorYear_StdDev_PPR','Fantasy_PPR']]
        join_column = 'Receiving_Ctch%'
    else:
        raise Exception("Sorry, this position is not supported currently. Please enter one of these four positions: (QB, RB, WR, TE)")
        


    from sklearn.preprocessing import StandardScaler
    model = StandardScaler()
    #qb_reduced_df = qb_reduced_df.dropna()
    df.replace(0, np.nan, inplace=True)
    df = df.apply(lambda x: x.fillna(x.median()),axis=0)
    model.fit(df)

    #qb_plt = sns.pairplot(qb_reduced_df)
    #plt.savefig('plots/qb_pairplot.png')
    #qb_reduced_df.to_csv('data/qb_std_stats_year_no_1ds_or_sks.csv')


    # Test model with no 1ds and sacks
    #qb_reduced_df = pd.read_csv('data/qb_std_stats_year_no_1ds_or_sks.csv', index_col=0)
    position_lowercase = str.lower(position)
    combine_df = pd.read_csv('data/'+position_lowercase+'_combine_stats.csv', index_col=0)
    stats_df = df.merge(combine_df)

    year_to_project = 2021

    # Train and test split for test being 2021 data
    df_2012_2020 = stats_df[stats_df['Year'] < year_to_project]
    df_2021 = stats_df[stats_df['Year'] == year_to_project]

    x_2012_2020 = df_2012_2020.drop(['Year','Fantasy_PPR'], axis=1)
    y_2012_2020 = df_2012_2020[['Fantasy_PPR']]
    x_2021 = df_2021.drop(['Year','Fantasy_PPR'], axis=1)
    y_2021 = df_2021[['Fantasy_PPR']]

    # ML Model using using fit and predict QB Fantasy Points
    model = model_obj
    model_name = type(model).__name__
    print('Predict '+ position + ' with a '+ model_name + ' model')
    if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
        model = model.fit(x_2012_2020,y_2012_2020)
    else:
        model = model.fit(x_2012_2020,y_2012_2020.values.ravel())
    y_preds = model.predict(x_2021)

    # The mean squared error
    mean_sq_err = mean_squared_error(y_2021, y_preds)
    #print("Mean squared error: %.2f" % mean_sq_err)
    
    # The coefficient of determination: 1 is perfect prediction
    r2_err = r2_score(y_2021, y_preds)
    #print("Coefficient of determination: %.2f" % r2_err)
    # Print Importance Coefficients
    #cdf = pd.DataFrame(model_rf.feature_importances_, x_qb_2012_2020.columns, columns=['RF Coefficients'])
    #print(cdf)

    """
    # Plot outputs
    plt.scatter(x_test.iloc[:,0].values, y_test, color="black")
    plt.plot(x_test.iloc[:,0].values, y_preds_rf, '.', color="blue")

    plt.show()
    """


    # Create new dataframe for projections
    player_point_proj = None
    if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
        player_point_proj = pd.DataFrame({'Age': x_2021['Age'], join_column: x_2021[join_column], 'Model_Projection_Points': y_preds[:,0], 'Actual_Points': y_2021['Fantasy_PPR']})
    else:
        player_point_proj = pd.DataFrame({'Age': x_2021['Age'], join_column: x_2021[join_column], 'Model_Projection_Points': y_preds, 'Actual_Points': y_2021['Fantasy_PPR']})

    # Merge player names and info back in
    player_point_proj_wnames = player_point_proj.merge(master_df[['Player', 'Age', 'Position', 'Year', join_column, 'Fantasy_PPR']], how='inner', left_on=['Age', join_column, 'Actual_Points'], right_on=['Age', join_column, 'Fantasy_PPR'])
    player_point_proj_wnames = player_point_proj_wnames.drop(columns=['Fantasy_PPR'])

    # Calculate Model vs Actual Delta
    player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames['Model_Projection_Points'] - player_point_proj_wnames['Actual_Points']

    # Prep Dataframe for csv output
    player_point_proj_wnames = player_point_proj_wnames.sort_values(by='Model_Projection_Points', ascending=False)
    player_point_proj_wnames = player_point_proj_wnames[['Player', 'Position', 'Age', 'Year', 'Model_Projection_Points', 'Actual_Points', 'Model_v_Actual_Delta']]

    # Save dataframes
    """
    years = player_point_proj_wnames['Year'].unique()
    #position = player_point_proj_wnames['Position'][0]
    for year in years:
        df = player_point_proj_wnames[player_point_proj_wnames["Year"] == year]
        filename = 'projections/random_forest/'+str(position)+'/_'+str(year)+'_projections.csv'
        df.to_csv(filename)
    """
    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")
    df = player_point_proj_wnames[player_point_proj_wnames["Year"] == year_to_project]
    filename = 'projections/'+str(position)+'/_'+str(model_name)+'_projections_'+timestr+'.csv'
    df.to_csv(filename)

    return model_name, mean_sq_err, r2_err

def fantasy_points_predictor(models, positions):
    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")

    results = []
    for model in models:
        for position in positions:
            model_name, mean_sq_err, r2_err = predict_PPR(model, position)
            result = [model_name, position, mean_sq_err, r2_err]
            results.append(result)
    results_df = pd.DataFrame(results, columns=['Model Name', 'Position', 'Mean Square Error', 'R2 Score'])
    results_df = results_df.sort_values(by=['Position', 'R2 Score'], ascending=[True, False])
    #print(results_df)
    results_filename = 'predictor_tool_results/model_results_summary_'+timestr+'.csv'
    results_df.to_csv(results_filename)
    print('Simulation complete! Check the predictor_tool_results folder to find summary of models.')

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, BayesianRidge
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor

models_list = [LinearRegression(), Ridge(), Lasso(), BayesianRidge(), RandomForestRegressor(), KNeighborsRegressor(), MLPRegressor()]
positions_list = ['QB', 'RB', 'WR', 'TE']

fantasy_points_predictor(models_list, positions_list)



Predict QB with a LinearRegression model
Predict RB with a LinearRegression model
Predict WR with a LinearRegression model
Predict TE with a LinearRegression model
Predict QB with a Ridge model
Predict RB with a Ridge model
Predict WR with a Ridge model
Predict TE with a Ridge model
Predict QB with a Lasso model
Predict RB with a Lasso model
Predict WR with a Lasso model
Predict TE with a Lasso model
Predict QB with a BayesianRidge model
Predict RB with a BayesianRidge model
Predict WR with a BayesianRidge model
Predict TE with a BayesianRidge model
Predict QB with a RandomForestRegressor model
Predict RB with a RandomForestRegressor model
Predict WR with a RandomForestRegressor model
Predict TE with a RandomForestRegressor model
Predict QB with a KNeighborsRegressor model
Predict RB with a KNeighborsRegressor model
Predict WR with a KNeighborsRegressor model
Predict TE with a KNeighborsRegressor model
Predict QB with a MLPRegressor model
Predict RB with a MLPRegressor model
Predict WR

In [14]:
master_df = pd.read_csv('data/master_df.csv')

In [16]:
master_df.head(10)

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Year,Rank,Player,Team,Position,Age,Games_G,...,Combine_Year,Ft,In,Ht,ADP,AvgMockDraftPosition,3Year_Avg_PPR,3Year_StdDev_PPR,3PriorYear_Avg_PPR,3PriorYear_StdDev_PPR
0,0,0,0,2021,1,Jonathan Taylor,IND,RB,22,17,...,2020.0,5.0,10.0,5.833333,15.0,13.8,312.95,85.064946,252.8,
1,1,1,1,2021,2,Cooper Kupp,LAR,WR,28,17,...,2017.0,6.0,2.0,6.166667,43.0,41.3,306.233333,119.47725,204.766667,67.785643
2,2,2,2,2021,3,Deebo Samuel,SFO,WR,25,16,...,2019.0,5.0,11.0,5.916667,92.0,89.8,202.933333,129.704446,134.9,76.650375
3,3,3,3,2021,4,Josh Allen,BUF,QB,25,17,...,2018.0,6.0,5.0,6.416667,35.0,34.4,362.433333,64.024084,297.6,94.322585
4,4,4,4,2021,5,Austin Ekeler,LAC,RB,26,16,...,,,,,9.0,8.5,272.7,94.62468,214.366667,81.973553
5,5,5,5,2021,6,Justin Herbert,LAC,QB,23,17,...,2020.0,6.0,6.0,6.5,70.0,69.3,356.8,33.941125,332.8,
6,6,6,6,2021,7,Tom Brady,TAM,QB,44,17,...,2000.0,6.0,4.0,6.333333,80.0,77.8,325.433333,56.540369,294.3,38.770607
7,7,7,7,2021,8,Mark Andrews,BAL,TE,26,17,...,2018.0,6.0,5.0,6.416667,54.0,52.8,226.133333,67.521132,161.5,50.551657
8,8,8,8,2021,9,Ja'Marr Chase,CIN,WR,21,17,...,2021.0,6.0,0.0,6.0,78.0,77.4,304.6,,,
9,9,9,9,2021,10,Justin Jefferson,MIN,WR,22,17,...,2020.0,6.0,1.0,6.083333,23.0,22.4,302.3,39.739401,274.2,
