In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

import numpy as np
from tqdm import tqdm

In [2]:
df = pd.read_csv(f"processed data/wr_proc_data.csv")

In [3]:
df = df[df["Rookie"] == 0]

In [4]:
df[["pid", "Player", "FPTS/G_MISC_avg", "FPTS/G_MISC_ewm", "Season"]].sort_values("FPTS/G_MISC_avg", ascending=0)

Unnamed: 0,pid,Player,FPTS/G_MISC_avg,FPTS/G_MISC_ewm,Season
3,9808,Antonio Brown (FA),18.400000,18.313937,2018
414,12123,Davante Adams (LV),18.033333,18.468353,2021
265,9808,Antonio Brown (FA),17.900000,18.010994,2019
561,12123,Davante Adams (LV),17.833333,18.147194,2022
351,9808,Antonio Brown (FA),16.933333,16.400943,2020
...,...,...,...,...,...
558,13977,Ty Montgomery (NE),0.000000,0.000000,2021
409,13977,Ty Montgomery (NE),0.000000,0.000000,2020
69,16757,David Moore (TB),0.000000,0.000000,2018
536,19736,Dezmon Patmon (BUF),0.000000,0.000000,2021


In [5]:
df.Season.value_counts()

2018    128
2021    123
2019    117
2020    101
2022     73
Name: Season, dtype: int64

In [6]:
corrs = df.corr(numeric_only=True)

In [7]:
corrs[["FPTS_TG"]]

Unnamed: 0,FPTS_TG
pid,-0.028053
Season,0.10732
AVG,-0.715617
FPTS_TG,1.0
FPTS/G_MISC_avg,0.67611
ATT_RUSHING/G_avg,-0.042159
YDS_RUSHING/G_avg,0.017107
TD_RUSHING/G_avg,0.023284
REC_RECEIVING/G_avg,0.638836
TGT_RECEIVING/G_avg,0.617874


In [8]:
rel_cols = ["Season"]
for i, vals in corrs[["FPTS_TG"]].iterrows():
    val = vals["FPTS_TG"]
    if (val > 0.11 or val < -0.11) and "_ls" not in i:
        rel_cols.append(i)

In [9]:
rel_df = df[rel_cols]

In [10]:
rel_cols

['Season',
 'AVG',
 'FPTS_TG',
 'FPTS/G_MISC_avg',
 'REC_RECEIVING/G_avg',
 'TGT_RECEIVING/G_avg',
 'YDS_RECEIVING/G_avg',
 'TD_RECEIVING/G_avg',
 'YDS/REC_RECEIVING_avg',
 'YDS/TGT_RECEIVING_avg',
 'REC/TGT_RECEIVING_avg',
 'FPTS/G_MISC_ewm',
 'REC_RECEIVING/G_ewm',
 'TGT_RECEIVING/G_ewm',
 'YDS_RECEIVING/G_ewm',
 'TD_RECEIVING/G_ewm',
 'YDS/REC_RECEIVING_ewm',
 'YDS/TGT_RECEIVING_ewm',
 'REC/TGT_RECEIVING_ewm',
 'Career_Years']

In [11]:
for col in rel_cols[2:]:
    if "FPTS_TG" not in col:
        new_c = rel_df[col]/rel_df["AVG"]

        new_name = col+"_ADP"
    
        rel_df[new_name] = new_c

In [12]:
corrs = rel_df.corr()

In [13]:
corrs[["FPTS_TG"]]

Unnamed: 0,FPTS_TG
Season,0.10732
AVG,-0.715617
FPTS_TG,1.0
FPTS/G_MISC_avg,0.67611
REC_RECEIVING/G_avg,0.638836
TGT_RECEIVING/G_avg,0.617874
YDS_RECEIVING/G_avg,0.668811
TD_RECEIVING/G_avg,0.569134
YDS/REC_RECEIVING_avg,0.113846
YDS/TGT_RECEIVING_avg,0.269479


In [14]:
model_cols = ["Season"]
for i, vals in corrs[["FPTS_TG"]].iterrows():
    val = vals["FPTS_TG"]
    if val > 0.55 or val < -0.55:
        model_cols.append(i)


In [15]:
model_cols

['Season',
 'AVG',
 'FPTS_TG',
 'FPTS/G_MISC_avg',
 'REC_RECEIVING/G_avg',
 'TGT_RECEIVING/G_avg',
 'YDS_RECEIVING/G_avg',
 'TD_RECEIVING/G_avg',
 'FPTS/G_MISC_ewm',
 'REC_RECEIVING/G_ewm',
 'TGT_RECEIVING/G_ewm',
 'YDS_RECEIVING/G_ewm',
 'TD_RECEIVING/G_ewm',
 'YDS/REC_RECEIVING_avg_ADP',
 'YDS/TGT_RECEIVING_avg_ADP',
 'REC/TGT_RECEIVING_avg_ADP',
 'YDS/REC_RECEIVING_ewm_ADP',
 'YDS/TGT_RECEIVING_ewm_ADP',
 'REC/TGT_RECEIVING_ewm_ADP']

In [16]:
#rel_df = df[rel_cols]

In [17]:
for col in rel_df.columns[2:]:
    if "FPTS" not in col and "_ADP" not in col:
        if "_avg" in col:
            new_c = rel_df["FPTS/G_MISC_avg"]*rel_df[col]
        elif "_ewm" in col:
            new_c = rel_df["FPTS/G_MISC_ewm"]*rel_df[col]
        
        new_name = col+"_FP"

        rel_df[new_name] = new_c

In [18]:
corrs = rel_df.corr()

In [19]:
corrs[["FPTS_TG"]].tail(15)

Unnamed: 0,FPTS_TG
REC_RECEIVING/G_avg_FP,0.664052
TGT_RECEIVING/G_avg_FP,0.656676
YDS_RECEIVING/G_avg_FP,0.675593
TD_RECEIVING/G_avg_FP,0.620643
YDS/REC_RECEIVING_avg_FP,0.641492
YDS/TGT_RECEIVING_avg_FP,0.673086
REC/TGT_RECEIVING_avg_FP,0.681433
REC_RECEIVING/G_ewm_FP,0.678375
TGT_RECEIVING/G_ewm_FP,0.673946
YDS_RECEIVING/G_ewm_FP,0.690727


In [20]:
#model_cols = ["Season"]
for i, vals in corrs[["FPTS_TG"]].tail(15).iterrows():
    val = vals["FPTS_TG"]
    if val > 0.65 or val < -0.65:
        model_cols.append(i)

In [21]:
model_cols

['Season',
 'AVG',
 'FPTS_TG',
 'FPTS/G_MISC_avg',
 'REC_RECEIVING/G_avg',
 'TGT_RECEIVING/G_avg',
 'YDS_RECEIVING/G_avg',
 'TD_RECEIVING/G_avg',
 'FPTS/G_MISC_ewm',
 'REC_RECEIVING/G_ewm',
 'TGT_RECEIVING/G_ewm',
 'YDS_RECEIVING/G_ewm',
 'TD_RECEIVING/G_ewm',
 'YDS/REC_RECEIVING_avg_ADP',
 'YDS/TGT_RECEIVING_avg_ADP',
 'REC/TGT_RECEIVING_avg_ADP',
 'YDS/REC_RECEIVING_ewm_ADP',
 'YDS/TGT_RECEIVING_ewm_ADP',
 'REC/TGT_RECEIVING_ewm_ADP',
 'REC_RECEIVING/G_avg_FP',
 'TGT_RECEIVING/G_avg_FP',
 'YDS_RECEIVING/G_avg_FP',
 'YDS/TGT_RECEIVING_avg_FP',
 'REC/TGT_RECEIVING_avg_FP',
 'REC_RECEIVING/G_ewm_FP',
 'TGT_RECEIVING/G_ewm_FP',
 'YDS_RECEIVING/G_ewm_FP',
 'YDS/REC_RECEIVING_ewm_FP',
 'YDS/TGT_RECEIVING_ewm_FP',
 'REC/TGT_RECEIVING_ewm_FP',
 'Career_Years_FP']

In [22]:
import pickle
with open("model features/wr_feats", "wb") as fp:   #Pickling
    pickle.dump(model_cols, fp)

In [23]:
model_df = rel_df[model_cols].dropna()

In [24]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [25]:
X = model_df.drop(["FPTS_TG", "Season"], axis=1)
y = model_df[["FPTS_TG"]]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

lr = LinearRegression()
sc = preprocessing.StandardScaler()

X_train_sc = sc.fit_transform(X_train)

X_test_sc = sc.transform(X_test)

lr.fit(X_train_sc, y_train)
y_pred = lr.predict(X_test_sc)

In [26]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [27]:
def evaluate(y_test, y_pred):
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print("mae: ", mae)
    print("mse: ", mse)
    print("r2: ", r2)

In [28]:
evaluate(y_test, y_pred)

mae:  2.3710211361310054
mse:  9.070542136781366
r2:  0.5832776929890328


In [29]:
X_22 = model_df[model_df.Season == 2022].drop(["FPTS_TG", "Season"], axis=1)
mdd = df[df.Season == 2022][["FPTS_TG", "Player"]].copy()
mdd["comp"] = lr.predict(sc.transform(X_22))

evaluate(mdd.FPTS_TG, mdd.comp)

mae:  1.7771186788344422
mse:  4.751222693924019
r2:  0.668414754767513


In [30]:
from sklearn.ensemble import RandomForestRegressor

In [31]:
from sklearn.model_selection import RandomizedSearchCV

In [53]:
random_grid = {'n_estimators': [200, 400, 600],
               'max_features': ["sqrt"],
               'max_depth': [2, 5, 10, 20],
               'min_samples_split': [2, 5, 10],
              }

In [54]:
rf = RandomForestRegressor()

In [55]:
rf_random = RandomizedSearchCV(estimator = rf,param_distributions = random_grid,n_iter = 100, cv = 5, verbose=99, random_state=21, n_jobs = -1)

In [56]:
rf_random.fit(X_train, y_train)



Fitting 5 folds for each of 36 candidates, totalling 180 fits


  self.best_estimator_.fit(X, y, **fit_params)


In [57]:
rf_random.best_params_

{'n_estimators': 400,
 'min_samples_split': 10,
 'max_features': 'sqrt',
 'max_depth': 5}

In [58]:
rf_opt = RandomForestRegressor(max_depth=5, max_features="sqrt", min_samples_split = 10, n_estimators=400, random_state=0)

In [59]:
rf_opt.fit(X_train, y_train)

  rf_opt.fit(X_train, y_train)


In [60]:
y_pred = rf_opt.predict(X_test)

In [61]:
evaluate(y_train, rf_opt.predict(X_train))

mae:  1.7933622138081615
mse:  5.2755793046988595
r2:  0.7361131103582583


In [62]:
evaluate(y_test, y_pred)

mae:  2.2509980098467186
mse:  7.724966484526112
r2:  0.6450966429051366


In [63]:
X_22 = model_df[model_df.Season == 2022].drop(["FPTS_TG", "Season"], axis=1)
mdd = df[df.Season == 2022][["FPTS_TG", "Player"]].copy()
mdd["comp"] = rf_opt.predict((X_22))

evaluate(mdd.FPTS_TG, mdd.comp)

mae:  1.640871643988367
mse:  3.8731542206755836
r2:  0.7296946754930419


In [64]:
mdd.head(30)

Unnamed: 0,FPTS_TG,Player,comp
560,17.9,Justin Jefferson (MIN),15.809891
561,16.8,Davante Adams (LV),16.371283
562,16.6,Tyreek Hill (MIA),13.564888
563,15.7,Stefon Diggs (BUF),14.592796
564,15.0,A.J. Brown (PHI),12.02359
565,14.6,CeeDee Lamb (DAL),12.513669
566,13.0,Jaylen Waddle (MIA),11.790145
567,13.4,Amon-Ra St. Brown (DET),11.587464
568,12.2,Amari Cooper (CLE),11.286075
569,12.2,DeVonta Smith (PHI),9.246143
