In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [3]:
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor

In [10]:
def create_features(rel_df, pos):
    """Add fantasy-point interaction columns to ``rel_df``.

    For every column from position 4 onward whose name does not contain
    ``"FPTS"``, a new column ``<col>_FP`` is added:

    * names containing ``"_avg"`` are multiplied by ``FPTS/G_MISC_avg``
    * names containing ``"_ewm"`` are multiplied by ``FPTS/G_MISC_ewm``

    Columns matching neither suffix are skipped. Previously such a column
    either reused the product computed for the preceding column (silently
    storing wrong data under its ``_FP`` name) or raised ``UnboundLocalError``
    when it was the first column visited.

    Parameters
    ----------
    rel_df : pandas.DataFrame
        Frame to extend. It is modified in place. The first four columns are
        never used as feature sources.
    pos : str
        Position code; features are only built for "rb", "wr", "qb" and "te".
        Any other value leaves the frame untouched.

    Returns
    -------
    pandas.DataFrame
        The same ``rel_df`` object, with the ``_FP`` columns appended.
    """
    if pos not in ("rb", "wr", "qb", "te"):
        return rel_df

    # Snapshot the source columns so the columns appended below are never
    # revisited by the loop.
    for col in list(rel_df.columns[4:]):
        if "FPTS" in col:
            continue

        if "_avg" in col:
            weight_col = "FPTS/G_MISC_avg"
        elif "_ewm" in col:
            weight_col = "FPTS/G_MISC_ewm"
        else:
            # No matching points-per-game weight for this column.
            continue

        rel_df[col + "_FP"] = rel_df[weight_col] * rel_df[col]

    return rel_df

In [22]:
def train(pos):
    """Fit the per-position random forest on the processed historical data.

    Reads ``processed data/{pos}_proc_data.csv``, keeps rows where ``Rookie``
    is 0, drops the ``Rookie`` and ``Career_Years`` columns, adds the
    interaction features via ``create_features`` and restricts the frame to
    the column list unpickled from ``model features/{pos}_feats``. The model
    is fit on those columns minus ``FPTS_TG`` and ``Season``, with ``FPTS_TG``
    as the target.

    Parameters
    ----------
    pos : str
        One of "wr", "rb", "qb", "te".

    Returns
    -------
    tuple
        ``(model, scaler)``: the fitted ``RandomForestRegressor`` and a
        ``StandardScaler`` that is created but never fitted (scaling is
        disabled; the object is returned only so callers keep receiving a
        two-element tuple).

    Raises
    ------
    ValueError
        If ``pos`` is not a supported position. Previously this surfaced as an
        ``UnboundLocalError`` only after all data had been loaded.
    """
    # Position-specific hyper-parameters; max_features and random_state are
    # shared by every position and supplied below.
    rf_params_by_pos = {
        "wr": {"max_depth": 5, "min_samples_split": 5, "n_estimators": 300},
        "rb": {"max_depth": 20, "min_samples_split": 2, "n_estimators": 300},
        "qb": {"max_depth": 20, "min_samples_split": 5, "n_estimators": 700},
        "te": {"max_depth": 20, "min_samples_split": 10, "n_estimators": 300},
    }
    if pos not in rf_params_by_pos:
        raise ValueError(
            f"Unsupported position {pos!r}; expected one of {sorted(rf_params_by_pos)}"
        )

    df = pd.read_csv(f"processed data/{pos}_proc_data.csv")
    df = df[df["Rookie"] == 0]
    df = df.drop(["Rookie", "Career_Years"], axis=1)
    df_feats = create_features(df, pos)

    # NOTE(review): pickle.load can execute arbitrary code; only load feature
    # lists produced by this project.
    with open(f"model features/{pos}_feats", "rb") as fp:
        model_cols = pickle.load(fp)

    model_df = df_feats[model_cols]

    X = model_df.drop(["FPTS_TG", "Season"], axis=1)
    y = model_df[["FPTS_TG"]]

    rf_opt = RandomForestRegressor(
        max_features="sqrt",
        random_state=0,
        **rf_params_by_pos[pos],
    )
    sc = preprocessing.StandardScaler()

    # ravel() flattens the single-column target frame into the 1-D array
    # the regressor is fit on.
    rf_opt.fit(X, y.values.ravel())

    return rf_opt, sc

In [31]:
def predict_ns(pos, model, sc):
    """Predict ``FPTS_TG`` for the next-season frame of one position.

    Reads ``processed data/{pos}_ns_proc.csv``, keeps rows where ``Rookie`` is
    0, drops the ``Rookie`` and ``Career_Years`` columns, removes rows whose
    ``YDS_RUSHING/G_avg`` is missing, adds the interaction features via
    ``create_features`` and predicts with ``model`` on the columns unpickled
    from ``model features/{pos}_feats`` minus ``FPTS_TG`` and ``Season``.

    Parameters
    ----------
    pos : str
        Position code used to locate the CSV and the feature list.
    model : object
        Fitted estimator exposing ``predict``.
    sc : object
        Unused; scaling is disabled. Kept so existing callers that pass the
        scaler continue to work.

    Returns
    -------
    pandas.DataFrame
        Columns ``pid``, ``Player``, ``Season``, ``AVG``, ``FPTS_TG_pred``
        followed by ``model_cols[3:]``.
    """
    df = pd.read_csv(f"processed data/{pos}_ns_proc.csv")

    df = df[df["Rookie"] == 0]
    df = df.drop(["Rookie", "Career_Years"], axis=1)
    # Take an explicit copy: columns are added to this frame below (by
    # create_features and for the prediction), and writing into a filtered
    # slice is what pandas' SettingWithCopyWarning guards against.
    df = df[~df["YDS_RUSHING/G_avg"].isna()].copy()
    df_feats = create_features(df, pos)

    # NOTE(review): pickle.load can execute arbitrary code; only load feature
    # lists produced by this project.
    with open(f"model features/{pos}_feats", "rb") as fp:
        model_cols = pickle.load(fp)

    model_df = df_feats[model_cols]

    X = model_df.drop(["FPTS_TG", "Season"], axis=1)
    pred = model.predict(X)

    df_feats["FPTS_TG_pred"] = pred

    # model_cols[3:] skips the first three entries of the pickled list.
    # NOTE(review): presumably those are identifier/target columns already
    # listed explicitly here; confirm against the pickled feature list.
    df_board = df_feats[["pid", "Player", "Season", "AVG", "FPTS_TG_pred"] + model_cols[3:]]
    return df_board

Unnamed: 0,pid,Player,Season,AVG,FPTS_TG,Y/A_PASSING_avg,FPTS/G_MISC_avg,CMP_PASSING/G_avg,ATT_PASSING/G_avg,PCT_PASSING/G_avg,...,Y/A_PASSING/G_ewm,TD_PASSING/G_ewm,INT_PASSING/G_ewm,SACKS_PASSING/G_ewm,ATT_RUSHING/G_ewm,YDS_RUSHING/G_ewm,TD_RUSHING/G_ewm,FL_MISC/G_ewm,Rookie,Career_Years
0,16413,Patrick Mahomes II KC (10),2023,15.0,,7.833333,24.2,25.745098,38.67451,4.08902,...,0.475786,2.363922,0.655433,1.552889,3.805951,21.37323,0.174504,0.105588,0,6
1,17298,Josh Allen BUF (13),2023,20.0,,7.4,24.766667,23.367647,35.818627,3.922059,...,0.439895,2.135137,0.7974,1.73763,7.101512,41.021996,0.412908,0.274749,0,5
2,19275,Jalen Hurts PHI (10),2023,22.7,,7.5,18.166667,14.4,23.111111,3.995556,...,0.5063,1.096872,0.43387,1.899463,8.904201,45.047563,0.651564,0.133333,0,3
3,17233,Lamar Jackson BAL (13),2023,34.3,,7.233333,21.4,17.85,28.022222,4.950556,...,0.571008,1.461707,0.747298,2.434184,10.207643,64.501897,0.272386,0.216019,0,5
4,19196,Joe Burrow CIN (7),2023,35.0,,7.666667,20.033333,24.621078,36.260784,4.986471,...,0.527216,1.934772,0.713423,2.83886,3.664687,12.482498,0.241269,0.210619,0,3
5,18635,Justin Herbert LAC (5),2023,46.0,,7.2,21.1,26.839216,40.104575,4.109412,...,0.432826,1.850619,0.700232,2.080276,3.457132,13.143271,0.132051,0.113904,0,3
6,19781,Justin Fields CHI (13),2023,47.0,,7.0,15.95,13.025,21.85,4.4675,...,0.515445,0.905516,0.774755,3.390524,8.73367,59.134401,0.381455,0.250694,0,2
7,19780,Trevor Lawrence JAC (9),2023,56.0,,6.5,15.3,21.941176,34.882353,3.702941,...,0.387399,1.153837,0.689878,1.710063,3.915079,18.165364,0.221021,0.43195,0,2
8,15600,Dak Prescott DAL (7),2023,77.7,,7.733333,22.1,25.858333,38.161111,7.805556,...,0.80639,2.017053,0.947821,1.808926,3.475736,14.008845,0.193705,0.293823,0,7
9,16398,Deshaun Watson CLE (5),2023,82.7,,7.7,19.3,20.1875,31.166667,7.04375,...,0.865008,1.537733,0.669374,3.22115,5.84467,28.579864,0.175296,0.077665,0,5


In [25]:
# Train one model per position and stack the next-season prediction boards.
pos = ["wr", "rb", "te", "qb"]

# Collect the per-position boards and concatenate once at the end, instead of
# re-copying a growing frame on every iteration (and instead of concatenating
# onto an empty placeholder DataFrame).
position_boards = []

for p in tqdm(pos):
    model, scaler = train(p)

    pred_board = predict_ns(p, model, scaler)

    position_boards.append(pred_board)

# Row labels are kept as-is (not reset), so labels repeat across positions.
full_board = pd.concat(position_boards)

100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:03<00:00,  1.29it/s]


In [26]:
#pred_board.sort_values("FPTS_TG_pred", ascending=False).head(20)

In [27]:
#pred_board.sort_values("FPTS_TG_pred", ascending=False).head(20)

In [28]:
# Show the 20 rows with the lowest "AVG" value (ascending is the default order).
full_board.sort_values(by="AVG").head(20)

Unnamed: 0,pid,Player,Season,AVG,FPTS_TG_pred,FPTS/G_MISC_avg,REC_RECEIVING/G_avg,TGT_RECEIVING/G_avg,YDS_RECEIVING/G_avg,FPTS/G_MISC_ewm,...,Y/A_PASSING_avg_FP,CMP_PASSING/G_avg_FP,ATT_PASSING/G_avg_FP,YDS_PASSING/G_avg_FP,TD_PASSING/G_avg_FP,Y/A_PASSING_ewm_FP,CMP_PASSING/G_ewm_FP,ATT_PASSING/G_ewm_FP,YDS_PASSING/G_ewm_FP,TD_PASSING/G_ewm_FP
0,19236,Justin Jefferson MIN (13),2023,1.3,15.177861,16.2,6.460784,9.48652,96.323529,16.594503,...,,,,,,,,,,
0,16393,Christian McCaffrey SF (9),2023,1.7,16.658,21.8,,,,20.425414,...,,,,,,,,,,
1,19788,Ja'Marr Chase CIN (7),2023,3.0,15.619821,15.4,5.728507,8.918552,83.024887,15.382843,...,,,,,,,,,,
1,16483,Austin Ekeler LAC (5),2023,4.0,19.344333,17.075,,,,17.63039,...,,,,,,,,,,
2,16433,Cooper Kupp LAR (10),2023,5.3,15.829172,16.866667,7.665359,10.130283,89.894989,17.61288,...,,,,,,,,,,
0,11594,Travis Kelce KC (10),2023,5.7,13.660302,15.466667,6.406863,,81.139461,15.276403,...,,,,,,,,,,
3,15802,Tyreek Hill MIA (10),2023,7.3,15.518191,16.6,6.443137,9.45098,86.179085,16.374793,...,,,,,,,,,,
2,17240,Saquon Barkley NYG (13),2023,8.3,13.138333,12.05,,,,12.31912,...,,,,,,,,,,
4,17246,Nick Chubb CLE (5),2023,11.3,15.572,15.475,,,,15.514382,...,,,,,,,,,,
4,13981,Stefon Diggs BUF (13),2023,11.3,16.067451,15.366667,6.822304,9.732843,84.528186,15.29517,...,,,,,,,,,,
