In [14]:
import pandas as pd
import numpy as np
from tqdm import tqdm

In [24]:
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor

In [25]:
def create_features(rel_df, pos):
    if pos == "rb":
        for col in rel_df.columns[4:]:
            if "FPTS" not in col:
                if "_avg" in col:
                    new_c = rel_df["FPTS/G_MISC_avg"]*rel_df[col]
                elif "_ewm" in col:
                    new_c = rel_df["FPTS/G_MISC_ewm"]*rel_df[col]
                
                new_name = col+"_FP"

                rel_df[new_name] = new_c
    return rel_df

In [29]:
def train(pos):
    df = pd.read_csv(f"processed data/{pos}_proc_data.csv")
    df = df[df["Rookie"] == 0]
    df = df.drop(["Rookie", "Career_Years"], axis=1)
    df_feats = create_features(df, pos)
    
    
    with open(f"{pos}_feats", "rb") as fp:   # Unpickling
        model_cols = pickle.load(fp)
        
    model_df = df_feats[model_cols]
    
    X = model_df.drop(["FPTS_TG", "Season"], axis=1)
    y = model_df[["FPTS_TG"]]

    rf_opt = RandomForestRegressor(max_depth=20, max_features="sqrt", min_samples_split = 5, n_estimators=300, random_state=0)
    
    sc = preprocessing.StandardScaler()
    #X = sc.fit_transform(X)
    
    
    rf_opt.fit(X, y)
    
    return rf_opt, sc#[model_df.Season == 2022]

In [32]:
def predict_ns(pos, model, sc):
    df = pd.read_csv(f"processed data/{pos}_ns_proc.csv")
    df = df[df["Rookie"] == 0]
    df = df.drop(["Rookie", "Career_Years"], axis=1)
    df = df[~df["YDS_RUSHING/G_avg"].isna()]
    #return df
    df_feats = create_features(df, pos)
    
    
    with open(f"{pos}_feats", "rb") as fp:   # Unpickling
        model_cols = pickle.load(fp)
    #return model_cols
    model_df = df_feats[model_cols]
    
    X = model_df.drop(["FPTS_TG", "Season"], axis=1)
    X_sc = X#sc.transform(X)
    
    pred = model.predict(X_sc)
    
    df_feats["FPTS_TG_pred"] = pred
    
    df_board = df_feats[["pid", "Player", "Season", "AVG", "FPTS_TG_pred"]+model_cols[3:]]
    return df_board

In [33]:
model, scaler = train("rb")

  rf_opt.fit(X, y)


In [34]:
pred_board = predict_ns("rb", model, scaler)

In [35]:
pred_board.sort_values("FPTS_TG_pred", ascending=False).head(20)

Unnamed: 0,pid,Player,Season,AVG,FPTS_TG_pred,FPTS/G_MISC_avg,YDS_RUSHING/G_avg,FPTS/G_MISC_ewm,YDS_RUSHING/G_ewm,TD_RUSHING/G_ewm,...,TD_RUSHING/G_avg_FP,REC_RECEIVING/G_avg_FP,TGT_RECEIVING/G_avg_FP,YDS_RECEIVING/G_avg_FP,ATT_RUSHING/G_ewm_FP,YDS_RUSHING/G_ewm_FP,TD_RUSHING/G_ewm_FP,REC_RECEIVING/G_ewm_FP,TGT_RECEIVING/G_ewm_FP,YDS_RECEIVING/G_ewm_FP
1,16483,Austin Ekeler LAC (5),2023,4.0,18.987045,17.075,49.643382,17.63039,51.897749,0.551158,...,7.693166,93.140358,113.529917,790.873824,205.319159,914.977537,9.717123,97.221966,118.845876,779.094167
6,15514,Derrick Henry TEN (7),2023,17.0,17.121957,20.025,110.651042,19.909307,108.794905,1.017221,...,20.984531,33.541875,43.05375,325.322812,467.46685,2166.0312,20.252164,36.321265,45.598461,364.6878
0,16393,Christian McCaffrey SF (9),2023,1.7,16.361995,21.8,72.957589,20.425414,70.215253,0.678104,...,17.535986,126.452976,149.430375,1117.617341,323.281638,1434.17559,13.850548,112.742825,133.999399,999.290027
4,17246,Nick Chubb CLE (5),2023,11.3,15.829129,15.475,90.48153,15.514382,90.119872,0.697758,...,10.744722,25.534292,32.979875,218.052015,265.26415,1398.154108,10.825278,24.601827,31.9412,213.510957
5,19217,Jonathan Taylor IND (11),2023,13.7,15.240504,15.833333,85.404248,15.204439,84.29331,0.65638,...,11.217865,37.399782,47.5,279.861928,258.626317,1281.632514,9.979894,35.802166,46.76146,253.905015
8,18269,Josh Jacobs LV (13),2023,21.7,14.391077,14.85,78.707541,15.349271,80.103423,0.671905,...,9.817127,38.818303,48.664548,279.79371,275.202685,1229.529173,10.313258,43.80104,54.044266,313.992862
2,17240,Saquon Barkley NYG (13),2023,8.3,13.148604,12.05,54.627747,12.31912,57.691774,0.347252,...,3.703554,40.641853,57.04715,312.104104,179.309353,710.711869,4.277836,41.121427,57.078015,295.007917
23,16374,Dalvin Cook,2023,62.7,13.038122,17.25,87.60989,16.066667,84.474899,0.662592,...,12.952832,51.051693,66.500869,420.21368,291.946025,1357.230051,10.645647,44.255734,59.47481,348.459661
15,16420,Joe Mixon CIN (7),2023,38.3,12.703075,14.65,68.19375,14.783249,66.042227,0.547389,...,7.660729,45.094531,55.471615,329.487656,248.812349,976.318698,8.092183,48.376987,59.362281,352.85235
10,19302,Najee Harris PIT (6),2023,27.3,12.593674,13.7,65.705882,13.391169,64.868203,0.411765,...,5.641176,46.338235,59.232353,280.447059,225.678591,868.661058,5.514011,43.063673,55.126524,258.042174


In [36]:
pred_board.sort_values("AVG", ascending=True).head(20)

Unnamed: 0,pid,Player,Season,AVG,FPTS_TG_pred,FPTS/G_MISC_avg,YDS_RUSHING/G_avg,FPTS/G_MISC_ewm,YDS_RUSHING/G_ewm,TD_RUSHING/G_ewm,...,TD_RUSHING/G_avg_FP,REC_RECEIVING/G_avg_FP,TGT_RECEIVING/G_avg_FP,YDS_RECEIVING/G_avg_FP,ATT_RUSHING/G_ewm_FP,YDS_RUSHING/G_ewm_FP,TD_RUSHING/G_ewm_FP,REC_RECEIVING/G_ewm_FP,TGT_RECEIVING/G_ewm_FP,YDS_RECEIVING/G_ewm_FP
0,16393,Christian McCaffrey SF (9),2023,1.7,16.361995,21.8,72.957589,20.425414,70.215253,0.678104,...,17.535986,126.452976,149.430375,1117.617341,323.281638,1434.17559,13.850548,112.742825,133.999399,999.290027
1,16483,Austin Ekeler LAC (5),2023,4.0,18.987045,17.075,49.643382,17.63039,51.897749,0.551158,...,7.693166,93.140358,113.529917,790.873824,205.319159,914.977537,9.717123,97.221966,118.845876,779.094167
2,17240,Saquon Barkley NYG (13),2023,8.3,13.148604,12.05,54.627747,12.31912,57.691774,0.347252,...,3.703554,40.641853,57.04715,312.104104,179.309353,710.711869,4.277836,41.121427,57.078015,295.007917
4,17246,Nick Chubb CLE (5),2023,11.3,15.829129,15.475,90.48153,15.514382,90.119872,0.697758,...,10.744722,25.534292,32.979875,218.052015,265.26415,1398.154108,10.825278,24.601827,31.9412,213.510957
5,19217,Jonathan Taylor IND (11),2023,13.7,15.240504,15.833333,85.404248,15.204439,84.29331,0.65638,...,11.217865,37.399782,47.5,279.861928,258.626317,1281.632514,9.979894,35.802166,46.76146,253.905015
6,15514,Derrick Henry TEN (7),2023,17.0,17.121957,20.025,110.651042,19.909307,108.794905,1.017221,...,20.984531,33.541875,43.05375,325.322812,467.46685,2166.0312,20.252164,36.321265,45.598461,364.6878
7,18705,Tony Pollard DAL (7),2023,18.3,10.458201,8.975,42.097917,10.261691,47.311896,0.323714,...,2.42138,17.473203,23.194766,145.507188,93.670084,485.500045,3.321852,22.059075,29.363973,190.862708
8,18269,Josh Jacobs LV (13),2023,21.7,14.391077,14.85,78.707541,15.349271,80.103423,0.671905,...,9.817127,38.818303,48.664548,279.79371,275.202685,1229.529173,10.313258,43.80104,54.044266,313.992862
9,22726,Rhamondre Stevenson NE (11),2023,24.3,10.412266,10.8,55.838235,11.108831,56.754132,0.344879,...,3.838235,28.217647,36.052941,189.079412,131.384728,630.472067,3.831204,31.780732,40.587521,208.318555
10,19302,Najee Harris PIT (6),2023,27.3,12.593674,13.7,65.705882,13.391169,64.868203,0.411765,...,5.641176,46.338235,59.232353,280.447059,225.678591,868.661058,5.514011,43.063673,55.126524,258.042174


In [9]:
z = pd.read_csv("processed data/rb_proc_data.csv")

In [13]:
z[z.pid == 16673]["FPTS/G_MISC_ewm"]

22      6.300000
118    10.341926
229    13.856969
336    15.006294
448    15.268054
Name: FPTS/G_MISC_ewm, dtype: float64