### Summarized models for Fantasy Football Post Data Pulls and filling in new rows

In [270]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, BayesianRidge, ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
import time

In [None]:
#Here
df = pd.read_csv('data/master_df_w_2022.csv')

#Here
pass_adv_accuracy = pd.read_csv('data/adv_stats/pass_adv_accuracy_all.csv')
pass_adv_air_yards = pd.read_csv('data/adv_stats/pass_adv_air_yards_all.csv')
pass_adv_play_type = pd.read_csv('data/adv_stats/pass_adv_play_type_all.csv')
pass_adv_pressure = pd.read_csv('data/adv_stats/pass_adv_pressure_all.csv')
rush_adv = pd.read_csv('data/adv_stats/rush_adv_all.csv')
rec_adv = pd.read_csv('data/adv_stats/rec_adv_all.csv')

pass_adv_accuracy = pass_adv_accuracy[['Player','Year','Bats','ThAwy','Spikes','Drops','Drop%','BadTh','Bad%','OnTgt','OnTgt%']]
pass_adv_air_yards = pass_adv_air_yards[['Player','Year','IAY','IAY/PA','CAY','CAY/Cmp','CAY/PA','YAC','YAC/Cmp']]
pass_adv_play_type = pass_adv_play_type[['Player','Year','RPO_Plays','RPO_Yds','RPO_PassAtt','RPO_PassYds','RPO_RushAtt','RPO_RushYds','PlayAction_PassAtt','PlayAction_PassYds']]
pass_adv_pressure = pass_adv_pressure[['Player','Year','Sk','PktTime','Bltz','Hrry','Hits','Prss','Prss%','Scrm','Yds/Scr']]
rush_adv = rush_adv[['Player','Year','Rushing_YBC','Rushing_YBC/Att','Rushing_YAC','Rushing_YAC/Att','Rushing_BrkTkl','Rushing_Att/Br']]
rec_adv = rec_adv[['Player','Year','Receiving_YBC','Receiving_YBC/R','Receiving_YAC','Receiving_YAC/R','Receiving_ADOT','Receiving_BrkTkl','Receiving_Rec/Br','Receiving_Drop','Receiving_Drop%','Receiving_Int','Receiving_Rat']]

#Here
print(df.shape)
df_acc = df.merge(pass_adv_accuracy, how='left', on=['Player','Year'])
print(df_acc.shape)
df_air = df_acc.merge(pass_adv_air_yards, how='left', on=['Player','Year'])
print(df_air.shape)
df_play = df_air.merge(pass_adv_play_type, how='left', on=['Player','Year'])
print(df_play.shape)
df_press = df_play.merge(pass_adv_pressure, how='left', on=['Player','Year'])
print(df_press.shape)
df_rush_adv = df_press.merge(rush_adv, how='left', on=['Player','Year'])
print(df_rush_adv.shape)
df_rec_adv = df_rush_adv.merge(rec_adv, how='left', on=['Player','Year'])
print(df_rec_adv.shape)

df_rec_adv.to_csv('data/master_df_adv_w_2022.csv')

In [4]:
def three_year_avg(df, row, param):
    if row['Year'] - 3 >= 2012:
        player_df = df[df['Player'] == row['Player']]
        three_year_data = player_df[(player_df['Year'] >= (row['Year'] - 3)) & (player_df['Year'] <= (row['Year']-1))]
        return three_year_data[param].mean()
    else:
        return np.NaN

def three_year_stddev(df, row, param):
    if row['Year'] - 3 >= 2012:
        player_df = df[df['Player'] == row['Player']]
        three_year_data = player_df[(player_df['Year'] >= (row['Year'] - 3)) & (player_df['Year'] <= (row['Year']-1))]
        return three_year_data[param].std()
    else:
        return np.NaN


three_year = pd.read_csv('data/rolling_avg_data/three_year_avg_df_2023.csv')
three_year['3YearPrior_Avg_PPR'] = three_year.apply(lambda row: three_year_avg(three_year,row, 'Fantasy_PPR'),axis=1)
three_year['3YearPrior_StdDev_PPR'] = three_year.apply(lambda row: three_year_stddev(three_year,row, 'Fantasy_PPR'),axis=1)
three_year['3YearPrior_Avg_PPR'] = np.where(three_year['3YearPrior_Avg_PPR'].isna(), three_year['Fantasy_PPR'], three_year['3YearPrior_Avg_PPR'])
three_year['3YearPrior_StdDev_PPR'] = np.where(three_year['3YearPrior_StdDev_PPR'].isna(), 0, three_year['3YearPrior_StdDev_PPR'])
three_year.to_csv('three_year_avg_df_updated_2023.csv')

In [47]:
def predict_PPR(position, year_to_project, dataset):
    # Master Dataset
    master_df = None
    if dataset == 'yearly':
        master_df = pd.read_csv('data/master_df_adv_w_2022.csv')
    elif dataset == '3year':
        master_df = pd.read_csv('data/rolling_avg_data/three_year_avg_df_updated_2023.csv', encoding='unicode-escape')
    join_column = ""
    model = None

    # Separate Datasets by Position
    df = master_df[master_df['Position'] == position]

    if position == "QB":
        df = df[['Year','Age','Passing_TD%','Passing_Int%','Passing_Lng','Passing_Y/A','Passing_AY/A','Passing_Y/C','Passing_Sk%','Passing_NY/A','Passing_ANY/A','Rushing_Lng','Rushing_Y/A_y','3YearPrior_Avg_PPR','3YearPrior_StdDev_PPR','Bats','ThAwy','Spikes','Drop%','Bad%','OnTgt%','IAY/PA','CAY/Cmp','CAY/PA','YAC/Cmp','Sk','PktTime','Bltz','Hrry','Hits','Prss','Prss%','Scrm','Yds/Scr','Fantasy_PPR']]
        join_column = 'Passing_TD%'
        model = BayesianRidge()
    elif position == "RB":
        df = df[['Year','Age','Rushing_Fumbles','Rushing_Lng','Rushing_Y/A_y','Receiving_Ctch%','Receiving_Lng','Receiving_Y/Tgt','3YearPrior_Avg_PPR','3YearPrior_StdDev_PPR','Rushing_YBC/Att','Rushing_YAC/Att','Rushing_Att/Br','Fantasy_PPR']]
        join_column = 'Rushing_Lng'
        model = RandomForestRegressor()
    elif (position == "WR") or (position == "TE"):
        df = df[['Year','Age','Receiving_Ctch%','Receiving_Lng','Receiving_Y/Tgt','3YearPrior_Avg_PPR','3YearPrior_StdDev_PPR','Receiving_YBC/R','Receiving_YAC/R','Receiving_ADOT','Receiving_Rec/Br','Receiving_Drop%','Receiving_Int','Receiving_Rat','Fantasy_PPR']]
        join_column = 'Receiving_Ctch%'
        if position == "WR":
            model = BayesianRidge()
        if position == "TE":
            model = LinearRegression()
    else:
        raise Exception("Sorry, this position is not supported currently. Please enter one of these four positions: (QB, RB, WR, TE)")

    # Train and test split for test being 2021 data
    df_train = df[(df['Year'] < 2022) & (df['Year'] > 2020)]
    df_test = df[df['Year'] == year_to_project]

    x_train = df_train.drop(['Year','Fantasy_PPR'], axis=1)
    scaler = StandardScaler()
    x_train.replace(np.nan, 0, inplace=True)
    scaler.fit(x_train)
    y_train = df_train[['Fantasy_PPR']]

    x_test = df_test.drop(['Year','Fantasy_PPR'], axis=1)
    scaler = StandardScaler()
    x_test.replace(np.nan,0, inplace=True)
    #x_2022 = df.apply(lambda x: x.fillna(x.median()),axis=0)
    scaler.fit(x_test)
    y_test = df_test[['Fantasy_PPR']]

    # ML Model using using fit and predict Fantasy Points
    model_name = type(model).__name__
    print('Predict '+ position + ' with a '+ model_name + ' model')
    if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
        model = model.fit(x_train,y_train)
    else:
        model = model.fit(x_train,y_train.values.ravel())
    y_preds = model.predict(x_test)
    
    mean_sq_err = None
    r2_err = None

    # Create new dataframe for projections
    player_point_proj = None
    if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
        player_point_proj = pd.DataFrame({'Age': x_test['Age'], join_column: x_test[join_column], 'Model_Projection_Points': y_preds[:,0], 'Actual_Points': y_test['Fantasy_PPR']})
    else:
        player_point_proj = pd.DataFrame({'Age': x_test['Age'], join_column: x_test[join_column], 'Model_Projection_Points': y_preds, 'Actual_Points': y_test['Fantasy_PPR']})
    # Merge player names and info back in
    player_point_proj_wnames = player_point_proj.merge(master_df[['Player', 'Age', 'Position', 'Year', join_column, 'Fantasy_PPR']], how='inner', left_on=['Age', join_column], right_on=['Age', join_column])
    player_point_proj_wnames = player_point_proj_wnames.drop(columns=['Fantasy_PPR'])

    # Calculate Model vs Actual Delta
    player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames['Model_Projection_Points'] - player_point_proj_wnames['Actual_Points']

    # Prep Dataframe for csv output
    player_point_proj_wnames = player_point_proj_wnames.sort_values(by='Model_Projection_Points', ascending=False)
    player_point_proj_wnames = player_point_proj_wnames[['Player', 'Position', 'Age', 'Year', 'Model_Projection_Points', 'Actual_Points', 'Model_v_Actual_Delta']]
    # Save dataframes

    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")
    df = player_point_proj_wnames[player_point_proj_wnames["Year"] == year_to_project]
    adp = pd.read_csv('data/fantasy_draft_adp/draft_adp_2023.csv')
    adp = adp.rename({'Name': 'Player'}, axis=1)
    df_w_adp = df.merge(adp, how='left', on='Player')
    df_w_adp = df_w_adp[['Player','Position','Age','Year','Model_Projection_Points','#','StdDev','High','Low']]
    #filename = 'projections/'+str(position)+'/'+str(model_name)+'2022_projections_'+timestr+'.csv'
    filename = 'draft_proj_082723/'+str(position)+'/'+str(model_name)+'2023_projections_'+timestr+'.csv'
    df_w_adp = df_w_adp[df_w_adp['Position'] == position]
    df_w_adp.to_csv(filename)

    return df_w_adp


In [48]:
positions = ['QB','RB','WR','TE']
for position in positions:
    predict_PPR(position,2022,'3year')

Predict QB with a BayesianRidge model
Predict RB with a RandomForestRegressor model
Predict WR with a BayesianRidge model
Predict TE with a LinearRegression model


### Grab top 10 features for each and predict with those

In [57]:
qb_cols = ['3PriorYear_Avg_PPR','Passing_Lng','Age','Bltz','Prss','Prss%','Bad%','Hrry','ThAwy','Drop%','Passing_Int%','Passing_Sk%','YAC/Cmp']
rb_cols = ['3PriorYear_Avg_PPR','Age','Rushing_Lng','3PriorYear_StdDev_PPR','Rushing_Y/A_y','Rushing_Att/Br']
wr_cols = ['Age','Receiving_Int','Receiving_ADOT','3PriorYear_Avg_PPR','Receiving_Y/Tgt','Receiving_Ctch%','Receiving_Drop%','Receiving_Lng','Receiving_Rat']
te_cols = ['Receiving_Int','Age','Receiving_Y/Tgt','Receiving_ADOT','Receiving_YAC/R','Receiving_Drop%','3PriorYear_Avg_PPR']

In [60]:
def predict_PPR(position, year_to_project, dataset):
    # Master Dataset
    master_df = None
    if dataset == 'yearly':
        master_df = pd.read_csv('data/master_df_adv_w_2022.csv')
    elif dataset == '3year':
        master_df = pd.read_csv('data/rolling_avg_data/three_year_avg_df_updated_2023.csv', encoding='unicode-escape')
    join_column = ""
    model = None

    # Separate Datasets by Position
    df = master_df[master_df['Position'] == position]

    if position == "QB":
        df = df[['Year','3YearPrior_Avg_PPR','Passing_Lng','Age','Bltz','Prss','Prss%','Bad%','Hrry','ThAwy','Drop%','Passing_Int%','Passing_Sk%','YAC/Cmp','Fantasy_PPR']]
        join_column = 'Passing_Int%'
        model = BayesianRidge()
    elif position == "RB":
        df = df[['Year','3YearPrior_Avg_PPR','Age','Rushing_Lng','3YearPrior_StdDev_PPR','Rushing_Y/A_y','Rushing_Att/Br','Fantasy_PPR']]
        join_column = 'Rushing_Lng'
        model = RandomForestRegressor()
    elif position == "WR":
        df = df[['Year','Age','Receiving_Int','Receiving_ADOT','3YearPrior_Avg_PPR','Receiving_Y/Tgt','Receiving_Ctch%','Receiving_Drop%','Receiving_Lng','Receiving_Rat','Fantasy_PPR']]
        join_column = 'Receiving_Ctch%'
        model = BayesianRidge()
    elif position == "TE":
        df = df[['Year','Receiving_Int','Age','Receiving_Y/Tgt','Receiving_ADOT','Receiving_YAC/R','Receiving_Drop%','3YearPrior_Avg_PPR','Fantasy_PPR']]
        join_column = 'Receiving_ADOT'
        model = LinearRegression()
    else:
        raise Exception("Sorry, this position is not supported currently. Please enter one of these four positions: (QB, RB, WR, TE)")

    # Train and test split for test being 2021 data
    df_train = df[(df['Year'] < 2022) & (df['Year'] > 2019)]
    df_test = df[df['Year'] == year_to_project]

    x_train = df_train.drop(['Year','Fantasy_PPR'], axis=1)
    scaler = StandardScaler()
    x_train.replace(np.nan, 0, inplace=True)
    scaler.fit(x_train)
    y_train = df_train[['Fantasy_PPR']]

    x_test = df_test.drop(['Year','Fantasy_PPR'], axis=1)
    scaler = StandardScaler()
    x_test.replace(np.nan,0, inplace=True)
    #x_2022 = df.apply(lambda x: x.fillna(x.median()),axis=0)
    scaler.fit(x_test)
    y_test = df_test[['Fantasy_PPR']]

    # ML Model using using fit and predict Fantasy Points
    model_name = type(model).__name__
    print('Predict '+ position + ' with a '+ model_name + ' model')
    if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
        model = model.fit(x_train,y_train)
    else:
        model = model.fit(x_train,y_train.values.ravel())
    y_preds = model.predict(x_test)
    
    mean_sq_err = None
    r2_err = None

    # Create new dataframe for projections
    player_point_proj = None
    if (model_name == 'LinearRegression') or (model_name == 'Ridge'):
        player_point_proj = pd.DataFrame({'Age': x_test['Age'], join_column: x_test[join_column], 'Model_Projection_Points': y_preds[:,0], 'Actual_Points': y_test['Fantasy_PPR']})
    else:
        player_point_proj = pd.DataFrame({'Age': x_test['Age'], join_column: x_test[join_column], 'Model_Projection_Points': y_preds, 'Actual_Points': y_test['Fantasy_PPR']})
    # Merge player names and info back in
    player_point_proj_wnames = player_point_proj.merge(master_df[['Player', 'Age', 'Position', 'Year', join_column, 'Fantasy_PPR']], how='inner', left_on=['Age', join_column], right_on=['Age', join_column])
    player_point_proj_wnames = player_point_proj_wnames.drop(columns=['Fantasy_PPR'])

    # Calculate Model vs Actual Delta
    player_point_proj_wnames['Model_v_Actual_Delta'] = player_point_proj_wnames['Model_Projection_Points'] - player_point_proj_wnames['Actual_Points']

    # Prep Dataframe for csv output
    player_point_proj_wnames = player_point_proj_wnames.sort_values(by='Model_Projection_Points', ascending=False)
    player_point_proj_wnames = player_point_proj_wnames[['Player', 'Position', 'Age', 'Year', 'Model_Projection_Points', 'Actual_Points', 'Model_v_Actual_Delta']]
    # Save dataframes

    import time
    timestr = time.strftime("%Y%m%d-%H%M%S")
    df = player_point_proj_wnames[player_point_proj_wnames["Year"] == year_to_project]
    adp = pd.read_csv('data/fantasy_draft_adp/draft_adp_2023.csv')
    adp = adp.rename({'Name': 'Player'}, axis=1)
    df_w_adp = df.merge(adp, how='left', on='Player')
    df_w_adp = df_w_adp[['Player','Position','Age','Year','Model_Projection_Points','#','StdDev','High','Low']]
    #filename = 'projections/'+str(position)+'/'+str(model_name)+'2022_projections_'+timestr+'.csv'
    filename = 'draft_proj_082723/'+str(position)+'/'+str(model_name)+'2023_projections_'+timestr+'.csv'
    df_w_adp = df_w_adp[df_w_adp['Position'] == position]
    df_w_adp.to_csv(filename)

    return df_w_adp


positions = ['QB','RB','WR','TE']
for position in positions:
    predict_PPR(position,2022,'3year')

## YOLO

### Import Three Year Data

In [124]:
import pandas as pd

master_df = pd.read_csv('data/rolling_avg_data/three_year_avg_df_updated_2023.csv', encoding='unicode-escape')

### Train and Test models to find best Performing regressor model

In [195]:
passing_data = master_df[['Player','Year','Age','Passing_Cmp','Passing_Att','Passing_Yds','Passing_TD','Passing_Int','Passing_TD%','Passing_Int%','Passing_1D','Passing_Lng','Passing_Y/A','Passing_AY/A','Passing_Y/C','Passing_Y/G','Passing_Rate','Passing_QBR','Passing_Sk','Passing_Sk%','Passing_NY/A','Passing_ANY/A','Bats','ThAwy','Spikes','Drops','Drop%','BadTh','Bad%','OnTgt','OnTgt%','IAY','IAY/PA','CAY','CAY/Cmp','CAY/PA','YAC','YAC/Cmp','RPO_Plays','RPO_Yds','RPO_PassAtt','RPO_PassYds','RPO_RushAtt','RPO_RushYds','PlayAction_PassAtt','PlayAction_PassYds','Sk','PktTime','Bltz','Hrry','Hits','Prss','Prss%','Scrm','Yds/Scr','3YearPrior_Avg_PPR','3YearPrior_StdDev_PPR','Fantasy_PPR']]
rushing_data = master_df[['Player','Year','Age','Rushing_Att','Rushing_Yds','Rushing_Y/A_x','Rushing_TD','Rushing_1D','Rushing_Lng','Rushing_Y/A_y','Rushing_Y/G','Rushing_Fumbles','Rushing_YBC','Rushing_YBC/Att','Rushing_YAC','Rushing_YAC/Att','Rushing_BrkTkl','Rushing_Att/Br','3YearPrior_Avg_PPR','3YearPrior_StdDev_PPR','Fantasy_PPR']]
receiving_data = master_df[['Player','Year','Age','Receiving_Yds','Receiving_Ctch%','Receiving_Lng','Receiving_Y/Tgt','Receiving_R/G','Receiving_YBC','Receiving_YBC/R','Receiving_YAC','Receiving_YAC/R','Receiving_ADOT','Receiving_BrkTkl','Receiving_Rec/Br','Receiving_Drop','Receiving_Drop%','Receiving_Int','Receiving_Rat','3YearPrior_Avg_PPR','3YearPrior_StdDev_PPR','Fantasy_PPR']]

passing_training_data = passing_data[(passing_data['Year'] >= 2019) & (passing_data['Year'] <= 2021)]
rushing_training_data = rushing_data[(rushing_data['Year'] >= 2019) & (rushing_data['Year'] <= 2021)]
wr_receiving_training_data = receiving_data[(receiving_data['Year'] >= 2019) & (receiving_data['Year'] <= 2021)]
te_receiving_training_data = receiving_data[(receiving_data['Year'] >= 2019) & (receiving_data['Year'] <= 2021)]

passing_test_data = passing_data[passing_data['Year'] >= 2022]
rushing_test_data = rushing_data[rushing_data['Year'] >= 2022]
wr_receiving_test_data = receiving_data[receiving_data['Year'] >= 2022]
te_receiving_test_data = receiving_data[receiving_data['Year'] >= 2022]

passing_training_data_x = passing_training_data[['3YearPrior_Avg_PPR','Passing_Lng','Age','Bltz','Prss','Prss%','Bad%','Hrry','ThAwy','Drop%','Passing_Int%','Passing_Sk%','YAC/Cmp']]
rushing_training_data_x = rushing_training_data[['3YearPrior_Avg_PPR','Age','Rushing_Lng','3YearPrior_StdDev_PPR','Rushing_Y/A_y','Rushing_Att/Br']]
wr_receiving_training_data_x = wr_receiving_training_data[['Age','Receiving_Int','Receiving_ADOT','3YearPrior_Avg_PPR','Receiving_Y/Tgt','Receiving_Ctch%','Receiving_YAC/R','Receiving_Drop%','Receiving_Lng','Receiving_Rat']]
te_receiving_training_data_x = te_receiving_training_data[['Receiving_Int','Age','Receiving_Y/Tgt','Receiving_ADOT','Receiving_YAC/R','Receiving_Drop%','3YearPrior_Avg_PPR']]

passing_training_data_y = passing_training_data[['Passing_Yds']]
rushing_training_data_y = rushing_training_data[['Rushing_Yds']]
wr_receiving_training_data_y = wr_receiving_training_data[['Receiving_Yds']]
te_receiving_training_data_y = te_receiving_training_data[['Receiving_Yds']]

passing_test_data_x = passing_test_data[['3YearPrior_Avg_PPR','Passing_Lng','Age','Bltz','Prss','Prss%','Bad%','Hrry','ThAwy','Drop%','Passing_Int%','Passing_Sk%','YAC/Cmp']]
rushing_test_data_x = rushing_test_data[['3YearPrior_Avg_PPR','Age','Rushing_Lng','3YearPrior_StdDev_PPR','Rushing_Y/A_y','Rushing_Att/Br']]
wr_receiving_test_data_x = wr_receiving_test_data[['Age','Receiving_Int','Receiving_ADOT','3YearPrior_Avg_PPR','Receiving_Y/Tgt','Receiving_Ctch%','Receiving_YAC/R','Receiving_Drop%','Receiving_Lng','Receiving_Rat']]
te_receiving_test_data_x = te_receiving_test_data[['Receiving_Int','Age','Receiving_Y/Tgt','Receiving_ADOT','Receiving_YAC/R','Receiving_Drop%','3YearPrior_Avg_PPR']]

passing_test_data_y = passing_test_data[['Passing_Yds']]
rushing_test_data_y = rushing_test_data[['Rushing_Yds']]
wr_receiving_test_data_y = wr_receiving_test_data[['Receiving_Yds']]
te_receiving_test_data_y = te_receiving_test_data[['Receiving_Yds']]

passing_training_data_x = passing_training_data_x.replace('§', 0)
rushing_training_data_x = rushing_training_data_x.replace('§', 0)
wr_receiving_training_data_x = wr_receiving_training_data_x.replace('§', 0)
te_receiving_training_data_x = te_receiving_training_data_x.replace('§', 0)
passing_training_data_y = passing_training_data_y.replace('§', 0)
rushing_training_data_y = rushing_training_data_y.replace('§', 0)
wr_receiving_training_data_y = wr_receiving_training_data_y.replace('§', 0)
te_receiving_training_data_y = te_receiving_training_data_y.replace('§', 0)
passing_test_data_x = passing_test_data_x.replace('§', 0)
rushing_test_data_x = rushing_test_data_x.replace('§', 0)
wr_receiving_test_data_x = wr_receiving_test_data_x.replace('§', 0)
te_receiving_test_data_x = te_receiving_test_data_x.replace('§', 0)
passing_test_data_y = passing_test_data_y.replace('§', 0)
rushing_test_data_y = rushing_test_data_y.replace('§', 0)
wr_receiving_test_data_y = wr_receiving_test_data_y.replace('§', 0)
te_receiving_test_data_y = te_receiving_test_data_y.replace('§', 0)

# Scale Data
scaler = MinMaxScaler()

passing_training_data_x_scaled = scaler.fit_transform(passing_training_data_x)
rushing_training_data_x_scaled = scaler.fit_transform(rushing_training_data_x)
wr_receiving_training_data_x_scaled = scaler.fit_transform(wr_receiving_training_data_x)
te_receiving_training_data_x_scaled = scaler.fit_transform(te_receiving_training_data_x)

passing_training_data_y_scaled = scaler.fit_transform(passing_training_data_y)
rushing_training_data_y_scaled = scaler.fit_transform(rushing_training_data_y)
wr_receiving_training_data_y_scaled = scaler.fit_transform(wr_receiving_training_data_y)
te_receiving_training_data_y_scaled = scaler.fit_transform(te_receiving_training_data_y)

passing_test_data_x_scaled = scaler.fit_transform(passing_test_data_x)
rushing_test_data_x_scaled = scaler.fit_transform(rushing_test_data_x)
wr_receiving_test_data_x_scaled = scaler.fit_transform(wr_receiving_test_data_x)
te_receiving_test_data_x_scaled = scaler.fit_transform(te_receiving_test_data_x)

passing_test_data_y_scaled = scaler.fit_transform(passing_test_data_y)
rushing_test_data_y_scaled = scaler.fit_transform(rushing_test_data_y)
wr_receiving_test_data_y_scaled = scaler.fit_transform(wr_receiving_test_data_y)
te_receiving_test_data_y_scaled = scaler.fit_transform(te_receiving_test_data_y)

# Revert back to Dataframes w/ column names
passing_training_data_x_df = pd.DataFrame(data=passing_training_data_x_scaled,columns=passing_training_data_x.columns)
rushing_training_data_x_df = pd.DataFrame(data=rushing_training_data_x_scaled,columns=rushing_training_data_x.columns)
wr_receiving_training_data_x_df = pd.DataFrame(data=wr_receiving_training_data_x_scaled,columns=wr_receiving_training_data_x.columns)
te_receiving_training_data_x_df = pd.DataFrame(data=te_receiving_training_data_x_scaled,columns=te_receiving_training_data_x.columns)

passing_training_data_y_df = pd.DataFrame(data=passing_training_data_y_scaled,columns=passing_training_data_y.columns)
rushing_training_data_y_df = pd.DataFrame(data=rushing_training_data_y_scaled,columns=rushing_training_data_y.columns)
wr_receiving_training_data_y_df = pd.DataFrame(data=wr_receiving_training_data_y_scaled,columns=wr_receiving_training_data_y.columns)
te_receiving_training_data_y_df = pd.DataFrame(data=te_receiving_training_data_y_scaled,columns=te_receiving_training_data_y.columns)

passing_test_data_x_df = pd.DataFrame(data=passing_test_data_x_scaled,columns=passing_test_data_x.columns)
rushing_test_data_x_df = pd.DataFrame(data=rushing_test_data_x_scaled,columns=rushing_test_data_x.columns)
wr_receiving_test_data_x_df = pd.DataFrame(data=wr_receiving_test_data_x_scaled,columns=wr_receiving_test_data_x.columns)
te_receiving_test_data_x_df = pd.DataFrame(data=te_receiving_test_data_x_scaled,columns=te_receiving_test_data_x.columns)

passing_test_data_y_df = pd.DataFrame(data=passing_test_data_y_scaled,columns=passing_test_data_y.columns)
rushing_test_data_y_df = pd.DataFrame(data=rushing_test_data_y_scaled,columns=rushing_test_data_y.columns)
wr_receiving_test_data_y_df = pd.DataFrame(data=wr_receiving_test_data_y_scaled,columns=wr_receiving_test_data_y.columns)
te_receiving_test_data_y_df = pd.DataFrame(data=te_receiving_test_data_y_scaled,columns=te_receiving_test_data_y.columns)

# Passing Model predicting passing yds
model_objs = [LinearRegression(),BayesianRidge(),ElasticNet(),MLPRegressor(),Ridge(),Lasso(),KNeighborsRegressor()]
pass_r2_scores = []
pass_mae_scores = []
for model_obj in model_objs:
    passing_training_data_x_df = passing_training_data_x_df.fillna(0)
    passing_training_data_y_df = passing_training_data_y_df.fillna(0)
    passing_test_data_x_df = passing_test_data_x_df.fillna(0)
    passing_test_data_y_df = passing_test_data_y_df.fillna(0)
    model = model_obj.fit(passing_training_data_x_df, passing_training_data_y_df)
    y_preds = model.predict(passing_test_data_x_df)
    r2 = r2_score(passing_test_data_y_df.values, y_preds)
    pass_r2_scores.append(r2)
    mae = mean_absolute_error(passing_test_data_y_df.values, y_preds)
    pass_mae_scores.append(mae)
print(pass_r2_scores)
print(pass_mae_scores)

# Rushing Model predicting rushing yds
model_objs = [LinearRegression(),BayesianRidge(),ElasticNet(),MLPRegressor(),Ridge(),Lasso(),KNeighborsRegressor()]
rush_r2_scores = []
rush_mae_scores = []
for model_obj in model_objs:
    rushing_training_data_x_df = rushing_training_data_x_df.fillna(0)
    rushing_training_data_y_df = rushing_training_data_y_df.fillna(0)
    rushing_test_data_x_df = rushing_test_data_x_df.fillna(0)
    rushing_test_data_y_df = rushing_test_data_y_df.fillna(0)
    model = model_obj.fit(rushing_training_data_x_df, rushing_training_data_y_df)
    y_preds = model.predict(rushing_test_data_x_df)
    r2 = r2_score(rushing_test_data_y_df.values, y_preds)
    rush_r2_scores.append(r2)
    mae = mean_absolute_error(rushing_test_data_y_df.values, y_preds)
    rush_mae_scores.append(mae)
print(rush_r2_scores)
print(rush_mae_scores)

# Receiving Model predicting receiving yds
model_objs = [LinearRegression(),BayesianRidge(),ElasticNet(),MLPRegressor(),Ridge(),Lasso(),KNeighborsRegressor()]
catch_r2_scores = []
catch_mae_scores = []
for model_obj in model_objs:
    wr_receiving_training_data_x_df = wr_receiving_training_data_x_df.fillna(0)
    wr_receiving_training_data_y_df = wr_receiving_training_data_y_df.fillna(0)
    wr_receiving_test_data_x_df = wr_receiving_test_data_x_df.fillna(0)
    wr_receiving_test_data_y_df = wr_receiving_test_data_y_df.fillna(0)
    model = model_obj.fit(wr_receiving_training_data_x_df, wr_receiving_training_data_y_df)
    y_preds = model.predict(wr_receiving_test_data_x_df)
    r2 = r2_score(wr_receiving_test_data_y_df.values, y_preds)
    catch_r2_scores.append(r2)
    mae = mean_absolute_error(wr_receiving_test_data_y_df.values, y_preds)
    catch_mae_scores.append(mae)
print(catch_r2_scores)
print(catch_mae_scores)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0.9217863157266866, 0.9222533243521033, -0.00026325986028052917, 0.8701192089302646, 0.9203066463774178, -0.00026325986028052917, 0.8938190235094772]
[0.018275381245098213, 0.01829066679415409, 0.078210161377703, 0.018817313466291664, 0.019287572504878003, 0.078210161377703, 0.013895122176860111]


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[0.7323061024637045, 0.7326137598404163, -0.0001456813460953832, 0.7843244975744665, 0.7405375807646688, -0.0001456813460953832, 0.736057110908571]
[0.045943812879882466, 0.04590498427415812, 0.10238615689165434, 0.03564542447810389, 0.04471731520449978, 0.10238615689165434, 0.03484617402668358]
[0.4130462577422396, 0.41449187463318904, -0.007234220862295748, 0.41880144479805703, 0.42897583895947644, -0.007234220862295748, 0.5332462228154455]
[0.09068745954382847, 0.09060096451377701, 0.13169913964692426, 0.08036587953586793, 0.08988889010933102, 0.13169913964692426, 0.06772057820620325]


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


### 2023 Predictions

In [273]:
passing_data = master_df[['Player','Year','Age','Passing_Cmp','Passing_Att','Passing_Yds','Passing_TD','Passing_Int','Passing_TD%','Passing_Int%','Passing_1D','Passing_Lng','Passing_Y/A','Passing_AY/A','Passing_Y/C','Passing_Y/G','Passing_Rate','Passing_QBR','Passing_Sk','Passing_Sk%','Passing_NY/A','Passing_ANY/A','Bats','ThAwy','Spikes','Drops','Drop%','BadTh','Bad%','OnTgt','OnTgt%','IAY','IAY/PA','CAY','CAY/Cmp','CAY/PA','YAC','YAC/Cmp','RPO_Plays','RPO_Yds','RPO_PassAtt','RPO_PassYds','RPO_RushAtt','RPO_RushYds','PlayAction_PassAtt','PlayAction_PassYds','Sk','PktTime','Bltz','Hrry','Hits','Prss','Prss%','Scrm','Yds/Scr','3YearPrior_Avg_PPR','3YearPrior_StdDev_PPR','Fantasy_PPR']]
rushing_data = master_df[['Player','Year','Age','Rushing_Att','Rushing_Yds','Rushing_Y/A_x','Rushing_TD','Rushing_1D','Rushing_Lng','Rushing_Y/A_y','Rushing_Y/G','Rushing_Fumbles','Rushing_YBC','Rushing_YBC/Att','Rushing_YAC','Rushing_YAC/Att','Rushing_BrkTkl','Rushing_Att/Br','3YearPrior_Avg_PPR','3YearPrior_StdDev_PPR','Fantasy_PPR']]
receiving_data = master_df[['Player','Year','Age','Receiving_Yds','Receiving_Ctch%','Receiving_Lng','Receiving_Y/Tgt','Receiving_R/G','Receiving_YBC','Receiving_YBC/R','Receiving_YAC','Receiving_YAC/R','Receiving_ADOT','Receiving_BrkTkl','Receiving_Rec/Br','Receiving_Drop','Receiving_Drop%','Receiving_Int','Receiving_Rat','3YearPrior_Avg_PPR','3YearPrior_StdDev_PPR','Fantasy_PPR']]

passing_predict_data = passing_data[(passing_data['Year'] >= 2020) & (passing_data['Year'] <= 2022)]
rushing_predict_data = rushing_data[(rushing_data['Year'] >= 2020) & (rushing_data['Year'] <= 2022)]
wr_receiving_predict_data = receiving_data[(receiving_data['Year'] >= 2020) & (receiving_data['Year'] <= 2022)]
te_receiving_predict_data = receiving_data[(receiving_data['Year'] >= 2020) & (receiving_data['Year'] <= 2022)]

passing_predict_data_x = passing_predict_data[['3YearPrior_Avg_PPR','Passing_Lng','Age','Bltz','Prss','Prss%','Bad%','Hrry','ThAwy','Drop%','Passing_Int%','Passing_Sk%','YAC/Cmp']]
rushing_predict_data_x = rushing_predict_data[['3YearPrior_Avg_PPR','Age','Rushing_Lng','3YearPrior_StdDev_PPR','Rushing_Y/A_y','Rushing_Att/Br']]
wr_receiving_predict_data_x = wr_receiving_predict_data[['Age','Receiving_Int','Receiving_ADOT','3YearPrior_Avg_PPR','Receiving_Y/Tgt','Receiving_Ctch%','Receiving_YAC/R','Receiving_Drop%','Receiving_Lng','Receiving_Rat']]
te_receiving_predict_data_x = te_receiving_predict_data[['Receiving_Int','Age','Receiving_Y/Tgt','Receiving_ADOT','Receiving_YAC/R','Receiving_Drop%','3YearPrior_Avg_PPR']]


passing_predict_data_x = passing_predict_data_x.replace('§', 0)
rushing_predict_data_x = rushing_predict_data_x.replace('§', 0)
wr_receiving_predict_data_x = wr_receiving_predict_data_x.replace('§', 0)
te_receiving_predict_data_x = te_receiving_predict_data_x.replace('§', 0)

# Scale Data
scaler = MinMaxScaler()

passing_predict_data_x_scaled = scaler.fit_transform(passing_predict_data_x)
rushing_predict_data_x_scaled = scaler.fit_transform(rushing_predict_data_x)
wr_receiving_predict_data_x_scaled = scaler.fit_transform(wr_receiving_predict_data_x)
te_receiving_predict_data_x_scaled = scaler.fit_transform(te_receiving_predict_data_x)

# Revert back to Dataframes w/ column names
passing_predict_data_x_df = pd.DataFrame(data=passing_predict_data_x_scaled,columns=passing_predict_data_x.columns)
rushing_predict_data_x_df = pd.DataFrame(data=rushing_predict_data_x_scaled,columns=rushing_predict_data_x.columns)
wr_receiving_predict_data_x_df = pd.DataFrame(data=wr_receiving_predict_data_x_scaled,columns=wr_receiving_predict_data_x.columns)
te_receiving_predict_data_x_df = pd.DataFrame(data=te_receiving_predict_data_x_scaled,columns=te_receiving_predict_data_x.columns)

# Passing Model predicting passing yds
model_obj = KNeighborsRegressor()
passing_predict_data_x_df = passing_predict_data_x_df.fillna(0)
pass_model = model_obj.fit(passing_training_data_x_df, passing_training_data_y_df)
pass_y_preds = pass_model.predict(passing_predict_data_x_df)

# Rushing Model predicting passing yds
model_obj = KNeighborsRegressor()
rushing_predict_data_x_df = rushing_predict_data_x_df.fillna(0)
rush_model = model_obj.fit(rushing_training_data_x_df, rushing_training_data_y_df)
rush_y_preds = rush_model.predict(rushing_predict_data_x_df)

# Receiving Model predicting passing yds
model_obj = MLPRegressor()
wr_receiving_predict_data_x_df = wr_receiving_predict_data_x_df.fillna(0)
catch_model = model_obj.fit(wr_receiving_training_data_x_df, wr_receiving_training_data_y_df)
catch_y_preds = catch_model.predict(wr_receiving_predict_data_x_df)

# Report predictions with player names
pass_preds_df = pd.DataFrame(data=pass_y_preds, columns=['passing_yds_score'])
pass_preds_df['Player'] = passing_predict_data['Player'].values
pass_preds_df['Year'] = passing_predict_data['Year'].values
pass_preds_df['Age'] = passing_predict_data['Age'].values
pass_preds_df = pass_preds_df[pass_preds_df['Year'] == 2022]

rush_preds_df = pd.DataFrame(data=rush_y_preds, columns=['rush_yds_score'])
rush_preds_df['Player'] = rushing_predict_data['Player'].values
rush_preds_df['Year'] = rushing_predict_data['Year'].values
rush_preds_df['Age'] = rushing_predict_data['Age'].values
rush_preds_df = rush_preds_df[rush_preds_df['Year'] == 2022]

catch_preds_df = pd.DataFrame(data=catch_y_preds, columns=['catch_yds_score'])
catch_preds_df['Player'] = wr_receiving_predict_data['Player'].values
catch_preds_df['Year'] = wr_receiving_predict_data['Year'].values
catch_preds_df['Age'] = wr_receiving_predict_data['Age'].values
catch_preds_df = catch_preds_df[catch_preds_df['Year'] == 2022]

prediction_df = pass_preds_df.merge(rush_preds_df[['Player','rush_yds_score']],on='Player',how='left')
prediction_df = prediction_df.merge(catch_preds_df[['Player','catch_yds_score']],on='Player',how='left')
prediction_df = prediction_df.merge(master_df[['Player','Position']],on='Player',how='left')
prediction_df = prediction_df.drop_duplicates()

qb_preds = prediction_df[prediction_df['Position'] == 'QB']
rb_preds = prediction_df[prediction_df['Position'] == 'RB']
wr_preds = prediction_df[prediction_df['Position'] == 'WR']
te_preds = prediction_df[prediction_df['Position'] == 'TE']

qb_preds['ovr_score'] = qb_preds['passing_yds_score'] + qb_preds['rush_yds_score']
rb_preds['ovr_score'] = rb_preds['catch_yds_score'] + rb_preds['rush_yds_score']
wr_preds['ovr_score'] = wr_preds['catch_yds_score']
te_preds['ovr_score'] = te_preds['catch_yds_score']

qb_preds = qb_preds[['Player','Position','Age','ovr_score','passing_yds_score','rush_yds_score']]
qb_preds = qb_preds.sort_values('ovr_score', ascending=False)

rb_preds = rb_preds[['Player','Position','Age','ovr_score','rush_yds_score','catch_yds_score']]
rb_preds = rb_preds.sort_values('ovr_score', ascending=False)

wr_preds = wr_preds[['Player','Position','Age','ovr_score','catch_yds_score']]
wr_preds = wr_preds.sort_values('ovr_score', ascending=False)

te_preds = te_preds[['Player','Position','Age','ovr_score','catch_yds_score']]
te_preds = te_preds.sort_values('ovr_score', ascending=False)

timestr = time.strftime("%Y%m%d-%H%M%S")
adp = pd.read_csv('data/fantasy_draft_adp/draft_adp_2023.csv')
adp = adp.rename({'Name': 'Player'}, axis=1)
qb_w_adp = qb_preds.merge(adp[['Player','Overall','High','Low']], how='left', on='Player')
rb_w_adp = rb_preds.merge(adp[['Player','Overall','High','Low']], how='left', on='Player')
wr_w_adp = wr_preds.merge(adp[['Player','Overall','High','Low']], how='left', on='Player')
te_w_adp = te_preds.merge(adp[['Player','Overall','High','Low']], how='left', on='Player')
qb_filename = 'draft_proj_new_083023/QB/2023_projections_'+timestr+'.csv'
rb_filename = 'draft_proj_new_083023/RB/2023_projections_'+timestr+'.csv'
wr_filename = 'draft_proj_new_083023/WR/2023_projections_'+timestr+'.csv'
te_filename = 'draft_proj_new_083023/TE/2023_projections_'+timestr+'.csv'
qb_w_adp.to_csv(qb_filename)
rb_w_adp.to_csv(rb_filename)
wr_w_adp.to_csv(wr_filename)
te_w_adp.to_csv(te_filename)

  y = column_or_1d(y, warn=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qb_preds['ovr_score'] = qb_preds['passing_yds_score'] + qb_preds['rush_yds_score']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rb_preds['ovr_score'] = rb_preds['catch_yds_score'] + rb_preds['rush_yds_score']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wr_preds

### Query against 2022 data using model observations from last year

In [282]:
qbs = master_df[master_df['Position'] == "QB"]
qbs = qbs[qbs['Year'] == 2022]
qbs = qbs[qbs['Passing_Int%'] <= 3]
qbs = qbs[qbs['Passing_Lng'] >= 50]
qbs = qbs[qbs['Passing_Y/A'] >= 7]
qbs = qbs[qbs['Passing_AY/A'] >= 7]
qbs = qbs[qbs['Passing_Y/C'] >= 11]
qbs = qbs[qbs['Passing_Sk%'] < 7]
qbs = qbs[qbs['Passing_NY/A'] > 6.5]
qbs = qbs[qbs['Passing_ANY/A'] > 5.5]

qbs = qbs[['Rank','Player','Position','Year','Age','Passing_Int%','Passing_Lng','Passing_Y/A','Passing_AY/A','Passing_Y/C','Passing_Sk%','Passing_NY/A','Passing_ANY/A']]

In [279]:
rbs = master_df[master_df['Position'] == "RB"]
rbs = rbs[rbs['Year'] == 2022]
rbs = rbs[rbs['Age'] <= 28]
rbs = rbs[rbs['Rushing_1D'] >= 40]
rbs = rbs[rbs['Rushing_Lng'] >= 25]
rbs = rbs[rbs['Rushing_Y/A_x'] >= 4]
rbs = rbs[rbs['Receiving_Ctch%'] > 0.75]
rbs = rbs[rbs['Receiving_Lng'] > 15]
rbs = rbs[rbs['Receiving_Y/Tgt'] > 5]

rbs = rbs[['Rank','Player','Position','Year','Age','Rushing_1D','Rushing_Lng','Rushing_Y/A_x','Receiving_Ctch%','Receiving_Lng','Receiving_Y/Tgt']]

In [284]:
wrs = master_df[master_df['Position'] == "WR"]
wrs = wrs[wrs['Year'] == 2022]
wrs = wrs[wrs['Age'] < 30]
wrs = wrs[wrs['Receiving_Ctch%'] > 0.50]
wrs = wrs[wrs['Receiving_Lng'] > 50]
wrs = wrs[wrs['Receiving_Y/Tgt'] > 6.5]
wrs = wrs[wrs['40yd'] <= 4.6]
wrs = wrs[wrs['Shuttle'] <= 4.3]

wrs = wrs[['Rank','Player','Position','Year','Age','Receiving_Ctch%','Receiving_Lng','Receiving_Y/Tgt','40yd','Shuttle']]

In [309]:
tes = master_df[master_df['Position'] == "TE"]
tes = tes[tes['Year'] == 2022]
tes = tes[tes['Age'] < 35]
tes = tes[tes['Receiving_Ctch%'] > 0.60]
tes = tes[tes['Receiving_Lng'] > 30]
tes = tes[tes['Receiving_Y/Tgt'] > 6.5]
tes = tes[tes['Ht'] >= 6.41]
tes = tes[tes['Wt'] >= 240]
tes = tes[tes['Wt'] <= 260]
tes = tes[tes['40yd'] <= 4.8]
tes = tes[tes['Shuttle'] <= 4.5]
#tes = tes[tes['Vertical'] >= 30]
#tes = tes[tes['3Cone'] <= 7.2]

tes = tes[['Rank','Player','Position','Year','Age','Receiving_Ctch%','Receiving_Lng','Receiving_Y/Tgt','Ht','Wt','40yd','Shuttle','Vertical','3Cone']]

In [311]:
qbs.to_csv('draft_proj_new_083023/QB/qb_star_criteria.csv')
rbs.to_csv('draft_proj_new_083023/RB/rb_star_criteria.csv')
wrs.to_csv('draft_proj_new_083023/WR/wr_star_criteria.csv')
tes.to_csv('draft_proj_new_083023/TE/te_star_criteria.csv')

### Next Steps - go back to train and predict models to see if any further tuning or other model attempts are worth it