In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import math

# Training model with all seasons data - Optimized

In [2]:
# Load the multi-season training data; the bare name on the last line displays it
all_seasons_data = pd.read_csv(filepath_or_buffer='training_data.csv')
all_seasons_data

Unnamed: 0,name,team,position,season,games,receptions,targets,receiving_yards,rec_ypg,receiving_tds,...,sacks,sack_fumbles,offense_snaps,teams_offense_snaps,round,overall,stadium_name,stadium_weather_type,stadium_surface,fantasy_points_ppr
0,A.J. Brown,TEN,WR,2019,16,52,84,1051.0,65.69,8,...,0.0,0,678.0,997.0,2.0,51.0,Nissan Stadium,moderate,Grass,217.10
1,A.J. Brown,TEN,WR,2020,14,70,106,1075.0,76.79,11,...,0.0,0,760.0,945.0,2.0,51.0,Nissan Stadium,moderate,Grass,247.50
2,A.J. Brown,TEN,WR,2021,13,63,105,869.0,66.85,5,...,0.0,0,604.0,876.0,2.0,51.0,Nissan Stadium,moderate,Grass,180.90
3,A.J. Brown,PHI,WR,2022,17,88,145,1496.0,88.00,11,...,0.0,0,1004.0,1189.0,2.0,51.0,Lincoln Financial Field,cold,Grass,299.60
4,A.J. Brown,PHI,WR,2023,17,106,158,1456.0,85.65,7,...,0.0,0,1019.0,1154.0,2.0,51.0,Lincoln Financial Field,cold,Grass,289.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2101,Zay Jones,BUF,WR,2018,16,56,102,652.0,40.75,7,...,0.0,0,941.0,1063.0,2.0,37.0,New Era Field,cold,FieldTurf,165.20
2102,Zay Jones,LV,WR,2020,11,14,20,154.0,14.00,1,...,0.0,0,248.0,771.0,2.0,37.0,Allegiant Stadium,indoor,Grass,36.56
2103,Zay Jones,LV,WR,2021,15,47,70,546.0,36.40,1,...,0.0,0,614.0,1026.0,2.0,37.0,Allegiant Stadium,indoor,Grass,105.90
2104,Zay Jones,JAX,WR,2022,16,82,121,823.0,51.44,5,...,0.0,0,930.0,1082.0,2.0,37.0,TIAA Bank Field,warm,Grass,198.10


In [3]:
# Adding new features to training data
# NOTE: the same per-game features are rebuilt in three other cells below (the non-2023
# training frame, the 2023 test frame and the prediction frame). Any feature added to
# this mapping must be added in those cells as well.
opt_model_all_seasons_data = all_seasons_data.copy()

# new per-game column -> season-total column it is derived from
per_game_feature_sources = {
    'target_per_game': 'targets',
    'carries_per_game': 'carries',
    'team_off_snaps_per_game': 'teams_offense_snaps',
    'off_snaps_per_game': 'offense_snaps',
    'attempts_per_game': 'attempts',
}
for per_game_column, total_column in per_game_feature_sources.items():
    # season total divided by games played, rounded to 2 decimal places
    opt_model_all_seasons_data[per_game_column] = (
        opt_model_all_seasons_data[total_column] / opt_model_all_seasons_data['games']
    ).round(2)

# Replace every missing value in the frame with 0
opt_model_all_seasons_data = opt_model_all_seasons_data.fillna(0)
new_features_training_data_all_seasons = opt_model_all_seasons_data
new_features_training_data_all_seasons.head()

Unnamed: 0,name,team,position,season,games,receptions,targets,receiving_yards,rec_ypg,receiving_tds,...,overall,stadium_name,stadium_weather_type,stadium_surface,fantasy_points_ppr,target_per_game,carries_per_game,team_off_snaps_per_game,off_snaps_per_game,attempts_per_game
0,A.J. Brown,TEN,WR,2019,16,52,84,1051.0,65.69,8,...,51.0,Nissan Stadium,moderate,Grass,217.1,5.25,0.19,62.31,42.38,0.0
1,A.J. Brown,TEN,WR,2020,14,70,106,1075.0,76.79,11,...,51.0,Nissan Stadium,moderate,Grass,247.5,7.57,0.0,67.5,54.29,0.0
2,A.J. Brown,TEN,WR,2021,13,63,105,869.0,66.85,5,...,51.0,Nissan Stadium,moderate,Grass,180.9,8.08,0.15,67.38,46.46,0.15
3,A.J. Brown,PHI,WR,2022,17,88,145,1496.0,88.0,11,...,51.0,Lincoln Financial Field,cold,Grass,299.6,8.53,0.0,69.94,59.06,0.0
4,A.J. Brown,PHI,WR,2023,17,106,158,1456.0,85.65,7,...,51.0,Lincoln Financial Field,cold,Grass,289.6,9.29,0.0,67.88,59.94,0.0


In [4]:
# List every column of the feature-augmented training frame
new_features_training_data_all_seasons.columns

Index(['name', 'team', 'position', 'season', 'games', 'receptions', 'targets',
       'receiving_yards', 'rec_ypg', 'receiving_tds', 'ypr',
       'receiving_fumbles', 'receiving_2pt_conversions', 'target_share',
       'air_yards_share', 'carries', 'rushing_yards', 'rush_ypg',
       'rushing_tds', 'rush_td_percentage', 'rushing_fumbles',
       'rushing_2pt_conversions', 'completions', 'attempts', 'comp_percentage',
       'passing_yards', 'pass_ypg', 'passing_tds', 'passing_2pt_conversions',
       'td_percentage', 'interceptions', 'sacks', 'sack_fumbles',
       'offense_snaps', 'teams_offense_snaps', 'round', 'overall',
       'stadium_name', 'stadium_weather_type', 'stadium_surface',
       'fantasy_points_ppr', 'target_per_game', 'carries_per_game',
       'team_off_snaps_per_game', 'off_snaps_per_game', 'attempts_per_game'],
      dtype='object')

In [5]:
# Cleaning up all seasons data columns: remove the columns listed below before encoding
columns_dropped_all_seasons = [
    'name', 'season', 'team',
    'receptions', 'receiving_yards', 'receiving_tds', 'receiving_2pt_conversions',
    'rushing_yards', 'rushing_tds', 'rushing_2pt_conversions',
    'completions', 'passing_yards', 'passing_tds', 'interceptions',
]
all_seasons_data_cleaned = new_features_training_data_all_seasons.drop(
    columns=columns_dropped_all_seasons
)
all_seasons_data_cleaned.head()

Unnamed: 0,position,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,...,overall,stadium_name,stadium_weather_type,stadium_surface,fantasy_points_ppr,target_per_game,carries_per_game,team_off_snaps_per_game,off_snaps_per_game,attempts_per_game
0,WR,16,84,65.69,20.21,1.0,0.2,0.29,3,3.75,...,51.0,Nissan Stadium,moderate,Grass,217.1,5.25,0.19,62.31,42.38,0.0
1,WR,14,106,76.79,15.36,2.0,0.27,0.35,0,0.0,...,51.0,Nissan Stadium,moderate,Grass,247.5,7.57,0.0,67.5,54.29,0.0
2,WR,13,105,66.85,13.79,0.0,0.28,0.45,2,0.77,...,51.0,Nissan Stadium,moderate,Grass,180.9,8.08,0.15,67.38,46.46,0.15
3,WR,17,145,88.0,17.0,2.0,0.29,0.41,0,0.0,...,51.0,Lincoln Financial Field,cold,Grass,299.6,8.53,0.0,69.94,59.06,0.0
4,WR,17,158,85.65,13.74,2.0,0.3,0.42,0,0.0,...,51.0,Lincoln Financial Field,cold,Grass,289.6,9.29,0.0,67.88,59.94,0.0


In [6]:
# Confirm which columns remain after the drop
all_seasons_data_cleaned.columns

Index(['position', 'games', 'targets', 'rec_ypg', 'ypr', 'receiving_fumbles',
       'target_share', 'air_yards_share', 'carries', 'rush_ypg',
       'rush_td_percentage', 'rushing_fumbles', 'attempts', 'comp_percentage',
       'pass_ypg', 'passing_2pt_conversions', 'td_percentage', 'sacks',
       'sack_fumbles', 'offense_snaps', 'teams_offense_snaps', 'round',
       'overall', 'stadium_name', 'stadium_weather_type', 'stadium_surface',
       'fantasy_points_ppr', 'target_per_game', 'carries_per_game',
       'team_off_snaps_per_game', 'off_snaps_per_game', 'attempts_per_game'],
      dtype='object')

In [7]:
# One-hot encode the categorical columns so the model receives only numeric/boolean inputs
encoded_training_data_all_seasons = pd.get_dummies(data=all_seasons_data_cleaned)
encoded_training_data_all_seasons

Unnamed: 0,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,rush_td_percentage,...,stadium_name_Soldier Field,stadium_name_State Farm Stadium,stadium_name_TIAA Bank Field,stadium_name_U.S. Bank Stadium,stadium_weather_type_cold,stadium_weather_type_indoor,stadium_weather_type_moderate,stadium_weather_type_warm,stadium_surface_FieldTurf,stadium_surface_Grass
0,16,84,65.69,20.21,1.0,0.20,0.29,3,3.75,0.33,...,False,False,False,False,False,False,True,False,False,True
1,14,106,76.79,15.36,2.0,0.27,0.35,0,0.00,0.00,...,False,False,False,False,False,False,True,False,False,True
2,13,105,66.85,13.79,0.0,0.28,0.45,2,0.77,0.00,...,False,False,False,False,False,False,True,False,False,True
3,17,145,88.00,17.00,2.0,0.29,0.41,0,0.00,0.00,...,False,False,False,False,True,False,False,False,False,True
4,17,158,85.65,13.74,2.0,0.30,0.42,0,0.00,0.00,...,False,False,False,False,True,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2101,16,102,40.75,11.64,1.0,0.21,0.26,1,0.00,0.00,...,False,False,False,False,True,False,False,False,True,False
2102,11,20,14.00,11.00,0.0,0.05,0.04,0,0.00,0.00,...,False,False,False,False,False,True,False,False,False,True
2103,15,70,36.40,11.62,1.0,0.14,0.24,2,0.20,0.00,...,False,False,False,False,False,True,False,False,False,True
2104,16,121,51.44,10.04,0.0,0.22,0.26,4,1.12,0.00,...,False,False,True,False,False,False,False,True,False,True


In [8]:
# Column the model is trained to predict
target_column = 'fantasy_points_ppr'

# y (target variable) is that single column
y = encoded_training_data_all_seasons[target_column]

# X (features) is every other column
X = encoded_training_data_all_seasons.drop(columns=[target_column])

In [9]:
# Splitting the data into training and testing sets (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [10]:
# Linear regression with scikit-learn's default settings
lr_model = LinearRegression()

In [11]:
# Fit the model on the training split only
lr_model.fit(X_train, y_train)

In [12]:
# Intercept term of the fitted linear model
intercept = lr_model.intercept_
intercept

9.918164376407645

In [13]:
# Fitted coefficients of the linear model (one per feature column it was trained on)
m = lr_model.coef_
m

array([-3.09168598e-01,  1.62893158e+00,  2.15326902e+00, -1.10466671e+00,
       -6.41667006e-01,  7.13887430e+01, -4.13240368e+01,  6.36494297e-01,
        1.43962148e+00,  1.79025424e+01,  4.56792436e-01,  4.53312778e-01,
       -7.26548481e-01,  8.63241187e-01,  5.91191252e+00,  5.79493076e+01,
       -2.48072165e-01, -2.39330717e+00,  1.47277849e-02, -1.96954992e-03,
       -4.20910112e+00,  1.20038147e-01, -1.52049319e+01, -5.75863763e+00,
        1.38548423e-02, -2.13498452e-01, -6.29219471e+00, -4.20370487e-01,
       -4.61426922e+00,  3.15356318e+00,  1.88107653e+00,  5.68081742e-01,
       -3.23758892e+00,  5.31437708e+00, -6.04195375e+00,  5.98852102e+00,
        2.95670436e+00, -4.35787428e+00, -1.98179716e+00,  2.94409788e+00,
        1.59527592e+00, -3.40116924e+00,  2.68259230e-01, -1.49994356e+00,
        4.22654748e+00, -2.16364837e+00,  1.95235187e+00,  2.96356003e-01,
       -1.88249315e+00,  3.07235065e+00, -3.88201605e+00,  2.58084538e+00,
       -5.24972598e+00,  

In [14]:
# Predictions on the rows the model was fit on (used for the training metrics below)
training_predictions_all_seasons = lr_model.predict(X_train)

In [15]:
# R^2 on the training split
training_all_seasons_data_r2 = r2_score(
    y_true=y_train,
    y_pred=training_predictions_all_seasons,
)
training_all_seasons_data_r2

0.9776173673678586

In [16]:
# RMSE on the training split: square root of the mean squared error
training_all_seasons_data_mse = mean_squared_error(
    y_true=y_train,
    y_pred=training_predictions_all_seasons,
)
training_all_seasons_data_rmse = math.sqrt(training_all_seasons_data_mse)
training_all_seasons_data_rmse

14.643831581074425

In [17]:
# Predictions on the held-out test split
testing_predictions_all_seasons = lr_model.predict(X_test)

In [18]:
# R^2 on the held-out test split
testing_all_seasons_data_r2 = r2_score(
    y_true=y_test,
    y_pred=testing_predictions_all_seasons,
)
testing_all_seasons_data_r2

0.9749548173727994

In [19]:
# RMSE on the held-out test split: square root of the mean squared error
testing_all_seasons_data_mse = mean_squared_error(
    y_true=y_test,
    y_pred=testing_predictions_all_seasons,
)
testing_all_seasons_data_rmse = math.sqrt(testing_all_seasons_data_mse)
testing_all_seasons_data_rmse

14.927806296223993

# Training model without 2023 season data for testing 2023 fantasy scores - Optimized

In [20]:
# Build a frame without any 2023 rows so the model can later be tested against
# last year's fantasy scores, then add the same per-game features as the training data.

is_not_2023 = all_seasons_data['season'] != 2023
no_2023_season_data = all_seasons_data.loc[is_not_2023].drop(columns=['season'])

# new per-game column -> season-total column it is derived from
per_game_feature_sources = {
    'target_per_game': 'targets',
    'carries_per_game': 'carries',
    'team_off_snaps_per_game': 'teams_offense_snaps',
    'off_snaps_per_game': 'offense_snaps',
    'attempts_per_game': 'attempts',
}
for per_game_column, total_column in per_game_feature_sources.items():
    no_2023_season_data[per_game_column] = (
        no_2023_season_data[total_column] / no_2023_season_data['games']
    ).round(2)

# Replace every missing value in the frame with 0
no_2023_season_data = no_2023_season_data.fillna(0)
new_features_training_data_2023 = no_2023_season_data
new_features_training_data_2023

Unnamed: 0,name,team,position,games,receptions,targets,receiving_yards,rec_ypg,receiving_tds,ypr,...,overall,stadium_name,stadium_weather_type,stadium_surface,fantasy_points_ppr,target_per_game,carries_per_game,team_off_snaps_per_game,off_snaps_per_game,attempts_per_game
0,A.J. Brown,TEN,WR,16,52,84,1051.0,65.69,8,20.21,...,51.0,Nissan Stadium,moderate,Grass,217.10,5.25,0.19,62.31,42.38,0.00
1,A.J. Brown,TEN,WR,14,70,106,1075.0,76.79,11,15.36,...,51.0,Nissan Stadium,moderate,Grass,247.50,7.57,0.00,67.50,54.29,0.00
2,A.J. Brown,TEN,WR,13,63,105,869.0,66.85,5,13.79,...,51.0,Nissan Stadium,moderate,Grass,180.90,8.08,0.15,67.38,46.46,0.15
3,A.J. Brown,PHI,WR,17,88,145,1496.0,88.00,11,17.00,...,51.0,Lincoln Financial Field,cold,Grass,299.60,8.53,0.00,69.94,59.06,0.00
6,AJ McCarron,CIN,QB,2,0,0,0.0,0.00,0,0.00,...,0.0,Paul Brown Stadium,cold,FieldTurf,2.64,0.00,0.00,0.00,0.00,7.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2100,Zay Jones,BUF,WR,14,27,74,316.0,22.57,2,11.70,...,37.0,New Era Field,cold,FieldTurf,70.60,5.29,0.00,67.29,53.64,0.00
2101,Zay Jones,BUF,WR,16,56,102,652.0,40.75,7,11.64,...,37.0,New Era Field,cold,FieldTurf,165.20,6.38,0.06,66.44,58.81,0.06
2102,Zay Jones,LV,WR,11,14,20,154.0,14.00,1,11.00,...,37.0,Allegiant Stadium,indoor,Grass,36.56,1.82,0.00,70.09,22.55,0.09
2103,Zay Jones,LV,WR,15,47,70,546.0,36.40,1,11.62,...,37.0,Allegiant Stadium,indoor,Grass,105.90,4.67,0.13,68.40,40.93,0.00


In [21]:
# List every column of the non-2023 training frame ('season' has already been dropped)
new_features_training_data_2023.columns

Index(['name', 'team', 'position', 'games', 'receptions', 'targets',
       'receiving_yards', 'rec_ypg', 'receiving_tds', 'ypr',
       'receiving_fumbles', 'receiving_2pt_conversions', 'target_share',
       'air_yards_share', 'carries', 'rushing_yards', 'rush_ypg',
       'rushing_tds', 'rush_td_percentage', 'rushing_fumbles',
       'rushing_2pt_conversions', 'completions', 'attempts', 'comp_percentage',
       'passing_yards', 'pass_ypg', 'passing_tds', 'passing_2pt_conversions',
       'td_percentage', 'interceptions', 'sacks', 'sack_fumbles',
       'offense_snaps', 'teams_offense_snaps', 'round', 'overall',
       'stadium_name', 'stadium_weather_type', 'stadium_surface',
       'fantasy_points_ppr', 'target_per_game', 'carries_per_game',
       'team_off_snaps_per_game', 'off_snaps_per_game', 'attempts_per_game'],
      dtype='object')

In [22]:
# Cleaning up the non-2023 training data columns.
# Read from no_2023_season_data rather than new_features_training_data_2023: both names
# refer to the same frame at this point, but the latter is rebound to the 2023 test rows
# further down, so re-running this cell later would otherwise pick up the wrong data.
columns_dropped_no_2023 = [
    'name', 'team',
    'receptions', 'receiving_yards', 'receiving_tds', 'receiving_2pt_conversions',
    'rushing_yards', 'rushing_tds', 'rushing_2pt_conversions',
    'completions', 'passing_yards', 'passing_tds', 'interceptions',
]
no_2023_season_data_cleaned = no_2023_season_data.drop(columns=columns_dropped_no_2023)
no_2023_season_data_cleaned.head()

Unnamed: 0,position,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,...,overall,stadium_name,stadium_weather_type,stadium_surface,fantasy_points_ppr,target_per_game,carries_per_game,team_off_snaps_per_game,off_snaps_per_game,attempts_per_game
0,WR,16,84,65.69,20.21,1.0,0.2,0.29,3,3.75,...,51.0,Nissan Stadium,moderate,Grass,217.1,5.25,0.19,62.31,42.38,0.0
1,WR,14,106,76.79,15.36,2.0,0.27,0.35,0,0.0,...,51.0,Nissan Stadium,moderate,Grass,247.5,7.57,0.0,67.5,54.29,0.0
2,WR,13,105,66.85,13.79,0.0,0.28,0.45,2,0.77,...,51.0,Nissan Stadium,moderate,Grass,180.9,8.08,0.15,67.38,46.46,0.15
3,WR,17,145,88.0,17.0,2.0,0.29,0.41,0,0.0,...,51.0,Lincoln Financial Field,cold,Grass,299.6,8.53,0.0,69.94,59.06,0.0
6,QB,2,0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0.0,Paul Brown Stadium,cold,FieldTurf,2.64,0.0,0.0,0.0,0.0,7.0


In [23]:
# One-hot encode the categorical columns of the non-2023 training frame
no_2023_encoded_training_data = pd.get_dummies(data=no_2023_season_data_cleaned)
no_2023_encoded_training_data

Unnamed: 0,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,rush_td_percentage,...,stadium_name_Soldier Field,stadium_name_State Farm Stadium,stadium_name_TIAA Bank Field,stadium_name_U.S. Bank Stadium,stadium_weather_type_cold,stadium_weather_type_indoor,stadium_weather_type_moderate,stadium_weather_type_warm,stadium_surface_FieldTurf,stadium_surface_Grass
0,16,84,65.69,20.21,1.0,0.20,0.29,3,3.75,0.33,...,False,False,False,False,False,False,True,False,False,True
1,14,106,76.79,15.36,2.0,0.27,0.35,0,0.00,0.00,...,False,False,False,False,False,False,True,False,False,True
2,13,105,66.85,13.79,0.0,0.28,0.45,2,0.77,0.00,...,False,False,False,False,False,False,True,False,False,True
3,17,145,88.00,17.00,2.0,0.29,0.41,0,0.00,0.00,...,False,False,False,False,True,False,False,False,False,True
6,2,0,0.00,0.00,0.0,0.00,0.00,0,0.00,0.00,...,False,False,False,False,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2100,14,74,22.57,11.70,0.0,0.18,0.28,0,0.00,0.00,...,False,False,False,False,True,False,False,False,True,False
2101,16,102,40.75,11.64,1.0,0.21,0.26,1,0.00,0.00,...,False,False,False,False,True,False,False,False,True,False
2102,11,20,14.00,11.00,0.0,0.05,0.04,0,0.00,0.00,...,False,False,False,False,False,True,False,False,False,True
2103,15,70,36.40,11.62,1.0,0.14,0.24,2,0.20,0.00,...,False,False,False,False,False,True,False,False,False,True


In [24]:
# Column the model is trained to predict
target_column = 'fantasy_points_ppr'

# y (target variable) is that single column
y_no_2023 = no_2023_encoded_training_data[target_column]

# X (features) is every other column
X_no_2023 = no_2023_encoded_training_data.drop(columns=[target_column])

In [25]:
# Separate linear regression that will never see a 2023 row during fitting
no_2023_lr_model = LinearRegression()

In [26]:
# Training model without 2023 season data (fit on every non-2023 row; no train/test split here)
no_2023_lr_model.fit(X_no_2023, y_no_2023)

In [27]:
# In-sample predictions: the model is applied to the same rows it was fit on
no_2023_predictions = no_2023_lr_model.predict(X_no_2023)

In [28]:
# In-sample R^2 for the model fit without 2023 data
no_2023_season_r2 = r2_score(
    y_true=y_no_2023,
    y_pred=no_2023_predictions,
)
no_2023_season_r2

0.9772149496403472

In [29]:
# In-sample RMSE for the model fit without 2023 data
no_2023_season_mse = mean_squared_error(
    y_true=y_no_2023,
    y_pred=no_2023_predictions,
)
no_2023_season_rmse = math.sqrt(no_2023_season_mse)
no_2023_season_rmse

14.834569893029212

# Testing model with 2023 player stats using model without 2023 stats

In [30]:
# Adding new features to the 2023 test data (same per-game features as the training frames)

test_2023_data = all_seasons_data.loc[all_seasons_data['season'] == 2023].copy()

# new per-game column -> season-total column it is derived from
per_game_feature_sources = {
    'target_per_game': 'targets',
    'carries_per_game': 'carries',
    'team_off_snaps_per_game': 'teams_offense_snaps',
    'off_snaps_per_game': 'offense_snaps',
    'attempts_per_game': 'attempts',
}
for per_game_column, total_column in per_game_feature_sources.items():
    test_2023_data[per_game_column] = (
        test_2023_data[total_column] / test_2023_data['games']
    ).round(2)

# Replace every missing value in the frame with 0
test_2023_data = test_2023_data.fillna(0)

# NOTE: this rebinds new_features_training_data_2023, which until now referred to the
# non-2023 training frame; from here on the name refers to the 2023 test rows.
new_features_training_data_2023 = test_2023_data
new_features_training_data_2023



Unnamed: 0,name,team,position,season,games,receptions,targets,receiving_yards,rec_ypg,receiving_tds,...,overall,stadium_name,stadium_weather_type,stadium_surface,fantasy_points_ppr,target_per_game,carries_per_game,team_off_snaps_per_game,off_snaps_per_game,attempts_per_game
4,A.J. Brown,PHI,WR,2023,17,106,158,1456.0,85.65,7,...,51.0,Lincoln Financial Field,cold,Grass,289.60,9.29,0.00,67.88,59.94,0.00
5,A.T. Perry,NO,WR,2023,8,12,18,246.0,30.75,4,...,195.0,Mercedes-Benz Superdome,indoor,FieldTurf,60.60,2.25,0.00,64.88,39.88,0.00
9,AJ McCarron,CIN,QB,2023,2,0,0,0.0,0.00,0,...,0.0,Paul Brown Stadium,cold,FieldTurf,0.76,0.00,0.00,0.00,0.00,2.50
16,Aaron Jones,GB,RB,2023,11,30,43,233.0,21.18,1,...,182.0,Lambeau Field,cold,Grass,134.90,3.91,12.91,65.82,32.18,0.00
27,Aaron Rodgers,NYJ,QB,2023,1,0,0,0.0,0.00,0,...,24.0,MetLife Stadium,cold,FieldTurf,0.00,0.00,0.00,58.00,4.00,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2092,Zach Wilson,NYJ,QB,2023,12,0,0,0.0,0.00,0,...,2.0,MetLife Stadium,cold,FieldTurf,119.94,0.00,3.00,62.25,57.33,30.67
2096,Zack Moss,IND,RB,2023,14,27,37,192.0,13.71,2,...,86.0,Lucas Oil Stadium,indoor,FieldTurf,169.60,2.64,13.07,68.36,37.93,0.00
2098,Zamir White,LV,RB,2023,13,15,19,98.0,7.54,0,...,122.0,Allegiant Stadium,indoor,Grass,73.90,1.46,8.00,61.77,17.46,0.00
2099,Zay Flowers,BAL,WR,2023,16,77,108,858.0,53.62,5,...,22.0,M&T Bank Stadium,moderate,FieldTurf,206.40,6.75,0.50,67.56,56.56,0.00


In [31]:
# Drop the same columns from the 2023 test rows that were dropped from the training data
# (plus 'season', which is still present in this frame).
# Read from test_2023_data directly: it is the same frame as new_features_training_data_2023
# here, but that name is also used for the training frame earlier and is easy to misread.
columns_dropped_2023 = [
    'name', 'team', 'season',
    'receptions', 'receiving_yards', 'receiving_tds', 'receiving_2pt_conversions',
    'rushing_yards', 'rushing_tds', 'rushing_2pt_conversions',
    'completions', 'passing_yards', 'passing_tds', 'interceptions',
]
test_2023_data_cleaned = test_2023_data.drop(columns=columns_dropped_2023)
test_2023_data_cleaned

Unnamed: 0,position,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,...,overall,stadium_name,stadium_weather_type,stadium_surface,fantasy_points_ppr,target_per_game,carries_per_game,team_off_snaps_per_game,off_snaps_per_game,attempts_per_game
4,WR,17,158,85.65,13.74,2.0,0.30,0.42,0,0.00,...,51.0,Lincoln Financial Field,cold,Grass,289.60,9.29,0.00,67.88,59.94,0.00
5,WR,8,18,30.75,20.50,0.0,0.07,0.16,0,0.00,...,195.0,Mercedes-Benz Superdome,indoor,FieldTurf,60.60,2.25,0.00,64.88,39.88,0.00
9,QB,2,0,0.00,0.00,0.0,0.00,0.00,0,0.00,...,0.0,Paul Brown Stadium,cold,FieldTurf,0.76,0.00,0.00,0.00,0.00,2.50
16,RB,11,43,21.18,7.77,1.0,0.12,-0.02,142,59.64,...,182.0,Lambeau Field,cold,Grass,134.90,3.91,12.91,65.82,32.18,0.00
27,QB,1,0,0.00,0.00,0.0,0.00,0.00,0,0.00,...,24.0,MetLife Stadium,cold,FieldTurf,0.00,0.00,0.00,58.00,4.00,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2092,QB,12,0,0.00,0.00,0.0,0.00,0.00,36,17.58,...,2.0,MetLife Stadium,cold,FieldTurf,119.94,0.00,3.00,62.25,57.33,30.67
2096,RB,14,37,13.71,7.11,0.0,0.07,-0.01,183,56.71,...,86.0,Lucas Oil Stadium,indoor,FieldTurf,169.60,2.64,13.07,68.36,37.93,0.00
2098,RB,13,19,7.54,6.53,0.0,0.04,-0.00,104,34.69,...,122.0,Allegiant Stadium,indoor,Grass,73.90,1.46,8.00,61.77,17.46,0.00
2099,WR,16,108,53.62,11.14,0.0,0.24,0.24,8,3.50,...,22.0,M&T Bank Stadium,moderate,FieldTurf,206.40,6.75,0.50,67.56,56.56,0.00


In [32]:
# Encoding categorical columns for model.
# The 2023 rows are one-hot encoded on their own, so the dummy columns depend on which
# categories happen to occur in 2023. Reindex to the training frame's columns so the
# features have exactly the columns, in the same order, that no_2023_lr_model was fit on:
# categories missing from 2023 become all-False columns, and categories seen only in 2023
# (for which the model has no coefficient) are dropped.
encoded_2023_test_data = (
    pd.get_dummies(test_2023_data_cleaned)
    .reindex(columns=no_2023_encoded_training_data.columns, fill_value=False)
)
encoded_2023_test_data

Unnamed: 0,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,rush_td_percentage,...,stadium_name_Soldier Field,stadium_name_State Farm Stadium,stadium_name_TIAA Bank Field,stadium_name_U.S. Bank Stadium,stadium_weather_type_cold,stadium_weather_type_indoor,stadium_weather_type_moderate,stadium_weather_type_warm,stadium_surface_FieldTurf,stadium_surface_Grass
4,17,158,85.65,13.74,2.0,0.30,0.42,0,0.00,0.00,...,False,False,False,False,True,False,False,False,False,True
5,8,18,30.75,20.50,0.0,0.07,0.16,0,0.00,0.00,...,False,False,False,False,False,True,False,False,True,False
9,2,0,0.00,0.00,0.0,0.00,0.00,0,0.00,0.00,...,False,False,False,False,True,False,False,False,True,False
16,11,43,21.18,7.77,1.0,0.12,-0.02,142,59.64,0.01,...,False,False,False,False,True,False,False,False,False,True
27,1,0,0.00,0.00,0.0,0.00,0.00,0,0.00,0.00,...,False,False,False,False,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2092,12,0,0.00,0.00,0.0,0.00,0.00,36,17.58,0.00,...,False,False,False,False,True,False,False,False,True,False
2096,14,37,13.71,7.11,0.0,0.07,-0.01,183,56.71,0.03,...,False,False,False,False,False,True,False,False,True,False
2098,13,19,7.54,6.53,0.0,0.04,-0.00,104,34.69,0.01,...,False,False,False,False,False,True,False,False,False,True
2099,16,108,53.62,11.14,0.0,0.24,0.24,8,3.50,0.12,...,False,False,False,False,False,False,True,False,True,False


In [33]:
# Column holding the actual 2023 fantasy score
target_column = 'fantasy_points_ppr'

# y (target data) is that single column
y_just_2023 = encoded_2023_test_data[target_column]

# X (features) is every other column
X_just_2023 = encoded_2023_test_data.drop(columns=[target_column])

In [34]:
# Inspect the 2023 feature matrix before predicting
X_just_2023

Unnamed: 0,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,rush_td_percentage,...,stadium_name_Soldier Field,stadium_name_State Farm Stadium,stadium_name_TIAA Bank Field,stadium_name_U.S. Bank Stadium,stadium_weather_type_cold,stadium_weather_type_indoor,stadium_weather_type_moderate,stadium_weather_type_warm,stadium_surface_FieldTurf,stadium_surface_Grass
4,17,158,85.65,13.74,2.0,0.30,0.42,0,0.00,0.00,...,False,False,False,False,True,False,False,False,False,True
5,8,18,30.75,20.50,0.0,0.07,0.16,0,0.00,0.00,...,False,False,False,False,False,True,False,False,True,False
9,2,0,0.00,0.00,0.0,0.00,0.00,0,0.00,0.00,...,False,False,False,False,True,False,False,False,True,False
16,11,43,21.18,7.77,1.0,0.12,-0.02,142,59.64,0.01,...,False,False,False,False,True,False,False,False,False,True
27,1,0,0.00,0.00,0.0,0.00,0.00,0,0.00,0.00,...,False,False,False,False,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2092,12,0,0.00,0.00,0.0,0.00,0.00,36,17.58,0.00,...,False,False,False,False,True,False,False,False,True,False
2096,14,37,13.71,7.11,0.0,0.07,-0.01,183,56.71,0.03,...,False,False,False,False,False,True,False,False,True,False
2098,13,19,7.54,6.53,0.0,0.04,-0.00,104,34.69,0.01,...,False,False,False,False,False,True,False,False,False,True
2099,16,108,53.62,11.14,0.0,0.24,0.24,8,3.50,0.12,...,False,False,False,False,False,False,True,False,True,False


In [35]:
# Predict 2023 fantasy points with the model that was fit without any 2023 rows
test_2023_predictions = no_2023_lr_model.predict(X_just_2023)
test_2023_predictions

array([ 2.97883323e+02,  6.87666204e+01,  2.48868971e+00,  1.65378211e+02,
       -1.77728368e+00,  2.30398902e+02,  5.37395753e+01,  1.25009670e+02,
       -9.90748628e+00,  1.02658564e+02, -2.73709692e+00,  5.02877649e+01,
        1.69442089e+02,  6.17371518e+01,  6.66837992e+01,  2.38039936e+02,
        2.46441988e+02, -2.17956116e+01,  3.23185613e+01,  3.18963706e+02,
        4.63128825e+01,  4.00085963e+01,  2.92567242e-02, -6.28064333e+00,
        6.55892151e+01, -7.32024407e+00,  1.30730525e+02,  2.19994078e+02,
        4.26087042e+01, -8.33822510e+00,  7.84463438e+01,  2.75428210e+02,
        3.06018145e+01,  2.19165669e+01,  2.72934800e+02,  7.36161545e+00,
        2.53622168e+01,  6.98026913e+01,  1.22760748e+01,  1.48656581e+02,
        2.37754376e+02,  3.05847188e+01,  6.34594671e+01,  7.63797947e+01,
        5.53032683e+01,  2.96437892e+02,  1.49448458e+01,  5.39440845e+01,
       -2.70213886e+01,  1.87419602e+02,  8.54306500e+00,  2.76917204e+02,
        2.45251764e+01,  

In [36]:
# Build the comparison table in one step. Selecting columns with df[[...]] and then
# assigning a new column onto the result triggers pandas' SettingWithCopyWarning;
# .assign() returns a fresh frame, so no chained assignment is involved.
fantasy_2023_predictions = test_2023_data[['name', 'team', 'position']].assign(
    # Model predictions, rounded to 2 decimal places (same row order as test_2023_data)
    fantasy_2023_score_prediction=np.round(test_2023_predictions, 2),
    # Actual fantasy score for each player from 2023
    actual_fantasy_score=test_2023_data['fantasy_points_ppr'],
)

fantasy_2023_predictions

Unnamed: 0,name,team,position,fantasy_2023_score_prediction,actual_fantasy_score
4,A.J. Brown,PHI,WR,297.88,289.60
5,A.T. Perry,NO,WR,68.77,60.60
9,AJ McCarron,CIN,QB,2.49,0.76
16,Aaron Jones,GB,RB,165.38,134.90
27,Aaron Rodgers,NYJ,QB,-1.78,0.00
...,...,...,...,...,...
2092,Zach Wilson,NYJ,QB,143.49,119.94
2096,Zack Moss,IND,RB,170.38,169.60
2098,Zamir White,LV,RB,83.34,73.90
2099,Zay Flowers,BAL,WR,202.29,206.40


In [37]:
# RMSE of the predicted 2023 fantasy scores against the actual 2023 fantasy scores
mse_2023 = mean_squared_error(
    y_true=fantasy_2023_predictions['actual_fantasy_score'],
    y_pred=fantasy_2023_predictions['fantasy_2023_score_prediction'],
)
rmse_2023 = math.sqrt(mse_2023)
rmse_2023

14.229830865136156

In [38]:
# R^2 of the predicted 2023 fantasy scores against the actual 2023 fantasy scores
r2_2023 = r2_score(
    y_true=fantasy_2023_predictions['actual_fantasy_score'],
    y_pred=fantasy_2023_predictions['fantasy_2023_score_prediction'],
)
r2_2023

0.974004203717691

# Comparison of Model Accuracy vs. just assuming 2022 data would repeat in 2023

In [39]:
# We want to compare the model's results with the results we would get if we simply assumed
# that each player's 2022 fantasy score would repeat in 2023.
# Start from a copy of the 2023 prediction table so the original is left untouched.

just_guess_repeating = fantasy_2023_predictions.copy()


In [40]:
# We will need a second table with just the 2022 actual results
# (.copy() so later edits do not touch all_seasons_data)

just_2022 = all_seasons_data.loc[all_seasons_data['season'] == 2022].copy()

In [41]:
# Drop every stat column from the 2022 rows and relabel the fantasy score as the 2022 actual
columns_not_needed_2022 = [
    'season', 'games', 'receptions', 'targets',
    'receiving_yards', 'rec_ypg', 'receiving_tds', 'ypr',
    'receiving_fumbles', 'receiving_2pt_conversions', 'target_share',
    'air_yards_share', 'carries', 'rushing_yards', 'rush_ypg',
    'rushing_tds', 'rush_td_percentage', 'rushing_fumbles',
    'rushing_2pt_conversions', 'completions', 'attempts', 'comp_percentage',
    'passing_yards', 'pass_ypg', 'passing_tds', 'passing_2pt_conversions',
    'td_percentage', 'interceptions', 'sacks', 'sack_fumbles',
    'offense_snaps', 'teams_offense_snaps', 'round', 'overall',
    'stadium_name', 'stadium_weather_type', 'stadium_surface',
]
actual_2022 = (
    just_2022
    .drop(columns=columns_not_needed_2022)
    .rename(columns={'fantasy_points_ppr': 'actual_fantasy_score_2022'})
)
actual_2022

Unnamed: 0,name,team,position,actual_fantasy_score_2022
3,A.J. Brown,PHI,WR,299.60
15,Aaron Jones,GB,RB,248.60
26,Aaron Rodgers,GB,QB,239.20
36,Adam Thielen,MIN,WR,180.00
40,Adam Trautman,NO,TE,44.70
...,...,...,...,...
2088,Zach Pascal,PHI,WR,36.00
2091,Zach Wilson,NYJ,QB,98.92
2095,Zack Moss,IND,RB,47.70
2097,Zamir White,LV,RB,7.00


In [42]:
# Now we need to merge the two dataframes.
# Only the 2022 score is brought across, so the 2023 team/position columns keep their
# plain names and nothing has to be dropped or un-suffixed afterwards.
# NOTE(review): rows are matched on 'name' alone - confirm names are unique within a season.
score_2022_by_name = actual_2022[['name', 'actual_fantasy_score_2022']]

fantasy_merged = (
    just_guess_repeating
    .merge(score_2022_by_name, on='name')
    .rename(columns={'actual_fantasy_score': 'actual_fantasy_score_2023'})
)

fantasy_merged

Unnamed: 0,name,team,position,fantasy_2023_score_prediction,actual_fantasy_score_2023,actual_fantasy_score_2022
0,A.J. Brown,PHI,WR,297.88,289.60,299.60
1,Aaron Jones,GB,RB,165.38,134.90,248.60
2,Aaron Rodgers,NYJ,QB,-1.78,0.00,239.20
3,Adam Thielen,CAR,WR,230.40,231.00,180.00
4,Adam Trautman,DEN,TE,53.74,60.40,44.70
...,...,...,...,...,...,...
385,Zach Pascal,ARI,WR,5.15,5.90,36.00
386,Zach Wilson,NYJ,QB,143.49,119.94,98.92
387,Zack Moss,IND,RB,170.38,169.60,47.70
388,Zamir White,LV,RB,83.34,73.90,7.00


In [43]:
# RMSE of "2022 score repeats" against the actual 2023 scores, to check whether the model
# is more accurate than simply assuming next year will be the same as last year
mse_assume = mean_squared_error(
    y_true=fantasy_merged['actual_fantasy_score_2023'],
    y_pred=fantasy_merged['actual_fantasy_score_2022'],
)
rmse_assume = math.sqrt(mse_assume)
rmse_assume

71.00374769451727

In [44]:
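# The RMSE of about 71 points from assuming 2022 scores repeat is quite a bit worse than the roughly 14-point RMSE of our machine learning model!
# That is exactly what we were hoping for.
# Caveat: the model was given each player's 2023 season stats as inputs, while this baseline uses only 2022 results
# and covers only players present in both seasons, so the two errors are not a strict like-for-like comparison.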

# Predictions for 2024

In [45]:
# Load the 2024 prediction inputs and add the same per-game features that were added to the
# training data. Keep this feature list in sync with the matching feature-engineering cells.
prediction_data = pd.read_csv('predicting_data.csv')

# New per-game feature -> season-total column it is derived from.
# Insertion order matters: it determines the order in which the columns are appended.
per_game_feature_sources = {
    'target_per_game': 'targets',
    'carries_per_game': 'carries',
    'team_off_snaps_per_game': 'teams_offense_snaps',
    'off_snaps_per_game': 'offense_snaps',
    'attempts_per_game': 'attempts',
}
for feature_name, total_column in per_game_feature_sources.items():
    prediction_data[feature_name] = (prediction_data[total_column] / prediction_data['games']).round(2)

# A row with 0 games and a non-zero total divides to +/-inf. fillna() does not touch inf and
# scikit-learn rejects non-finite input at predict time, so convert inf to NaN first and let
# the fill below turn it into 0 like every other missing value.
per_game_columns = list(per_game_feature_sources)
prediction_data[per_game_columns] = prediction_data[per_game_columns].replace([np.inf, -np.inf], np.nan)

# Missing values in every column (not only the new features) are replaced with 0.
prediction_data.fillna(0, inplace=True)

# NOTE: this is a second name for the same DataFrame object, not a copy.
new_features_2024_prediction_data = prediction_data
new_features_2024_prediction_data

Unnamed: 0,name,team,position,games,receptions,targets,receiving_yards,rec_ypg,receiving_tds,ypr,...,round,overall,stadium_name,stadium_weather_type,stadium_surface,target_per_game,carries_per_game,team_off_snaps_per_game,off_snaps_per_game,attempts_per_game
0,A.J. Brown,PHI,WR,15.400000,75.800000,119.600000,1189.400000,76.596000,8.400000,16.020000,...,2.0,51.0,Lincoln Financial Field,cold,Grass,7.77,0.06,67.03,52.79,0.03
1,A.T. Perry,NO,WR,8.000000,12.000000,18.000000,246.000000,30.750000,4.000000,20.500000,...,6.0,195.0,Mercedes-Benz Superdome,indoor,FieldTurf,2.25,0.00,64.88,39.88,0.00
2,AJ McCarron,CIN,QB,2.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.0,Paul Brown Stadium,cold,FieldTurf,0.00,0.62,0.00,0.00,7.12
3,Aaron Jones,GB,RB,13.571429,38.857143,52.000000,296.571429,20.691429,2.571429,7.080000,...,5.0,182.0,Lambeau Field,cold,Grass,3.83,12.39,65.60,35.54,0.00
4,Aaron Rodgers,NYJ,QB,13.272727,0.181818,0.181818,-0.909091,-0.057273,0.000000,-0.909091,...,1.0,24.0,MetLife Stadium,cold,FieldTurf,0.01,2.85,67.14,63.86,34.22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
519,Zach Wilson,NYJ,QB,11.333333,0.333333,0.666667,0.666667,0.073333,0.333333,0.666667,...,1.0,2.0,MetLife Stadium,cold,FieldTurf,0.06,2.74,60.88,56.85,29.21
520,Zack Moss,IND,RB,12.000000,17.000000,23.000000,124.000000,9.417500,1.000000,6.367500,...,3.0,86.0,Lucas Oil Stadium,indoor,FieldTurf,1.92,9.73,67.94,31.04,0.00
521,Zamir White,LV,RB,12.000000,7.500000,9.500000,49.000000,3.770000,0.000000,3.265000,...,4.0,122.0,Allegiant Stadium,indoor,Grass,0.79,5.04,63.04,10.92,0.00
522,Zay Flowers,BAL,WR,16.000000,77.000000,108.000000,858.000000,53.620000,5.000000,11.140000,...,1.0,22.0,M&T Bank Stadium,moderate,FieldTurf,6.75,0.50,67.56,56.56,0.00


In [46]:
# Strip the player identifiers and the raw season-total stat columns from the feature frame.
# NOTE(review): presumably this mirrors the columns dropped from the training data -- confirm
# against the training cell so both frames end up with the same feature set.
prediction_2024_data_cleaned = new_features_2024_prediction_data.drop(
    columns=[
        # player identifiers
        'name', 'team',
        # receiving totals
        'receptions', 'receiving_yards', 'receiving_tds', 'receiving_2pt_conversions',
        # rushing totals
        'rushing_yards', 'rushing_tds', 'rushing_2pt_conversions',
        # passing totals
        'completions', 'passing_yards', 'passing_tds', 'interceptions',
    ]
)
prediction_2024_data_cleaned.head()

Unnamed: 0,position,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,...,round,overall,stadium_name,stadium_weather_type,stadium_surface,target_per_game,carries_per_game,team_off_snaps_per_game,off_snaps_per_game,attempts_per_game
0,WR,15.4,119.6,76.596,16.02,1.4,0.268,0.384,1.0,0.904,...,2.0,51.0,Lincoln Financial Field,cold,Grass,7.77,0.06,67.03,52.79,0.03
1,WR,8.0,18.0,30.75,20.5,0.0,0.07,0.16,0.0,0.0,...,6.0,195.0,Mercedes-Benz Superdome,indoor,FieldTurf,2.25,0.0,64.88,39.88,0.0
2,QB,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.25,4.875,...,0.0,0.0,Paul Brown Stadium,cold,FieldTurf,0.0,0.62,0.0,0.0,7.12
3,RB,13.571429,52.0,20.691429,7.08,0.857143,0.115714,0.007143,168.142857,61.561429,...,5.0,182.0,Lambeau Field,cold,Grass,3.83,12.39,65.6,35.54,0.0
4,QB,13.272727,0.181818,-0.057273,-0.909091,0.0,0.0,0.0,37.818182,12.918182,...,1.0,24.0,MetLife Stadium,cold,FieldTurf,0.01,2.85,67.14,63.86,34.22


In [47]:
# List the remaining column names so the feature set can be checked by eye.
prediction_2024_data_cleaned.columns

Index(['position', 'games', 'targets', 'rec_ypg', 'ypr', 'receiving_fumbles',
       'target_share', 'air_yards_share', 'carries', 'rush_ypg',
       'rush_td_percentage', 'rushing_fumbles', 'attempts', 'comp_percentage',
       'pass_ypg', 'passing_2pt_conversions', 'td_percentage', 'sacks',
       'sack_fumbles', 'offense_snaps', 'teams_offense_snaps', 'round',
       'overall', 'stadium_name', 'stadium_weather_type', 'stadium_surface',
       'target_per_game', 'carries_per_game', 'team_off_snaps_per_game',
       'off_snaps_per_game', 'attempts_per_game'],
      dtype='object')

In [48]:
# Encoding categorical columns for model
# pd.get_dummies one-hot encodes the text columns (position, stadium name, weather type,
# surface) and leaves the numeric columns as they are.
# NOTE(review): the dummy columns are built only from the categories present in this frame;
# confirm they match, in name and order, the columns the model was trained on.
encoded_prediction_2024_data = pd.get_dummies(prediction_2024_data_cleaned)
encoded_prediction_2024_data

Unnamed: 0,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,rush_td_percentage,...,stadium_name_Soldier Field,stadium_name_State Farm Stadium,stadium_name_TIAA Bank Field,stadium_name_U.S. Bank Stadium,stadium_weather_type_cold,stadium_weather_type_indoor,stadium_weather_type_moderate,stadium_weather_type_warm,stadium_surface_FieldTurf,stadium_surface_Grass
0,15.400000,119.600000,76.596000,16.020000,1.400000,0.268000,0.384000,1.000000,0.904000,0.066000,...,False,False,False,False,True,False,False,False,False,True
1,8.000000,18.000000,30.750000,20.500000,0.000000,0.070000,0.160000,0.000000,0.000000,0.000000,...,False,False,False,False,False,True,False,False,True,False
2,2.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.250000,4.875000,0.050000,...,False,False,False,False,True,False,False,False,True,False
3,13.571429,52.000000,20.691429,7.080000,0.857143,0.115714,0.007143,168.142857,61.561429,0.037143,...,False,False,False,False,True,False,False,False,False,True
4,13.272727,0.181818,-0.057273,-0.909091,0.000000,0.000000,0.000000,37.818182,12.918182,0.036364,...,False,False,False,False,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
519,11.333333,0.666667,0.073333,0.666667,0.000000,0.000000,0.003333,31.000000,14.380000,0.060000,...,False,False,False,False,True,False,False,False,True,False
520,12.000000,23.000000,9.417500,6.367500,0.500000,0.050000,0.002500,116.750000,41.467500,0.030000,...,False,False,False,False,False,True,False,False,True,False
521,12.000000,9.500000,3.770000,3.265000,0.000000,0.020000,0.000000,60.500000,20.525000,0.005000,...,False,False,False,False,False,True,False,False,False,True
522,16.000000,108.000000,53.620000,11.140000,0.000000,0.240000,0.240000,8.000000,3.500000,0.120000,...,False,False,False,False,False,False,True,False,True,False


In [49]:
# Predict 2024 fantasy scores with the already-fitted linear regression model.
#
# The one-hot columns of the prediction frame come only from the categories present in the
# prediction data, so they can differ from the training columns (a stadium or position that is
# missing, extra, or in a different order). When the model recorded its training feature names,
# align the frame to them first: columns the model never saw are dropped and columns missing
# here are added as all-zero. When the columns already match, this is a no-op.
expected_feature_names = getattr(lr_model, 'feature_names_in_', None)
if expected_feature_names is not None:
    model_input_2024 = encoded_prediction_2024_data.reindex(columns=expected_feature_names, fill_value=0)
else:
    # Model was fitted without feature names; nothing to align against.
    model_input_2024 = encoded_prediction_2024_data

predictions_2024 = lr_model.predict(model_input_2024)

predictions_2024

array([ 2.41000415e+02,  6.20371682e+01,  1.21633448e+01,  2.02398813e+02,
        2.13585851e+02,  1.55531873e+02,  4.77596799e+01,  1.23211987e+02,
        2.59627436e+01,  1.11239160e+02,  6.11265341e+00,  3.99249369e+01,
        1.06778670e+02,  8.09165960e+01,  1.42805078e+02,  2.74514058e+02,
        2.20789431e+02,  1.19486117e+00,  5.29124289e+01,  2.69449967e+02,
        4.21588935e+01,  3.58778173e+01,  1.75794680e+02,  8.17058183e+00,
        6.99294042e+01, -3.93421502e+00,  1.82448951e+02,  2.28250156e+02,
        1.02694044e+02, -4.66430709e+00,  7.09539019e+01,  2.04526040e+02,
        2.45774745e+01,  4.83021704e+01,  2.71405700e+02,  1.74232167e+01,
        2.08096988e+01,  6.78879239e+01,  6.99347804e+01,  1.98711016e+02,
        2.00930869e+02,  2.85537823e+01,  2.88998278e+01,  3.46208078e+01,
        6.88225807e+01,  1.99979463e+02,  1.23006337e+01,  4.20314312e+01,
        5.87987509e+00,  1.64191700e+02,  1.04764986e+01,  1.76533644e+02,
        4.24182738e+01,  

In [50]:
# Build a clean predictions table from a copy of the prediction data: one row per player with
# the predicted 2024 season score and the per-week average (season score / 17), both rounded
# to 2 decimal places.
fantasy_2024_predictions = (
    prediction_data
    .assign(
        fantasy_2024_score_prediction=predictions_2024,
        fantasy_2024_per_week_score_prediction=predictions_2024 / 17,
    )
    .round({
        'fantasy_2024_score_prediction': 2,
        'fantasy_2024_per_week_score_prediction': 2,
    })
)


#### ================== ALL PLAYERS ========================= ####

# Keep only the columns to be shown, ordered from highest to lowest predicted score
fantasy_2024_predictions = fantasy_2024_predictions[
    ['name', 'team', 'position', 'fantasy_2024_score_prediction', 'fantasy_2024_per_week_score_prediction']
].sort_values(by='fantasy_2024_score_prediction', ascending=False)

# Top 10 across all positions
display(fantasy_2024_predictions.reset_index(drop=True).head(10))


def _ranked_players_at(position):
    """Return the rows of fantasy_2024_predictions for one position, highest predicted score first."""
    at_position = fantasy_2024_predictions['position'] == position
    return fantasy_2024_predictions[at_position].sort_values(by='fantasy_2024_score_prediction', ascending=False)


#### ================== WRs ========================= ####

fantasy_2024_predictions_WR = _ranked_players_at('WR')
display(fantasy_2024_predictions_WR.reset_index(drop=True).head(10))

#### ================== QBs ========================= ####

fantasy_2024_predictions_QB = _ranked_players_at('QB')
display(fantasy_2024_predictions_QB.reset_index(drop=True).head(10))

#### ================== RBs ========================= ####

fantasy_2024_predictions_RB = _ranked_players_at('RB')
display(fantasy_2024_predictions_RB.reset_index(drop=True).head(10))

#### ================== TEs ========================= ####

fantasy_2024_predictions_TE = _ranked_players_at('TE')
display(fantasy_2024_predictions_TE.reset_index(drop=True).head(10))

Unnamed: 0,name,team,position,fantasy_2024_score_prediction,fantasy_2024_per_week_score_prediction
0,Josh Allen,BUF,QB,318.6,18.74
1,Puka Nacua,LA,WR,318.36,18.73
2,Justin Herbert,LAC,QB,308.04,18.12
3,Justin Jefferson,MIN,WR,297.64,17.51
4,Lamar Jackson,BAL,QB,293.24,17.25
5,Patrick Mahomes,KC,QB,290.89,17.11
6,Trevor Lawrence,JAX,QB,284.14,16.71
7,CeeDee Lamb,DAL,WR,277.58,16.33
8,Alvin Kamara,NO,RB,274.51,16.15
9,Tyreek Hill,MIA,WR,273.1,16.06


Unnamed: 0,name,team,position,fantasy_2024_score_prediction,fantasy_2024_per_week_score_prediction
0,Puka Nacua,LA,WR,318.36,18.73
1,Justin Jefferson,MIN,WR,297.64,17.51
2,CeeDee Lamb,DAL,WR,277.58,16.33
3,Tyreek Hill,MIA,WR,273.1,16.06
4,Amon-Ra St. Brown,DET,WR,269.45,15.85
5,Ja'Marr Chase,CIN,WR,261.09,15.36
6,Keenan Allen,LAC,WR,255.61,15.04
7,Garrett Wilson,NYJ,WR,245.33,14.43
8,Mike Evans,TB,WR,244.87,14.4
9,DeAndre Hopkins,TEN,WR,243.39,14.32


Unnamed: 0,name,team,position,fantasy_2024_score_prediction,fantasy_2024_per_week_score_prediction
0,Josh Allen,BUF,QB,318.6,18.74
1,Justin Herbert,LAC,QB,308.04,18.12
2,Lamar Jackson,BAL,QB,293.24,17.25
3,Patrick Mahomes,KC,QB,290.89,17.11
4,Trevor Lawrence,JAX,QB,284.14,16.71
5,Russell Wilson,DEN,QB,269.24,15.84
6,Jalen Hurts,PHI,QB,268.01,15.77
7,Dak Prescott,DAL,QB,266.98,15.7
8,C.J. Stroud,HOU,QB,260.11,15.3
9,Kyler Murray,ARI,QB,259.73,15.28


Unnamed: 0,name,team,position,fantasy_2024_score_prediction,fantasy_2024_per_week_score_prediction
0,Alvin Kamara,NO,RB,274.51,16.15
1,Bijan Robinson,ATL,RB,271.41,15.97
2,Christian McCaffrey,SF,RB,260.97,15.35
3,Najee Harris,PIT,RB,257.89,15.17
4,Ezekiel Elliott,NE,RB,255.79,15.05
5,Travis Etienne,JAX,RB,252.86,14.87
6,Josh Jacobs,LV,RB,248.05,14.59
7,Jahmyr Gibbs,DET,RB,231.89,13.64
8,Austin Ekeler,LAC,RB,228.25,13.43
9,Joe Mixon,CIN,RB,222.91,13.11


Unnamed: 0,name,team,position,fantasy_2024_score_prediction,fantasy_2024_per_week_score_prediction
0,Travis Kelce,KC,TE,246.08,14.48
1,Sam LaPorta,DET,TE,208.9,12.29
2,George Kittle,SF,TE,187.58,11.03
3,Mark Andrews,BAL,TE,180.3,10.61
4,Dalton Kincaid,BUF,TE,161.38,9.49
5,Evan Engram,JAX,TE,157.95,9.29
6,T.J. Hockenson,MIN,TE,146.27,8.6
7,Kyle Pitts,ATL,TE,139.18,8.19
8,Jimmy Graham,NO,TE,138.09,8.12
9,Dallas Goedert,PHI,TE,134.59,7.92


# Creating csv files with results

In [51]:
# Saving the 2024 predictions for every player to a CSV file.
# Rows are written in the frame's current order and index=False leaves the DataFrame index out of the file.
fantasy_2024_predictions.to_csv('fantasy_football_2024_predictions.csv', index=False)

In [52]:
# Reshape the 2024 predictions to the layout of the past-season score data so the two can be
# concatenated: tag every row as season 2024, expose the predicted score under the
# 'fantasy_points_ppr' name, and keep only the shared columns in a fixed order.
fantasy_2024_predictions_trend = (
    fantasy_2024_predictions
    .assign(season=2024)
    .rename(columns={'fantasy_2024_score_prediction': 'fantasy_points_ppr'})
    .loc[:, ['name', 'team', 'position', 'season', 'fantasy_points_ppr']]
)
fantasy_2024_predictions_trend.head()

Unnamed: 0,name,team,position,season,fantasy_points_ppr
273,Josh Allen,BUF,QB,2024,318.6
402,Puka Nacua,LA,WR,2024,318.36
288,Justin Herbert,LAC,QB,2024,308.04
289,Justin Jefferson,MIN,WR,2024,297.64
327,Lamar Jackson,BAL,QB,2024,293.24


In [53]:
# Take an independent copy of the past-season data reduced to the columns shared with the
# 2024 predictions, in the same order, ready for concatenation.
all_seasons_data_trend = all_seasons_data.loc[
    :, ['name', 'team', 'position', 'season', 'fantasy_points_ppr']
].copy()
all_seasons_data_trend.head()

Unnamed: 0,name,team,position,season,fantasy_points_ppr
0,A.J. Brown,TEN,WR,2019,217.1
1,A.J. Brown,TEN,WR,2020,247.5
2,A.J. Brown,TEN,WR,2021,180.9
3,A.J. Brown,PHI,WR,2022,299.6
4,A.J. Brown,PHI,WR,2023,289.6


In [54]:
# Stack each player's past-season fantasy scores on top of the model's 2024 predictions,
# fix the column order, then sort by player name and season so every player's seasons
# appear together in chronological order with a fresh 0..n-1 index.
score_trends_df = (
    pd.concat([all_seasons_data_trend, fantasy_2024_predictions_trend], axis=0)
    .loc[:, ['name', 'team', 'position', 'season', 'fantasy_points_ppr']]
    .sort_values(['name', 'season'])
    .reset_index(drop=True)
)

score_trends_df.head(21)

Unnamed: 0,name,team,position,season,fantasy_points_ppr
0,A.J. Brown,TEN,WR,2019,217.1
1,A.J. Brown,TEN,WR,2020,247.5
2,A.J. Brown,TEN,WR,2021,180.9
3,A.J. Brown,PHI,WR,2022,299.6
4,A.J. Brown,PHI,WR,2023,289.6
5,A.J. Brown,PHI,WR,2024,241.0
6,A.T. Perry,NO,WR,2023,60.6
7,A.T. Perry,NO,WR,2024,62.04
8,AJ McCarron,CIN,QB,2017,2.64
9,AJ McCarron,HOU,QB,2019,16.9


In [55]:
# Save the combined past-season scores and 2024 predictions to a CSV file, without the DataFrame index.
score_trends_df.to_csv('fantasy_football_players_career_scores.csv', index=False)