In [63]:
# Import required libraries
import numpy as np
import pandas as pd
import hvplot.pandas
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [64]:
data = pd.read_csv('training_data.csv')
data.head()

Unnamed: 0,name,team,position,games,receptions,targets,receiving_yards,rec_ypg,receiving_tds,ypr,...,sacks,sack_fumbles,offense_snaps,teams_offense_snaps,round,overall,stadium_name,stadium_weather_type,stadium_surface,fantasy_points_ppr
0,A.J. Brown,TEN,WR,16,52,84,1051.0,65.69,8,20.21,...,0.0,0,678.0,997.0,2.0,51.0,Nissan Stadium,moderate,Grass,217.1
1,A.J. Brown,TEN,WR,14,70,106,1075.0,76.79,11,15.36,...,0.0,0,760.0,945.0,2.0,51.0,Nissan Stadium,moderate,Grass,247.5
2,A.J. Brown,TEN,WR,13,63,105,869.0,66.85,5,13.79,...,0.0,0,604.0,876.0,2.0,51.0,Nissan Stadium,moderate,Grass,180.9
3,A.J. Brown,PHI,WR,17,88,145,1496.0,88.0,11,17.0,...,0.0,0,1004.0,1189.0,2.0,51.0,Lincoln Financial Field,cold,Grass,299.6
4,A.J. Brown,PHI,WR,17,106,158,1456.0,85.65,7,13.74,...,0.0,0,1019.0,1154.0,2.0,51.0,Lincoln Financial Field,cold,Grass,289.6


In [65]:
data.columns

Index(['name', 'team', 'position', 'games', 'receptions', 'targets',
       'receiving_yards', 'rec_ypg', 'receiving_tds', 'ypr',
       'receiving_fumbles', 'receiving_2pt_conversions', 'target_share',
       'air_yards_share', 'carries', 'rushing_yards', 'rush_ypg',
       'rushing_tds', 'rush_td_percentage', 'rushing_fumbles',
       'rushing_2pt_conversions', 'completions', 'attempts', 'comp_percentage',
       'passing_yards', 'pass_ypg', 'passing_tds', 'passing_2pt_conversions',
       'td_percentage', 'interceptions', 'sacks', 'sack_fumbles',
       'offense_snaps', 'teams_offense_snaps', 'round', 'overall',
       'stadium_name', 'stadium_weather_type', 'stadium_surface',
       'fantasy_points_ppr'],
      dtype='object')

In [66]:
data = data.drop(columns = ['name', 'team', 'receptions', 'receiving_yards', 'receiving_tds', 'receiving_2pt_conversions', 
                            'rushing_yards', 'rushing_tds', 'rushing_2pt_conversions', 'completions', 'passing_yards', 
                            'passing_tds', 'interceptions'])
data

Unnamed: 0,position,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,...,sacks,sack_fumbles,offense_snaps,teams_offense_snaps,round,overall,stadium_name,stadium_weather_type,stadium_surface,fantasy_points_ppr
0,WR,16,84,65.69,20.21,1.0,0.20,0.29,3,3.75,...,0.0,0,678.0,997.0,2.0,51.0,Nissan Stadium,moderate,Grass,217.10
1,WR,14,106,76.79,15.36,2.0,0.27,0.35,0,0.00,...,0.0,0,760.0,945.0,2.0,51.0,Nissan Stadium,moderate,Grass,247.50
2,WR,13,105,66.85,13.79,0.0,0.28,0.45,2,0.77,...,0.0,0,604.0,876.0,2.0,51.0,Nissan Stadium,moderate,Grass,180.90
3,WR,17,145,88.00,17.00,2.0,0.29,0.41,0,0.00,...,0.0,0,1004.0,1189.0,2.0,51.0,Lincoln Financial Field,cold,Grass,299.60
4,WR,17,158,85.65,13.74,2.0,0.30,0.42,0,0.00,...,0.0,0,1019.0,1154.0,2.0,51.0,Lincoln Financial Field,cold,Grass,289.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2101,WR,16,102,40.75,11.64,1.0,0.21,0.26,1,0.00,...,0.0,0,941.0,1063.0,2.0,37.0,New Era Field,cold,FieldTurf,165.20
2102,WR,11,20,14.00,11.00,0.0,0.05,0.04,0,0.00,...,0.0,0,248.0,771.0,2.0,37.0,Allegiant Stadium,indoor,Grass,36.56
2103,WR,15,70,36.40,11.62,1.0,0.14,0.24,2,0.20,...,0.0,0,614.0,1026.0,2.0,37.0,Allegiant Stadium,indoor,Grass,105.90
2104,WR,16,121,51.44,10.04,0.0,0.22,0.26,4,1.12,...,0.0,0,930.0,1082.0,2.0,37.0,TIAA Bank Field,warm,Grass,198.10


In [67]:
# Encoding categorical columns for model
encoded_data = pd.get_dummies(data)
encoded_data

Unnamed: 0,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,rush_td_percentage,...,stadium_name_Soldier Field,stadium_name_State Farm Stadium,stadium_name_TIAA Bank Field,stadium_name_U.S. Bank Stadium,stadium_weather_type_cold,stadium_weather_type_indoor,stadium_weather_type_moderate,stadium_weather_type_warm,stadium_surface_FieldTurf,stadium_surface_Grass
0,16,84,65.69,20.21,1.0,0.20,0.29,3,3.75,0.33,...,False,False,False,False,False,False,True,False,False,True
1,14,106,76.79,15.36,2.0,0.27,0.35,0,0.00,0.00,...,False,False,False,False,False,False,True,False,False,True
2,13,105,66.85,13.79,0.0,0.28,0.45,2,0.77,0.00,...,False,False,False,False,False,False,True,False,False,True
3,17,145,88.00,17.00,2.0,0.29,0.41,0,0.00,0.00,...,False,False,False,False,True,False,False,False,False,True
4,17,158,85.65,13.74,2.0,0.30,0.42,0,0.00,0.00,...,False,False,False,False,True,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2101,16,102,40.75,11.64,1.0,0.21,0.26,1,0.00,0.00,...,False,False,False,False,True,False,False,False,True,False
2102,11,20,14.00,11.00,0.0,0.05,0.04,0,0.00,0.00,...,False,False,False,False,False,True,False,False,False,True
2103,15,70,36.40,11.62,1.0,0.14,0.24,2,0.20,0.00,...,False,False,False,False,False,True,False,False,False,True
2104,16,121,51.44,10.04,0.0,0.22,0.26,4,1.12,0.00,...,False,False,True,False,False,False,False,True,False,True


In [68]:
# Defining y (target variable)
y = encoded_data['fantasy_points_ppr']

# Defining X (features)
X = encoded_data.drop(columns = 'fantasy_points_ppr')

X.shape

(2106, 63)

In [69]:
# Spliting model into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, 
                                                    y, 
                                                    random_state=42)

In [70]:
lr_model = LinearRegression()

In [71]:
lr_model.fit(X_train, y_train)

In [72]:
intercept = lr_model.intercept_
intercept

-9.038865919951547

In [73]:
m = lr_model.coef_
m

array([ 1.08175753e+00,  1.44975250e+00,  1.64774209e+00, -7.53834106e-01,
       -3.39064198e-01, -2.74178224e+02, -9.59526852e+00,  5.10205193e-01,
        5.84696326e-01,  2.01562709e+01,  1.59880565e+00,  4.55855109e-01,
        3.43177297e+00,  4.84189756e-02,  5.38977260e+00,  6.56244520e+01,
       -4.54421452e-01, -2.86351604e+00,  2.97876185e-03, -7.87182259e-03,
       -6.01545745e+00,  1.81662892e-01, -8.36461804e+00, -3.65299175e+00,
        7.43564424e+00,  4.58196555e+00,  4.57554709e-01, -1.23873546e+00,
        4.42967909e+00, -9.93650118e+00,  8.26452153e+00, -7.16446075e-01,
       -5.03282130e+00, -3.57196566e+00,  2.56803548e+00,  1.12149078e+00,
       -1.62189031e+00,  2.57772145e+00, -5.42276989e+00,  5.21336165e+00,
        8.54334462e-01,  3.91101178e+00,  2.18229187e+00, -4.20790762e+00,
        2.81323449e+00, -4.86153936e+00,  3.64649427e+00, -7.60233372e+00,
        3.44963017e+00,  4.16506708e+00,  4.33206999e+00, -5.20663852e-01,
       -9.63744649e-01, -

In [74]:
predictions = lr_model.predict(X_train)

In [75]:
len(predictions)

1579

In [76]:
r2_score(y_train, predictions)

0.9649431595700579

In [77]:
mse = mean_squared_error(y_train, predictions)
mse

335.86987759548003

In [78]:
prediction_data = pd.read_csv('predicting_data.csv')
prediction_data.head()

Unnamed: 0,name,team,position,games,receptions,targets,receiving_yards,rec_ypg,receiving_tds,ypr,...,interceptions,sacks,sack_fumbles,offense_snaps,teams_offense_snaps,round,overall,stadium_name,stadium_weather_type,stadium_surface
0,A.J. Brown,PHI,WR,15.4,75.8,119.6,1189.4,76.596,8.4,16.02,...,0.0,0.0,0.0,813.0,1032.2,2.0,51.0,Lincoln Financial Field,cold,Grass
1,A.T. Perry,NO,WR,8.0,12.0,18.0,246.0,30.75,4.0,20.5,...,0.0,0.0,0.0,319.0,519.0,6.0,195.0,Mercedes-Benz Superdome,indoor,FieldTurf
2,AJ McCarron,CIN,QB,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.25,2.0,0.0,0.0,0.0,0.0,0.0,Paul Brown Stadium,cold,FieldTurf
3,Aaron Jones,GB,RB,13.571429,38.857143,52.0,296.571429,20.691429,2.571429,7.08,...,0.0,0.0,0.0,482.285714,890.285714,5.0,182.0,Lambeau Field,cold,Grass
4,Aaron Rodgers,NYJ,QB,13.272727,0.181818,0.181818,-0.909091,-0.057273,0.0,-0.909091,...,5.363636,29.090909,3.363636,847.636364,891.181818,1.0,24.0,MetLife Stadium,cold,FieldTurf


In [79]:
prediction_data_cleaned.columns

Index(['position', 'games', 'receptions', 'targets', 'receiving_yards',
       'rec_ypg', 'receiving_tds', 'ypr', 'receiving_fumbles',
       'receiving_2pt_conversions', 'target_share', 'air_yards_share',
       'carries', 'rushing_yards', 'rush_ypg', 'rushing_tds',
       'rush_td_percentage', 'rushing_fumbles', 'rushing_2pt_conversions',
       'completions', 'attempts', 'comp_percentage', 'passing_yards',
       'pass_ypg', 'passing_tds', 'passing_2pt_conversions', 'td_percentage',
       'interceptions', 'sacks', 'sack_fumbles', 'offense_snaps',
       'teams_offense_snaps', 'round', 'overall', 'stadium_weather_type',
       'stadium_surface'],
      dtype='object')

In [84]:
prediction_data_cleaned = prediction_data.drop(columns = ['name', 'team', 'receptions', 'receiving_yards', 'receiving_tds', 'receiving_2pt_conversions', 
                            'rushing_yards', 'rushing_tds', 'rushing_2pt_conversions', 'completions', 'passing_yards', 
                            'passing_tds', 'interceptions'])
prediction_data_cleaned.head()

Unnamed: 0,position,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,...,td_percentage,sacks,sack_fumbles,offense_snaps,teams_offense_snaps,round,overall,stadium_name,stadium_weather_type,stadium_surface
0,WR,15.4,119.6,76.596,16.02,1.4,0.268,0.384,1.0,0.904,...,0.116,0.0,0.0,813.0,1032.2,2.0,51.0,Lincoln Financial Field,cold,Grass
1,WR,8.0,18.0,30.75,20.5,0.0,0.07,0.16,0.0,0.0,...,0.33,0.0,0.0,319.0,519.0,6.0,195.0,Mercedes-Benz Superdome,indoor,FieldTurf
2,QB,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.25,4.875,...,0.005,2.0,0.0,0.0,0.0,0.0,0.0,Paul Brown Stadium,cold,FieldTurf
3,RB,13.571429,52.0,20.691429,7.08,0.857143,0.115714,0.007143,168.142857,61.561429,...,0.042857,0.0,0.0,482.285714,890.285714,5.0,182.0,Lambeau Field,cold,Grass
4,QB,13.272727,0.181818,-0.057273,-0.909091,0.0,0.0,0.0,37.818182,12.918182,...,0.052727,29.090909,3.363636,847.636364,891.181818,1.0,24.0,MetLife Stadium,cold,FieldTurf


In [85]:
# Encoding categorical columns for model
encoded_prediction_data = pd.get_dummies(prediction_data_cleaned)
encoded_prediction_data

Unnamed: 0,games,targets,rec_ypg,ypr,receiving_fumbles,target_share,air_yards_share,carries,rush_ypg,rush_td_percentage,...,stadium_name_Soldier Field,stadium_name_State Farm Stadium,stadium_name_TIAA Bank Field,stadium_name_U.S. Bank Stadium,stadium_weather_type_cold,stadium_weather_type_indoor,stadium_weather_type_moderate,stadium_weather_type_warm,stadium_surface_FieldTurf,stadium_surface_Grass
0,15.400000,119.600000,76.596000,16.020000,1.400000,0.268000,0.384000,1.000000,0.904000,0.066000,...,False,False,False,False,True,False,False,False,False,True
1,8.000000,18.000000,30.750000,20.500000,0.000000,0.070000,0.160000,0.000000,0.000000,0.000000,...,False,False,False,False,False,True,False,False,True,False
2,2.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.250000,4.875000,0.050000,...,False,False,False,False,True,False,False,False,True,False
3,13.571429,52.000000,20.691429,7.080000,0.857143,0.115714,0.007143,168.142857,61.561429,0.037143,...,False,False,False,False,True,False,False,False,False,True
4,13.272727,0.181818,-0.057273,-0.909091,0.000000,0.000000,0.000000,37.818182,12.918182,0.036364,...,False,False,False,False,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
519,11.333333,0.666667,0.073333,0.666667,0.000000,0.000000,0.003333,31.000000,14.380000,0.060000,...,False,False,False,False,True,False,False,False,True,False
520,12.000000,23.000000,9.417500,6.367500,0.500000,0.050000,0.002500,116.750000,41.467500,0.030000,...,False,False,False,False,False,True,False,False,True,False
521,12.000000,9.500000,3.770000,3.265000,0.000000,0.020000,0.000000,60.500000,20.525000,0.005000,...,False,False,False,False,False,True,False,False,False,True
522,16.000000,108.000000,53.620000,11.140000,0.000000,0.240000,0.240000,8.000000,3.500000,0.120000,...,False,False,False,False,False,False,True,False,True,False


In [86]:
list_of_lists = encoded_prediction_data.values.tolist()  # Get a list of rows
test = list_of_lists[5]

real_predictions = lr_model.predict([test])
real_predictions



array([160.05047641])

In [87]:
names_list = prediction_data['name'].tolist()

In [88]:
# Assuming encoded_prediction_data has data
list_of_lists = encoded_prediction_data.values.tolist()  # Get a list of rows

# Initialize an empty list to store all predictions
all_predictions = []

# Loop through each index from 0 to 524 (inclusive)
for index in range(524):
  # Get the data for the current index
  data_for_prediction = list_of_lists[index]

  # Make prediction using your model
  prediction = lr_model.predict([data_for_prediction])

  # Append the prediction to the all_predictions list
  all_predictions.append(prediction)  # Assuming single prediction per row

# Print or use the all_predictions list for further analysis
all_predictions



[array([226.10292925]),
 array([65.98481716]),
 array([0.03570406]),
 array([202.48393255]),
 array([221.97106862]),
 array([160.05047641]),
 array([46.95854051]),
 array([150.79324548]),
 array([32.50862276]),
 array([111.90751406]),
 array([3.57939838]),
 array([41.66119988]),
 array([111.70721244]),
 array([88.1652287]),
 array([155.72053547]),
 array([269.63377875]),
 array([218.12812875]),
 array([2.38917698]),
 array([57.35587889]),
 array([259.75509747]),
 array([47.96227359]),
 array([36.88734318]),
 array([186.60708472]),
 array([-2.73414523]),
 array([70.66730938]),
 array([-10.04614775]),
 array([183.80535597]),
 array([228.21484421]),
 array([104.00213774]),
 array([-9.48039665]),
 array([69.10299984]),
 array([207.75941753]),
 array([24.3161922]),
 array([54.66234133]),
 array([259.90921749]),
 array([43.96403304]),
 array([21.71938536]),
 array([68.96144622]),
 array([70.03329471]),
 array([201.89737798]),
 array([195.24878866]),
 array([33.36364262]),
 array([31.14026748

In [89]:
fantasy_2024_predictions = pd.DataFrame(list(zip(names_list, all_predictions)), columns = ['name', '2024_Fantasy_Prediction'])
fantasy_2024_predictions = fantasy_2024_predictions.merge(prediction_data, how='left', on='name')
fantasy_2024_predictions = fantasy_2024_predictions[['name', 'team', 'position', '2024_Fantasy_Prediction']]
fantasy_2024_predictions

Unnamed: 0,name,team,position,2024_Fantasy_Prediction
0,A.J. Brown,PHI,WR,[226.102929247163]
1,A.T. Perry,NO,WR,[65.98481715944158]
2,AJ McCarron,CIN,QB,[0.03570406190679343]
3,Aaron Jones,GB,RB,[202.48393255108954]
4,Aaron Rodgers,NYJ,QB,[221.97106861857446]
...,...,...,...,...
519,Zach Wilson,NYJ,QB,[144.77501905484294]
520,Zack Moss,IND,RB,[104.60179728069548]
521,Zamir White,LV,RB,[44.91425685462852]
522,Zay Flowers,BAL,WR,[193.4077139795765]


In [90]:
mse = mean_squared_error(y_test, all_predictions)
mse

ValueError: Found input variables with inconsistent numbers of samples: [527, 524]

In [92]:
r2_score(y_train, real_predictions)

ValueError: Found input variables with inconsistent numbers of samples: [1579, 524]