In [298]:
#dependencies and libraries
import glob
import os

import numpy as np
import pandas as pd
import hvplot.pandas
from numpy.random import seed
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout



%matplotlib inline



In [299]:
#import and consolidate CSVs of historical fantasy football data from 1970 - 2019
root_directory = "Resources"

# Build the pattern with os.path.join so it works on every OS: the previous
# f'{root_directory}\*.csv' hard-coded the Windows separator and relied on the
# invalid escape sequence "\*". Sorting makes the concatenation order (and so
# the row index of df) deterministic, because glob returns files in no
# guaranteed order.
all_files = sorted(glob.glob(os.path.join(root_directory, "*.csv")))
if not all_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {root_directory!r}")

li = []
#apply year to corresponding data using CSV file name
for filename in all_files:
    frame = pd.read_csv(filename, index_col=None, header=0)
    # The year is the first four characters of the file's base name. Reading it
    # from the base name (instead of a fixed offset into the full path) keeps
    # this correct if root_directory is ever changed.
    frame["Year"] = os.path.basename(filename)[:4]
    li.append(frame)
#concatenate all yearly frames into one dataframe with a fresh 0..n-1 index
df = pd.concat(li, axis=0, ignore_index=True)

In [300]:
#validate dataframe: display the consolidated frame to confirm the load worked
df

Unnamed: 0.1,Unnamed: 0,Player,Tm,Pos,Age,G,GS,Cmp,Att,Yds,...,PassingTD,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints,Year,Tgt
0,0,Ron Johnson,NYG,RB,23.0,14.0,14.0,0.0,0.0,0.0,...,0.0,0.0,1027.0,8.0,263.0,487.0,4.0,261.40,1970,
1,1,Dick Gordon,CHI,WR,26.0,14.0,14.0,0.0,0.0,0.0,...,0.0,0.0,17.0,0.0,4.0,1026.0,13.0,249.30,1970,
2,2,Gene Washington,SFO,WR,23.0,13.0,13.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1100.0,12.0,235.00,1970,
3,3,Gary Garrison,SDG,WR,26.0,14.0,14.0,0.0,0.0,0.0,...,0.0,0.0,7.0,0.0,4.0,1006.0,12.0,217.30,1970,
4,4,MacArthur Lane,STL,RB,28.0,14.0,14.0,0.0,0.0,0.0,...,0.0,0.0,977.0,11.0,206.0,365.0,2.0,240.20,1970,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26123,615,Ray-Ray McCloud,CAR,0,23.0,6.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.00,2019,0.0
26124,616,Darrius Shepherd,GNB,WR,24.0,6.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-0.90,2019,2.0
26125,617,Jarrett Stidham,NWE,QB,23.0,3.0,0.0,2.0,4.0,14.0,...,0.0,4.0,-2.0,0.0,2.0,0.0,0.0,-1.64,2019,0.0
26126,618,Michael Walker,JAX,WR,23.0,7.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,15.0,0.0,-0.50,2019,3.0


In [301]:
#initiate running back specific dataframe
# Keep only the RB rows, order them by the G column (largest first) and retain
# the first 50 rows of every Year group.
rbdf = (
    df[df['Pos'].eq('RB')]
    .sort_values(by='G', ascending=False)
    .groupby('Year')
    .head(50)
)

rbdf

Unnamed: 0.1,Unnamed: 0,Player,Tm,Pos,Age,G,GS,Cmp,Att,Yds,...,PassingTD,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints,Year,Tgt
11903,313,Charles Way,NYG,RB,23.0,16.0,4.0,0.0,0.0,0.0,...,0.0,0.0,6.0,0.0,2.0,76.0,1.0,21.2,1995,12.0
14541,313,Lorenzo Neal,TEN,RB,30.0,16.0,6.0,0.0,0.0,0.0,...,0.0,0.0,-2.0,0.0,1.0,31.0,2.0,23.9,2000,13.0
5862,390,Robert Newhouse,DAL,RB,33.0,16.0,0.0,0.0,0.0,0.0,...,0.0,0.0,34.0,0.0,9.0,0.0,0.0,-2.6,1983,
14592,364,Jerald Sowell,NYJ,RB,26.0,16.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,84.0,0.0,14.4,2000,7.0
5868,396,Rick Berns,RAI,RB,27.0,16.0,0.0,0.0,0.0,0.0,...,0.0,0.0,22.0,0.0,6.0,0.0,0.0,2.2,1983,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5046,34,James Wilder,TAM,RB,24.0,9.0,9.0,0.0,0.0,0.0,...,0.0,0.0,324.0,3.0,83.0,466.0,1.0,146.0,1982,
5037,25,Andra Franklin,MIA,RB,23.0,9.0,9.0,0.0,0.0,0.0,...,0.0,0.0,701.0,7.0,177.0,9.0,0.0,110.0,1982,
5145,133,Charles Alexander,CIN,RB,25.0,9.0,9.0,0.0,0.0,0.0,...,0.0,0.0,207.0,1.0,64.0,85.0,1.0,53.2,1982,
5151,139,Mel Carver,TAM,RB,23.0,9.0,1.0,0.0,0.0,0.0,...,0.0,0.0,229.0,1.0,70.0,46.0,1.0,39.5,1982,


In [302]:
#finding unique positions listed on the dataframe
df['Pos'].unique()

array(['RB', 'WR', 'QB', 'TE', '0'], dtype=object)

In [303]:
#initiate wide receiver specific dataframe
# Keep only the WR rows, order them by the G column (largest first) and retain
# the first 100 rows of every Year group.
wrdf = (
    df[df['Pos'].eq('WR')]
    .sort_values(by='G', ascending=False)
    .groupby('Year')
    .head(100)
)
wrdf


Unnamed: 0.1,Unnamed: 0,Player,Tm,Pos,Age,G,GS,Cmp,Att,Yds,...,PassingTD,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints,Year,Tgt
25610,102,Emmanuel Sanders,2TM,WR,32.0,17.0,16.0,1.0,1.0,35.0,...,1.0,1.0,0.0,0.0,0.0,869.0,5.0,188.3,2019,97.0
16650,169,Jerry Rice,2TM,WR,42.0,17.0,14.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,429.0,3.0,90.9,2004,64.0
23377,241,Andre Holmes,OAK,WR,27.0,16.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,201.0,4.0,58.1,2015,33.0
8730,16,Anthony Miller,SDG,WR,24.0,16.0,16.0,0.0,0.0,0.0,...,0.0,0.0,21.0,0.0,4.0,1252.0,10.0,260.3,1989,
22428,17,Emmanuel Sanders,DEN,WR,27.0,16.0,16.0,0.0,0.0,0.0,...,0.0,0.0,44.0,0.0,8.0,1404.0,9.0,299.8,2014,141.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1081,328,Creston Whitaker,NOR,WR,25.0,2.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,6.0,0.0,1.6,1972,
651,267,Coleman Zeno,NYG,WR,25.0,2.0,1.0,0.0,0.0,0.0,...,0.0,0.0,10.0,0.0,2.0,97.0,0.0,13.7,1971,
693,309,Gordon Bowdell,DEN,WR,23.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,19.0,0.0,2.9,1971,
1926,410,Speedy Thomas,NOR,WR,27.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.3,1974,


In [304]:
#initiate quarterback specific dataframe
# Keep only the QB rows, order them by the G column (largest first) and retain
# the first 25 rows of every Year group.
qbdf = (
    df[df['Pos'].eq('QB')]
    .sort_values(by='G', ascending=False)
    .groupby('Year')
    .head(25)
)
qbdf

Unnamed: 0.1,Unnamed: 0,Player,Tm,Pos,Age,G,GS,Cmp,Att,Yds,...,PassingTD,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints,Year,Tgt
19485,78,Jay Cutler,CHI,QB,26.0,16.0,16.0,336.0,555.0,3666.0,...,27.0,555.0,173.0,1.0,40.0,0.0,0.0,223.94,2009,0.0
12713,81,Trent Dilfer,TAM,QB,25.0,16.0,16.0,217.0,386.0,2555.0,...,21.0,386.0,99.0,1.0,33.0,0.0,0.0,178.10,1997,0.0
3517,332,David Humm,OAK,QB,26.0,16.0,0.0,14.0,26.0,151.0,...,0.0,26.0,-4.0,0.0,5.0,0.0,0.0,1.64,1978,
9757,85,Jeff George,IND,QB,24.0,16.0,16.0,292.0,485.0,2910.0,...,10.0,485.0,36.0,0.0,16.0,0.0,0.0,120.00,1991,
9756,84,John Friesz,SDG,QB,24.0,16.0,16.0,262.0,487.0,2896.0,...,12.0,487.0,18.0,0.0,10.0,0.0,0.0,115.64,1991,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5024,12,Ken Anderson,CIN,QB,33.0,9.0,9.0,218.0,309.0,2495.0,...,12.0,309.0,85.0,4.0,25.0,0.0,0.0,160.30,1982,
5336,324,Steve Dils,MIN,QB,27.0,9.0,0.0,11.0,26.0,68.0,...,0.0,26.0,5.0,0.0,1.0,0.0,0.0,3.22,1982,
5144,132,Paul McDonald,CLE,QB,24.0,9.0,3.0,73.0,149.0,993.0,...,5.0,149.0,-13.0,0.0,7.0,0.0,0.0,36.42,1982,
5049,37,Joe Theismann,WAS,QB,33.0,9.0,9.0,161.0,252.0,2033.0,...,13.0,252.0,150.0,0.0,31.0,0.0,0.0,122.32,1982,


In [305]:
#initiate tight end specific dataframe
# Keep only the TE rows, order them by the G column (largest first) and retain
# the first 25 rows of every Year group.
tedf = (
    df[df['Pos'].eq('TE')]
    .sort_values(by='G', ascending=False)
    .groupby('Year')
    .head(25)
)
tedf

Unnamed: 0.1,Unnamed: 0,Player,Tm,Pos,Age,G,GS,Cmp,Att,Yds,...,PassingTD,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints,Year,Tgt
13517,369,Deems May,SEA,TE,29.0,16.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,7.0,1.0,9.7,1998,3.0
9503,301,Pete Metzelaars,BUF,TE,30.0,16.0,5.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,60.0,1.0,20.0,1990,
9548,346,Scott Galbraith,CLE,TE,23.0,16.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,62.0,0.0,10.2,1990,
9546,344,Pat Carter,RAM,TE,24.0,16.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,58.0,0.0,13.8,1990,
17951,299,Anthony Becht,TAM,TE,29.0,16.0,16.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,115.0,1.0,35.5,2006,26.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5088,76,Paul Coffman,GNB,TE,26.0,9.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,287.0,2.0,63.7,1982,
5186,174,Joe Rose,MIA,TE,25.0,9.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,182.0,2.0,46.2,1982,
5167,155,Mark Brammer,BUF,TE,24.0,9.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,225.0,2.0,59.5,1982,
5178,166,Joe Senser,MIN,TE,26.0,9.0,9.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,261.0,1.0,61.1,1982,


# Linear Regression Test For Quarterbacks: PassingTDs and FantasyScore

In [306]:
#plot QB Age to Fantasy Points Scored
# qbdf holds up to 25 quarterback rows per Year; the rows are re-ordered by the
# GS column (descending) and every row is drawn as one scatter point.
qbdf25  = qbdf.sort_values('GS',ascending = False)
qb_plot = qbdf25.hvplot.scatter(
    x="Age",
    y="FantasyPoints",
    title="Fantasy Points by Age"
)
qb_plot

In [307]:
#define X with statistical metrics (age, passing TD, passing yards, rushing TD, rushing yards, receiving TD, receiving yards)
X = qbdf[['Age','PassingTD','PassingYds','RushingTD','RushingYds','ReceivingTD','ReceivingYds']]
# preview the first five feature rows
X[:5]

Unnamed: 0,Age,PassingTD,PassingYds,RushingTD,RushingYds,ReceivingTD,ReceivingYds
19485,26.0,27.0,3666.0,1.0,173.0,0.0,0.0
12713,25.0,21.0,2555.0,1.0,99.0,0.0,0.0
3517,26.0,0.0,151.0,0.0,-4.0,0.0,0.0
9757,24.0,10.0,2910.0,0.0,36.0,0.0,0.0
9756,24.0,12.0,2896.0,0.0,18.0,0.0,0.0


In [308]:
#define y as fantasy points scored (kept as a single-column DataFrame)
y = qbdf[["FantasyPoints"]]
# preview the first five target rows
y[:5]

Unnamed: 0,FantasyPoints
19485,223.94
12713,178.1
3517,1.64
9757,120.0
9756,115.64


In [309]:
#define model as LinearRegression (default settings; fitted in the next cell)
model = LinearRegression()

In [310]:
#fit model on the QB features X and target y defined above
model.fit(X, y)

LinearRegression()

In [311]:
#print model's slope: coef_ holds one coefficient per feature column of X
print(f"Model's slope: {model.coef_}")

Model's slope: [[0.37867342 4.68788079 0.02814858 5.76685258 0.10263945 6.0463689
  0.23134158]]


In [312]:
#print model's y-intercept (a one-element array because y has one column)
print(f"Model's y-intercept: {model.intercept_}")

Model's y-intercept: [-23.24338471]


In [313]:
#print model's formula: intercept plus the coefficient vector applied to the feature columns of X
print(f"Model's formula: y = {model.intercept_} + {model.coef_[0]}X")

Model's formula: y = [-23.24338471] + [0.37867342 4.68788079 0.02814858 5.76685258 0.10263945 6.0463689
 0.23134158]X


In [314]:
#define predicted y values (Fantasy Points Scored)
# predictions are made for the same rows of X the model was fitted on
predicted_y_values = model.predict(X)

In [315]:
#copy the QB dataframe so the prediction column is added without modifying qbdf
qbdf_predicted_fantasy_points = qbdf.copy()

In [316]:
#add the predicted points as a new column on the QB copy and preview it
qbdf_predicted_fantasy_points["PredictedFantasyPoints"]=predicted_y_values
qbdf_predicted_fantasy_points.head()

Unnamed: 0.1,Unnamed: 0,Player,Tm,Pos,Age,G,GS,Cmp,Att,Yds,...,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints,Year,Tgt,PredictedFantasyPoints
19485,78,Jay Cutler,CHI,QB,26.0,16.0,16.0,336.0,555.0,3666.0,...,555.0,173.0,1.0,40.0,0.0,0.0,223.94,2009,0.0,239.891068
12713,81,Trent Dilfer,TAM,QB,25.0,16.0,16.0,217.0,386.0,2555.0,...,386.0,99.0,1.0,33.0,0.0,0.0,178.1,1997,0.0,172.516721
3517,332,David Humm,OAK,QB,26.0,16.0,0.0,14.0,26.0,151.0,...,26.0,-4.0,0.0,5.0,0.0,0.0,1.64,1978,,-9.557998
9757,85,Jeff George,IND,QB,24.0,16.0,16.0,292.0,485.0,2910.0,...,485.0,36.0,0.0,16.0,0.0,0.0,120.0,1991,,118.330966
9756,84,John Friesz,SDG,QB,24.0,16.0,16.0,262.0,487.0,2896.0,...,487.0,18.0,0.0,10.0,0.0,0.0,115.64,1991,,125.465138


In [317]:
#plot QB predicted fantasy points
# Despite the variable name, this is a scatter of one predicted value per row
# (in red) against Age, not a single fitted line: the model was fitted on
# several feature columns, of which Age is only one.
qbdf_predicted_fantasy_points25  = qbdf_predicted_fantasy_points.sort_values('GS',ascending = False)
qb_best_fit_line = qbdf_predicted_fantasy_points25.hvplot.scatter(
    x = "Age",
    y = "PredictedFantasyPoints",
    color = "red"
)
qb_best_fit_line

In [318]:
#combine QB fantasy points by age and best fit: `*` overlays the two plots
qb_plot * qb_best_fit_line

In [319]:
#finding average fantasy points and predicted fantasy points per age
# NOTE(review): size=600 is passed only in this QB cell; the RB/WR/TE versions
# of this cell call .hvplot() without it - confirm whether it is intended.
mean_fp_qb=qbdf.groupby(['Age'])['FantasyPoints'].mean().hvplot()
mean_pfp_qb=qbdf_predicted_fantasy_points.groupby(['Age'])['PredictedFantasyPoints'].mean().hvplot(size=600)
# overlay the actual and predicted per-age means
mean_fp_qb*mean_pfp_qb

In [320]:
#testing model accuracy
# NOTE: every metric below is computed on the same X / y the model was fitted
# on, so it describes in-sample fit rather than accuracy on unseen data.
score = model.score(X, y, sample_weight=None)
r2 = r2_score(y, predicted_y_values)
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
# y is a single-column DataFrame, so np.std(y) returned a one-element Series
# whose repr ("FantasyPoints ...", "dtype: float64") leaked into the printed
# sentence. Reduce over the raw values to get a plain float (ddof=0).
std = float(np.std(y.to_numpy()))

# Print relevant metrics.
print(f"The score is {score}.")
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")
def mae(actual, predicted):
    """Return the mean absolute error between ``actual`` and ``predicted``.

    Both inputs are converted to flat 1-D float arrays before subtracting, so
    a single-column DataFrame, an (n, 1) array and a flat sequence of the same
    length can be mixed without NumPy broadcasting them into an (n, n) matrix.
    The result is always a plain ``float``; the previous version returned a
    one-element Series for DataFrame input, which leaked "dtype: float64" into
    the printed message.

    Raises:
        ValueError: raised by NumPy when the flattened inputs have lengths
            that cannot be broadcast against each other.
    """
    actual_values = np.asarray(actual, dtype=float).ravel()
    predicted_values = np.asarray(predicted, dtype=float).ravel()
    return float(np.mean(np.abs(actual_values - predicted_values)))

# report the mean absolute error between actual and predicted fantasy points
print(f'Mean Absolute Error is {mae(y,predicted_y_values)}.')

The score is 0.979129863588066.
The r2 is 0.979129863588066.
The mean squared error is 178.3092210580734.
The root mean squared error is 13.353247584691651.
The standard deviation is FantasyPoints    92.432406
dtype: float64.
Mean Absolute Error is FantasyPoints    10.807116
dtype: float64.


# Linear Regression Test For Running Backs: FantasyScore

In [321]:
#plot RB Age to Fantasy Points Scored
# rbdf holds up to 50 running back rows per Year; the rows are re-ordered by
# the GS column (descending) and every row is drawn as one scatter point.
rbdf25  = rbdf.sort_values('GS',ascending = False)
rb_plot = rbdf25.hvplot.scatter(
    x="Age",
    y="FantasyPoints",
    title="Fantasy Points by Age"
)
rb_plot

In [322]:
#define X with statistical metrics (age, passing TD, passing yards, rushing TD, rushing yards, receiving TD, receiving yards, rushing attempts)
X = rbdf[['Age','PassingTD','PassingYds','RushingTD','RushingYds','ReceivingTD','ReceivingYds','RushingAtt']]
# preview the first five feature rows
X[:5]

Unnamed: 0,Age,PassingTD,PassingYds,RushingTD,RushingYds,ReceivingTD,ReceivingYds,RushingAtt
11903,23.0,0.0,0.0,0.0,6.0,1.0,76.0,2.0
14541,30.0,0.0,0.0,0.0,-2.0,2.0,31.0,1.0
5862,33.0,0.0,0.0,0.0,34.0,0.0,0.0,9.0
14592,26.0,0.0,0.0,0.0,0.0,0.0,84.0,2.0
5868,27.0,0.0,0.0,0.0,22.0,0.0,0.0,6.0


In [323]:
#define y as fantasy points scored (kept as a single-column DataFrame)
y = rbdf[["FantasyPoints"]]
# preview the first five target rows
y[:5]

Unnamed: 0,FantasyPoints
11903,21.2
14541,23.9
5862,-2.6
14592,14.4
5868,2.2


In [324]:
#define model as LinearRegression (a fresh instance replaces the previous section's model)
model = LinearRegression()

#fit model on the RB features X and target y defined above
model.fit(X, y)

#print model metrics (slope, y-intercept, formula)
# coef_ holds one coefficient per feature column of X
print(f"Model's slope: {model.coef_}")
print(f"Model's y-intercept: {model.intercept_}")
print(f"Model's formula: y = {model.intercept_} + {model.coef_[0]}X")

Model's slope: [[ 3.09784073e-01  4.71131574e+00 -6.59220116e-02  5.98254169e+00
   9.79300625e-02  5.61704859e+00  2.01975368e-01  9.74312491e-04]]
Model's y-intercept: [-7.34364265]
Model's formula: y = [-7.34364265] + [ 3.09784073e-01  4.71131574e+00 -6.59220116e-02  5.98254169e+00
  9.79300625e-02  5.61704859e+00  2.01975368e-01  9.74312491e-04]X


In [325]:
#define predicted y values (Fantasy Points Scored)
# predictions are made for the same rows of X the model was fitted on
predicted_y_values = model.predict(X)
#copy the RB dataframe so the prediction column is added without modifying rbdf
rbdf_predicted_fantasy_points = rbdf.copy()
#add the predicted points as a new column on the RB copy and preview it
rbdf_predicted_fantasy_points["PredictedFantasyPoints"]=predicted_y_values
rbdf_predicted_fantasy_points.head()

Unnamed: 0.1,Unnamed: 0,Player,Tm,Pos,Age,G,GS,Cmp,Att,Yds,...,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints,Year,Tgt,PredictedFantasyPoints
11903,313,Charles Way,NYG,RB,23.0,16.0,4.0,0.0,0.0,0.0,...,0.0,6.0,0.0,2.0,76.0,1.0,21.2,1995,12.0,21.338097
14541,313,Lorenzo Neal,TEN,RB,30.0,16.0,6.0,0.0,0.0,0.0,...,0.0,-2.0,0.0,1.0,31.0,2.0,23.9,2000,13.0,19.250327
5862,390,Robert Newhouse,DAL,RB,33.0,16.0,0.0,0.0,0.0,0.0,...,0.0,34.0,0.0,9.0,0.0,0.0,-2.6,1983,,6.217623
14592,364,Jerald Sowell,NYJ,RB,26.0,16.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,84.0,0.0,14.4,2000,7.0,17.678623
5868,396,Rick Berns,RAI,RB,27.0,16.0,0.0,0.0,0.0,0.0,...,0.0,22.0,0.0,6.0,0.0,0.0,2.2,1983,,3.180835


In [326]:
#plot RB predicted fantasy points
# Despite the variable name, this is a scatter of one predicted value per row
# (in red) against Age, not a single fitted line: the model was fitted on
# several feature columns, of which Age is only one.
rbdf_predicted_fantasy_points25  = rbdf_predicted_fantasy_points.sort_values('GS',ascending = False)
rb_best_fit_line = rbdf_predicted_fantasy_points25.hvplot.scatter(
    x = "Age",
    y = "PredictedFantasyPoints",
    color = "red"
)
rb_best_fit_line

In [327]:
#combine RB fantasy points by age and best fit: `*` overlays the two plots
rb_plot * rb_best_fit_line

In [328]:
#finding average fantasy points and predicted fantasy points per age
mean_fp_rb=rbdf.groupby(['Age'])['FantasyPoints'].mean().hvplot()
mean_pfp_rb=rbdf_predicted_fantasy_points.groupby(['Age'])['PredictedFantasyPoints'].mean().hvplot()
# overlay the actual and predicted per-age means
mean_fp_rb*mean_pfp_rb

In [329]:
#testing model accuracy
# NOTE: every metric below is computed on the same X / y the model was fitted
# on, so it describes in-sample fit rather than accuracy on unseen data.
score = model.score(X, y, sample_weight=None)
r2 = r2_score(y, predicted_y_values)
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
# y is a single-column DataFrame, so np.std(y) returned a one-element Series
# whose repr ("FantasyPoints ...", "dtype: float64") leaked into the printed
# sentence. Reduce over the raw values to get a plain float (ddof=0).
std = float(np.std(y.to_numpy()))

# Print relevant metrics.
print(f"The score is {score}.")
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")
def mae(actual, predicted):
    """Return the mean absolute error between ``actual`` and ``predicted``.

    Both inputs are converted to flat 1-D float arrays before subtracting, so
    a single-column DataFrame, an (n, 1) array and a flat sequence of the same
    length can be mixed without NumPy broadcasting them into an (n, n) matrix.
    The result is always a plain ``float``; the previous version returned a
    one-element Series for DataFrame input, which leaked "dtype: float64" into
    the printed message.

    Raises:
        ValueError: raised by NumPy when the flattened inputs have lengths
            that cannot be broadcast against each other.
    """
    actual_values = np.asarray(actual, dtype=float).ravel()
    predicted_values = np.asarray(predicted, dtype=float).ravel()
    return float(np.mean(np.abs(actual_values - predicted_values)))

# report the mean absolute error between actual and predicted fantasy points
print(f'Mean Absolute Error is {mae(y,predicted_y_values)}.')

The score is 0.9938831788850169.
The r2 is 0.9938831788850169.
The mean squared error is 45.63858108096353.
The root mean squared error is 6.755633284967704.
The standard deviation is FantasyPoints    86.378007
dtype: float64.
Mean Absolute Error is FantasyPoints    4.803457
dtype: float64.


# Linear Regression Test For Wide Receivers: FantasyScore


In [330]:
#plot WR Age to Fantasy Points Scored
# wrdf holds up to 100 wide receiver rows per Year; the rows are re-ordered by
# the G column (descending) and every row is drawn as one scatter point.
wrdf25  = wrdf.sort_values('G',ascending = False)
wr_plot = wrdf25.hvplot.scatter(
    x="Age",
    y="FantasyPoints",
    title="Fantasy Points by Age"
)
wr_plot

In [331]:
#define X with statistical metrics (age, passing TD, passing yards, rushing TD, rushing yards, receiving TD, receiving yards)
X = wrdf[['Age','PassingTD','PassingYds','RushingTD','RushingYds','ReceivingTD','ReceivingYds']]
# preview the first five feature rows
X[:5]

Unnamed: 0,Age,PassingTD,PassingYds,RushingTD,RushingYds,ReceivingTD,ReceivingYds
25610,32.0,1.0,35.0,0.0,0.0,5.0,869.0
16650,42.0,0.0,0.0,0.0,0.0,3.0,429.0
23377,27.0,0.0,0.0,0.0,0.0,4.0,201.0
8730,24.0,0.0,0.0,0.0,21.0,10.0,1252.0
22428,27.0,0.0,0.0,0.0,44.0,9.0,1404.0


In [332]:
#define y as fantasy points scored (kept as a single-column DataFrame)
y = wrdf[["FantasyPoints"]]
# preview the first five target rows
y[:5]

Unnamed: 0,FantasyPoints
25610,188.3
16650,90.9
23377,58.1
8730,260.3
22428,299.8


In [333]:
#define model as LinearRegression (a fresh instance replaces the previous section's model)
model = LinearRegression()
#fit model on the WR features X and target y defined above
model.fit(X, y)
#print model metrics (slope, y-intercept, formula)
# coef_ holds one coefficient per feature column of X
print(f"Model's slope: {model.coef_}")
print(f"Model's y-intercept: {model.intercept_}")
print(f"Model's formula: y = {model.intercept_} + {model.coef_[0]}X")

Model's slope: [[0.39732278 8.47905274 0.01289346 5.5932665  0.11835292 5.60399599
  0.16787561]]
Model's y-intercept: [-9.6528335]
Model's formula: y = [-9.6528335] + [0.39732278 8.47905274 0.01289346 5.5932665  0.11835292 5.60399599
 0.16787561]X


In [334]:
#define predicted y values (Fantasy Points Scored)
# predictions are made for the same rows of X the model was fitted on
predicted_y_values = model.predict(X)
#copy the WR dataframe so the prediction column is added without modifying wrdf
wrdf_predicted_fantasy_points = wrdf.copy()
#add the predicted points as a new column on the WR copy and preview it
wrdf_predicted_fantasy_points["PredictedFantasyPoints"]=predicted_y_values
wrdf_predicted_fantasy_points.head()

Unnamed: 0.1,Unnamed: 0,Player,Tm,Pos,Age,G,GS,Cmp,Att,Yds,...,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints,Year,Tgt,PredictedFantasyPoints
25610,102,Emmanuel Sanders,2TM,WR,32.0,17.0,16.0,1.0,1.0,35.0,...,1.0,0.0,0.0,0.0,869.0,5.0,188.3,2019,97.0,185.8957
16650,169,Jerry Rice,2TM,WR,42.0,17.0,14.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,429.0,3.0,90.9,2004,64.0,95.865346
23377,241,Andre Holmes,OAK,WR,27.0,16.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,201.0,4.0,58.1,2015,33.0,57.233862
8730,16,Anthony Miller,SDG,WR,24.0,16.0,16.0,0.0,0.0,0.0,...,0.0,21.0,0.0,4.0,1252.0,10.0,260.3,1989,,268.588542
22428,17,Emmanuel Sanders,DEN,WR,27.0,16.0,16.0,0.0,0.0,0.0,...,0.0,44.0,0.0,8.0,1404.0,9.0,299.8,2014,141.0,292.415724


In [335]:
#plot WR predicted fantasy points
# Despite the variable name, this is a scatter of one predicted value per row
# (in red) against Age, not a single fitted line: the model was fitted on
# several feature columns, of which Age is only one.
wrdf_predicted_fantasy_points25  = wrdf_predicted_fantasy_points.sort_values('G',ascending = False)
wr_best_fit_line = wrdf_predicted_fantasy_points25.hvplot.scatter(
    x = "Age",
    y = "PredictedFantasyPoints",
    color = "red"
)
wr_best_fit_line

In [336]:
#finding average fantasy points and predicted fantasy points per age
mean_fp_wr=wrdf.groupby(['Age'])['FantasyPoints'].mean().hvplot()
mean_pfp_wr=wrdf_predicted_fantasy_points.groupby(['Age'])['PredictedFantasyPoints'].mean().hvplot()
# overlay the actual and predicted per-age means
mean_fp_wr*mean_pfp_wr

In [337]:
#combine WR fantasy points by age and best fit: `*` overlays the two plots
wr_plot * wr_best_fit_line

In [338]:
#testing model accuracy
# NOTE: every metric below is computed on the same X / y the model was fitted
# on, so it describes in-sample fit rather than accuracy on unseen data.
score = model.score(X, y, sample_weight=None)
r2 = r2_score(y, predicted_y_values)
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
# y is a single-column DataFrame, so np.std(y) returned a one-element Series
# whose repr ("FantasyPoints ...", "dtype: float64") leaked into the printed
# sentence. Reduce over the raw values to get a plain float (ddof=0).
std = float(np.std(y.to_numpy()))

# Print relevant metrics.
print(f"The score is {score}.")
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")
def mae(actual, predicted):
    """Return the mean absolute error between ``actual`` and ``predicted``.

    Both inputs are converted to flat 1-D float arrays before subtracting, so
    a single-column DataFrame, an (n, 1) array and a flat sequence of the same
    length can be mixed without NumPy broadcasting them into an (n, n) matrix.
    The result is always a plain ``float``; the previous version returned a
    one-element Series for DataFrame input, which leaked "dtype: float64" into
    the printed message.

    Raises:
        ValueError: raised by NumPy when the flattened inputs have lengths
            that cannot be broadcast against each other.
    """
    actual_values = np.asarray(actual, dtype=float).ravel()
    predicted_values = np.asarray(predicted, dtype=float).ravel()
    return float(np.mean(np.abs(actual_values - predicted_values)))

# report the mean absolute error between actual and predicted fantasy points
print(f'Mean Absolute Error is {mae(y,predicted_y_values)}.')


The score is 0.9897925120861412.
The r2 is 0.9897925120861412.
The mean squared error is 65.1146407165154.
The root mean squared error is 8.069364331625843.
The standard deviation is FantasyPoints    79.869302
dtype: float64.
Mean Absolute Error is FantasyPoints    5.809617
dtype: float64.


# Linear Regression Test For Tight Ends: FantasyScore


In [339]:
#plot TE Age to Fantasy Points Scored
# tedf holds up to 25 tight end rows per Year; the rows are re-ordered by the
# GS column (descending) and every row is drawn as one scatter point.
tedf25  = tedf.sort_values('GS',ascending = False)
te_plot = tedf25.hvplot.scatter(
    x="Age",
    y="FantasyPoints",
    title="Fantasy Points by Age"
)
te_plot

In [340]:
#define X with statistical metrics (age, receiving TD, receiving yards)
# unlike the QB/RB/WR sections, no passing or rushing columns are used here
X = tedf[['Age','ReceivingTD','ReceivingYds']]
# preview the first five feature rows
X[:5]

Unnamed: 0,Age,ReceivingTD,ReceivingYds
13517,29.0,1.0,7.0
9503,30.0,1.0,60.0
9548,23.0,0.0,62.0
9546,24.0,0.0,58.0
17951,29.0,1.0,115.0


In [341]:
#define y as fantasy points scored (kept as a single-column DataFrame)
y = tedf[["FantasyPoints"]]
# preview the first five target rows
y[:5]

Unnamed: 0,FantasyPoints
13517,9.7
9503,20.0
9548,10.2
9546,13.8
17951,35.5


In [342]:
#define model as LinearRegression (a fresh instance replaces the previous section's model)
model = LinearRegression()
#fit model on the TE features X and target y defined above
model.fit(X, y)
#print model metrics (slope, y-intercept, formula)
# coef_ holds one coefficient per feature column of X
print(f"Model's slope: {model.coef_}")
print(f"Model's y-intercept: {model.intercept_}")
print(f"Model's formula: y = {model.intercept_} + {model.coef_[0]}X")

Model's slope: [[0.23539238 6.1468345  0.17554374]]
Model's y-intercept: [-3.97364547]
Model's formula: y = [-3.97364547] + [0.23539238 6.1468345  0.17554374]X


In [343]:
#define predicted y values (Fantasy Points Scored)
# predictions are made for the same rows of X the model was fitted on
predicted_y_values = model.predict(X)
#copy the TE dataframe so the prediction column is added without modifying tedf
tedf_predicted_fantasy_points = tedf.copy()
#add the predicted points as a new column on the TE copy and preview it
tedf_predicted_fantasy_points["PredictedFantasyPoints"]=predicted_y_values
tedf_predicted_fantasy_points.head()

Unnamed: 0.1,Unnamed: 0,Player,Tm,Pos,Age,G,GS,Cmp,Att,Yds,...,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints,Year,Tgt,PredictedFantasyPoints
13517,369,Deems May,SEA,TE,29.0,16.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,7.0,1.0,9.7,1998,3.0,10.228374
9503,301,Pete Metzelaars,BUF,TE,30.0,16.0,5.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,60.0,1.0,20.0,1990,,19.767585
9548,346,Scott Galbraith,CLE,TE,23.0,16.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,62.0,0.0,10.2,1990,,12.324091
9546,344,Pat Carter,RAM,TE,24.0,16.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,58.0,0.0,13.8,1990,,11.857308
17951,299,Anthony Becht,TAM,TE,29.0,16.0,16.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,115.0,1.0,35.5,2006,26.0,29.187098


In [344]:
#plot TE predicted fantasy points
# Despite the variable name, this is a scatter of one predicted value per row
# (in red) against Age, not a single fitted line: the model was fitted on
# several feature columns, of which Age is only one.
tedf_predicted_fantasy_points25  = tedf_predicted_fantasy_points.sort_values('GS',ascending = False)
te_best_fit_line = tedf_predicted_fantasy_points25.hvplot.scatter(
    x = "Age",
    y = "PredictedFantasyPoints",
    color = "red"
)
te_best_fit_line

In [345]:
#combine TE fantasy points by age and best fit: `*` overlays the two plots
te_plot * te_best_fit_line

In [346]:
#finding average fantasy points and predicted fantasy points per age
mean_fp_te=tedf.groupby(['Age'])['FantasyPoints'].mean().hvplot()
mean_pfp_te=tedf_predicted_fantasy_points.groupby(['Age'])['PredictedFantasyPoints'].mean().hvplot()
# overlay the actual and predicted per-age means
mean_fp_te*mean_pfp_te

In [347]:
#testing model accuracy
# NOTE: every metric below is computed on the same X / y the model was fitted
# on, so it describes in-sample fit rather than accuracy on unseen data.
score = model.score(X, y, sample_weight=None)
r2 = r2_score(y, predicted_y_values)
mse = mean_squared_error(y, predicted_y_values)
rmse = np.sqrt(mse)
# y is a single-column DataFrame, so np.std(y) returned a one-element Series
# whose repr ("FantasyPoints ...", "dtype: float64") leaked into the printed
# sentence. Reduce over the raw values to get a plain float (ddof=0).
std = float(np.std(y.to_numpy()))

# Print relevant metrics.
print(f"The score is {score}.")
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")
print(f"The standard deviation is {std}.")


# Disabled draft of adjusted R^2, kept for reference. The formula is
# 1 - (1 - r2) * (n - 1) / (n - p - 1); the earlier draft divided the whole
# expression "1 - (1 - r2) * n - 1" by (n - p - 1), which is not adjusted R^2.
#def adj_r_squared(X, actual, predicted): #X is your training dataset
#    n = len(actual)
#    p = X.shape[1] #X.shape[1] will give number of independent variables
#    return 1 - (1 - r2) * (n - 1) / (n - p - 1)

#print(adj_r_squared(X,y,predicted_y_values))

def mae(actual, predicted):
    """Return the mean absolute error between ``actual`` and ``predicted``.

    Both inputs are converted to flat 1-D float arrays before subtracting, so
    a single-column DataFrame, an (n, 1) array and a flat sequence of the same
    length can be mixed without NumPy broadcasting them into an (n, n) matrix.
    The result is always a plain ``float``; the previous version returned a
    one-element Series for DataFrame input, which leaked "dtype: float64" into
    the printed message.

    Raises:
        ValueError: raised by NumPy when the flattened inputs have lengths
            that cannot be broadcast against each other.
    """
    actual_values = np.asarray(actual, dtype=float).ravel()
    predicted_values = np.asarray(predicted, dtype=float).ravel()
    return float(np.mean(np.abs(actual_values - predicted_values)))

# report the mean absolute error between actual and predicted fantasy points
print(f'Mean Absolute Error is {mae(y,predicted_y_values)}.')

The score is 0.989356714202088.
The r2 is 0.989356714202088.
The mean squared error is 32.39875195741824.
The root mean squared error is 5.691990157881357.
The standard deviation is FantasyPoints    55.172959
dtype: float64.
Mean Absolute Error is FantasyPoints    3.925191
dtype: float64.
