In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

In [2]:
##Read in CSVs reformat to keep player ID consistent across data
qb_2018_base_df = pd.read_csv("QB_stats_base/fantasy-football-leaders2018.csv")
qb_2018_extra_df = pd.read_csv("QB_stats_extra/advanced-qb-metrics2018.csv").rename(columns={'PlayerPlayerId': 'PlayerID'})
qb_2018_extra_df = qb_2018_extra_df.drop(columns = ["PlayerShortName"], axis = 1)
qb_2019_base_df = pd.read_csv("QB_stats_base/fantasy-football-leaders2019.csv")
qb_2019_extra_df = pd.read_csv("QB_stats_extra/advanced-qb-metrics2019.csv").rename(columns={'PlayerPlayerId': 'PlayerID'})
qb_2019_extra_df = qb_2019_extra_df.drop(columns = ["PlayerShortName"], axis = 1)
qb_2020_base_df = pd.read_csv("QB_stats_base/fantasy-football-leaders2020.csv")
qb_2020_extra_df = pd.read_csv("QB_stats_extra/advanced-qb-metrics2020.csv").rename(columns={'PlayerPlayerId': 'PlayerID'})
qb_2020_extra_df = qb_2020_extra_df.drop(columns = ["PlayerShortName"], axis = 1)
qb_2021_base_df = pd.read_csv("QB_stats_base/fantasy-football-leaders2020.csv")
qb_2021_extra_df = pd.read_csv("QB_stats_extra/advanced-qb-metrics2020.csv").rename(columns={'PlayerPlayerId': 'PlayerID'})
qb_2021_extra_df = qb_2021_extra_df.drop(columns = ["PlayerShortName"], axis = 1)

#Remove Duplicate Columns between the extra stats df and base df
diff_cols = qb_2018_extra_df.columns.difference(qb_2018_base_df.columns)
diff_cols = diff_cols.union(['PlayerID'])

qb_2018_extra_df = qb_2018_extra_df[diff_cols]
qb_2019_extra_df = qb_2019_extra_df[diff_cols]
qb_2020_extra_df = qb_2020_extra_df[diff_cols]
qb_2021_extra_df = qb_2021_extra_df[diff_cols]

In [3]:
#Creating df with target to merge with provided DF
qb_2019_target_df = qb_2019_base_df.loc[:,["PlayerID","FantasyPoints"]]
qb_2019_target_names_df = qb_2019_base_df.loc[:,["PlayerID","Name"]]
qb_2020_target_df = qb_2020_base_df.loc[:,["PlayerID","FantasyPoints"]]
qb_2020_target_names_df = qb_2019_base_df.loc[:,["PlayerID","Name"]]

In [4]:
#Create merged dataframe to prepare for model 
qb_2018_merged_df = pd.merge(qb_2018_base_df, qb_2018_extra_df, on="PlayerID")
qb_2018_merged_df = qb_2018_merged_df.drop(["FantasyPoints"], axis = 1)
qb_2018_targetmerged_df = pd.merge(qb_2018_merged_df, qb_2019_target_df, on="PlayerID").set_index("PlayerID")
qb_2019_merged_df = pd.merge(qb_2019_base_df, qb_2019_extra_df, on="PlayerID")
qb_2019_merged_df = qb_2019_merged_df.drop(["FantasyPoints"], axis = 1)
qb_2019_targetmerged_df = pd.merge(qb_2019_merged_df, qb_2020_target_df, on="PlayerID").set_index("PlayerID")
qb_2020_mergedtest_df = pd.merge(qb_2020_base_df, qb_2020_extra_df, on="PlayerID")

In [5]:
#Create target (next years Fantasy Point total)
y = qb_2018_targetmerged_df["FantasyPoints"]

In [6]:
#Remove non-numerical columns and unwanted features.
X = qb_2018_targetmerged_df.drop(columns = ["Rank","FantasyPoints","Name","Team","Position","PassingCompletions","PassingYardsPerAttempt","PassingRating","RushingYardsPerAttempt","FantasyPointsPerGame","AirYardsPerAttempt","AirYardsPerGame","DeepBallCompletionPercentage","MoneyThrows","PressuredCompletionPercentage","PassingCompletionPercentage","CompletionPercentage","ProtectionRate","TouchdownRate","RushingYards","PassAttemptsPerGame"], axis = 1)

#Scale data
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [7]:
#Perform linear regression on features from 2018 and fantasy football point totals from 2019 
LinReg_model  = LinearRegression(fit_intercept=False).fit(X_scaled,y)

#Print coefficients and associated features

for i, col in enumerate(X.columns):
    print('{} is associated with {}'.format(col,LinReg_model.coef_[i]))
    
#print intercept, in this case to remove negative values the intercept was removed.
LinReg_model.intercept_

Played is associated with 65.15289243974225
PassingAttempts is associated with -154.44091791083784
PassingYards is associated with -39.71233759502157
PassingTouchdowns is associated with 48.588650951766034
PassingInterceptions is associated with -23.1107499331149
RushingAttempts is associated with -34.44171766795856
RushingTouchdowns is associated with 29.345005287541515
AirYards is associated with 187.34460420363484
DeepBallAttempts is associated with -66.56439793483399
Interceptions is associated with -23.110749933114835
PlayerAgeExact is associated with -14.873726276519166


0.0

In [8]:
#predict 2020 values using 2019 features
QB_proj = LinReg_model.predict(qb_2019_merged_df.drop(columns = ["PlayerID","Rank","Name","Team","Position","PassingCompletions","PassingYardsPerAttempt","PassingRating","RushingYardsPerAttempt","FantasyPointsPerGame","AirYardsPerAttempt","AirYardsPerGame","DeepBallCompletionPercentage","MoneyThrows","PressuredCompletionPercentage","PassingCompletionPercentage","CompletionPercentage","ProtectionRate","TouchdownRate","RushingYards","PassAttemptsPerGame"], axis = 1))

#values were 3 orders of magnitude larger than realistic values
QB_proj = QB_proj/1000



In [14]:
#create df for a visual representation of effectiveness

test_2019 = qb_2019_merged_df.loc[:,["Rank","PlayerID","Name","Team"]].rename(columns={'Rank': '2019 Rank','Team':'2019 Team'})
test_2019["QB2020_proj"] = QB_proj
test_2020 = qb_2020_mergedtest_df.loc[:,["Rank","PlayerID","Team","FantasyPoints"]].rename(columns={'Rank': '2020 Actual Rank','Team':'2020 Team'})

In [15]:
test_2019_2020_df = pd.merge(test_2019,test_2020,on = "PlayerID")
test_2019_2020_df.sort_values(["QB2020_proj"], ascending=False)

Unnamed: 0,2019 Rank,PlayerID,Name,2019 Team,QB2020_proj,2020 Actual Rank,2020 Team,FantasyPoints
8,11,732,Matt Ryan,ATL,255.449218,12,ATL,280.44
1,3,14536,Russell Wilson,SEA,223.184417,6,SEA,359.78
13,17,8283,Ryan Fitzpatrick,MIA,217.021759,28,MIA,152.74
2,4,18857,Deshaun Watson,HOU,191.7517,5,HOU,369.32
14,18,8244,Philip Rivers,LAC,187.167095,20,IND,239.96
10,13,17922,Jared Goff,LAR,178.912201,19,LAR,239.98
7,10,17920,Carson Wentz,PHI,165.365806,22,PHI,198.4
0,1,19781,Lamar Jackson,BAL,158.878287,10,BAL,332.78
23,29,9038,Matthew Stafford,DET,158.289412,15,DET,260.56
9,12,4314,Tom Brady,NE,157.332494,8,TB,337.92
