In [14]:
import pandas as pd

In [28]:
# Seasons covered by the analysis: 2020 through 2024 inclusive.
years = list(range(2020, 2025))

# Per-season tables keyed by year; filled in when the CSV files are read.
season_data = dict()
rookie_data = dict()

In [36]:
# Read each season's rookie-QB sheet and player-stats QB sheet, keyed by year.
for year in years:
    rookie_csv = f'data/{year}rookiestats_QB.csv'
    season_csv = f'data/{year}playerstats_QB.csv'
    rookie_data[year] = pd.read_csv(rookie_csv)
    season_data[year] = pd.read_csv(season_csv)

# Rookie QB Model:
- Games Played
- Completion Percentage
- Passing Yards per Game
- Passing TDs per Game
- Interceptions per Game

In [37]:
def compute_features(df):
    """Add the college-stat feature columns used by the rookie QB model.

    The frame is modified in place (existing callers rely on that) and is also
    returned so the call can be chained or assigned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Coll_games``, ``Coll_pass_cmp_pct``,
        ``Coll_pass_att``, ``Coll_pass_yds_per_g``, ``Coll_pass_td``,
        ``Coll_pass_int``, ``Coll_rush_yds_per_g`` and ``Coll_rush_td``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with ``GamesPlayed``,
        ``CompletionPct``, ``PassAttPerGame``, ``PassYdsPerGame``,
        ``PassTDsPerGame``, ``PassIntPerGame``, ``RushYdsPerGame`` and
        ``RushTDsPerGame`` added. Missing inputs, and per-game rates for rows
        with zero or missing games, are filled with 0.
    """
    games = df['Coll_games']
    # Mask zero-game rows with NaN so the divisions below cannot divide by zero.
    # `where` keeps the column numeric, whereas replace(0, pd.NA) can upcast it
    # to object dtype and leak that dtype into every derived feature.
    games_nonzero = games.where(games != 0)

    def per_game(total_column):
        # Career total divided by games; undefined rates become 0.
        return (df[total_column] / games_nonzero).fillna(0)

    df['GamesPlayed'] = games.fillna(0)
    df['CompletionPct'] = df['Coll_pass_cmp_pct'].fillna(0)
    df['PassAttPerGame'] = per_game('Coll_pass_att')
    df['PassYdsPerGame'] = df['Coll_pass_yds_per_g'].fillna(0)
    df['PassTDsPerGame'] = per_game('Coll_pass_td')
    df['PassIntPerGame'] = per_game('Coll_pass_int')
    df['RushYdsPerGame'] = df['Coll_rush_yds_per_g'].fillna(0)
    df['RushTDsPerGame'] = per_game('Coll_rush_td')

    return df

In [38]:
# Add the engineered feature columns to every season's rookie table (in place).
for year in years:
    rookie_frame = rookie_data[year]
    compute_features(rookie_frame)

In [40]:
from helpers import append_total_fantasy_points

# Append total fantasy points: one merged table per season, built by the helper
# from that year's rookie table and season table.
merged_data = {
    year: append_total_fantasy_points(rookie_data[year], season_data[year])
    for year in years
}

In [41]:
# Columns that make up the model's design matrix.
features = [
    'GamesPlayed',
    'CompletionPct',
    'PassYdsPerGame',
    'PassTDsPerGame',
    'PassIntPerGame',
]

# Hold out the most recent season for testing; train on all earlier seasons.
train_years, test_year = years[:-1], years[-1]

train_frames = [merged_data[year] for year in train_years]
df_train = pd.concat(train_frames, ignore_index=True)
df_test = merged_data[test_year]

In [42]:
# Split each frame into the feature matrix and the 'FantasyPtsPPR' target.
X_train, y_train = df_train[features], df_train['FantasyPtsPPR']
X_test, y_test = df_test[features], df_test['FantasyPtsPPR']

# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() appends a stray "None" line.
X_train.info()
X_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   GamesPlayed     45 non-null     float64
 1   CompletionPct   45 non-null     float64
 2   PassYdsPerGame  45 non-null     float64
 3   PassTDsPerGame  45 non-null     float64
 4   PassIntPerGame  45 non-null     float64
dtypes: float64(5)
memory usage: 1.9 KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   GamesPlayed     11 non-null     float64
 1   CompletionPct   11 non-null     float64
 2   PassYdsPerGame  11 non-null     float64
 3   PassTDsPerGame  11 non-null     float64
 4   PassIntPerGame  11 non-null     float64
dtypes: float64(5)
memory usage: 572.0 bytes
None


In [43]:
from sklearn.preprocessing import StandardScaler
# Standardize the features with statistics learned from the training split
# only, then apply that same transform to the held-out split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Peek at the first five scaled rows of each split.
for scaled in (X_train, X_test):
    print(scaled[:5])

[[ 0.17837314  0.6062811   0.28485805  0.41140007 -1.26014918]
 [-0.33948437  0.65515722  0.2623929   1.32455774 -0.98443035]
 [ 0.60992107  0.13707036  0.47356527  0.63968948  0.03384942]
 [ 0.09206356 -0.1366359   0.27437431 -0.19279448  1.32652879]
 [ 1.64563609  0.25437304 -0.5807989  -0.40857825 -0.85641804]]
[[ 0.00575397  0.42055185  0.96630081  1.14827247 -0.74224489]
 [ 1.5593265   0.3619005   0.35674652 -0.13649453 -0.82150559]
 [-0.59841312  0.22504737  0.88842163  0.55489627  0.09087309]
 [ 0.95515941  0.06864379  1.17298015  0.41140007  1.03176359]
 [ 0.26468273  0.48897841 -0.78298522 -0.70069552 -1.29806052]]


In [44]:
from helpers import print_diagnostics
# Sanity-check the split before fitting. Per the recorded output, the helper
# reports sample counts, feature count, samples per feature, and the mean/std
# of the training and testing targets.
print_diagnostics(X_train, X_test, features, y_train, y_test)

Training samples: 45
Testing samples: 11
Features: 5
Samples per feature: 9.0
Training target stats: mean=63.8, std=84.9
Testing target stats: mean=112.3, std=138.4


In [45]:
from sklearn.linear_model import LinearRegression
# Fit ordinary least squares on the standardized training features.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on both splits so in-sample and held-out error can be compared.
preds = model.predict(X_test)
train_preds = model.predict(X_train)

from helpers import evaluate_model
evaluate_model(y_train, train_preds, y_test, preds)

Training RMSE: 74.60969513598808
Training R^2: 0.21106228037664831
Training MAE: 59.82606196104598
Testing RMSE: 120.91647503892595
Testing R^2: 0.1600981378567562
Testing MAE: 92.74477439891133


In [46]:
# Show the first five held-out predictions, then the matching actual totals.
print(preds[:5], y_test.head(), sep='\n')

[128.27631185  97.26876855  95.72876535  52.72001019  92.77699761]
0    254.5
1    355.8
2    177.1
3     44.1
4      0.0
Name: FantasyPtsPPR, dtype: float64


In [47]:
from helpers import print_model_coefficients
# Report the fitted intercept and one coefficient per entry in `features`.
# The model was fit on StandardScaler output, so each coefficient is the change
# in predicted points per one standard deviation of that feature.
print_model_coefficients(model, features)

Intercept: 63.82000000000003
Feature Coefficients:
GamesPlayed: -8.5005
CompletionPct: 11.4495
PassYdsPerGame: 30.4628
PassTDsPerGame: 1.2731
PassIntPerGame: -38.7904


In [48]:
from helpers import compute_rank_squared_error, build_results_df
# Rank the held-out rookies by actual vs. predicted points.
# Player names are taken from df_test, the frame y_test and preds were derived
# from, so the rows line up. The earlier `df_24_merged` name is not defined in
# this notebook and raised NameError on a fresh kernel.
# NOTE(review): assumes the merged frame carries a 'Player' column — confirm.
results = build_results_df(y_test, preds, df_test['Player'])
rank_squared_error = compute_rank_squared_error(results)

print(results.to_string())
# Label the metric with the actual held-out season instead of a hard-coded year.
print(f"{test_year} Rank Squared Error:", rank_squared_error)

             Player  Actual   Predicted  ActualRank  PredictedRank  RankError
0    Caleb Williams   254.5  128.276312           3              1         -2
1    Jayden Daniels   355.8   97.268769           1              2          1
2        Drake Maye   177.1   95.728765           4              3         -1
3     Michael Penix    44.1   52.720010           6              8          2
4     J.J. McCarthy     0.0   92.776998           8              4         -4
5            Bo Nix   317.2   89.431398           2              5          3
6   Spencer Rattler    67.3   38.585173           5             10          5
7     Jordan Travis     0.0   62.485242           8              7         -1
8        Joe Milton    19.2   72.961600           7              6         -1
9       Devin Leary     0.0   34.408905           8             11          3
10    Michael Pratt     0.0   45.530180           8              9          1
2024 Rank Squared Error: 72
