In [12]:
import pandas as pd

# Three seasons of WR stats, one CSV per season.
# Training pairs 2022 features with 2023 outcomes; testing pairs 2023 features
# with 2024 outcomes (the pairing itself happens in the merge cell further down).
df_22, df_23, df_24 = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/2022playerstats_WR.csv',
        'data/2023playerstats_WR.csv',
        'data/2024playerstats_WR.csv',
    )
)

# Preview the first rows of each season, oldest first.
for season_df in (df_22, df_23, df_24):
    print(season_df.head())

   Rk            Player Team FantPos  Age  GamesPlayed  GamesStarted  PassCmp  \
0   5  Justin Jefferson  MIN      WR   23           17            17        2   
1   8     Davante Adams  LVR      WR   30           17            17        0   
2  11       Tyreek Hill  MIA      WR   28           17            17        0   
3  13        A.J. Brown  PHI      WR   25           17            16        0   
4  14      Stefon Diggs  BUF      WR   29           16            16        0   

   PassAtt  PassYds  ...  Receiving_YAC/R  Receiving_ADOT  Receiving_BrkTkl  \
0        2       34  ...              4.9            10.1               6.0   
1        0        0  ...              4.9            11.8               9.0   
2        0        0  ...              4.1            12.2               5.0   
3        0        0  ...              6.2            12.1               7.0   
4        0        0  ...              3.9            11.2               2.0   

   Receiving_Rec/Br  Receiving_Drop  R

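# WR Model

Predicts a wide receiver's next-season PPR fantasy points from these previous-season inputs:
- targets per game
- receptions per game
- receiving yards per game
- receiving touchdowns per game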

In [13]:
from helpers import compute_wr_features

In [14]:
# Derive the per-game feature columns for the two seasons used as model inputs.
# NOTE(review): the return value is discarded, so compute_wr_features presumably
# adds its columns to the frame in place -- confirm in helpers.py.
for season_df in (df_22, df_23):
    compute_wr_features(season_df)

# Preview both frames after feature computation.
for season_df in (df_22, df_23):
    print(season_df.head())

   Rk            Player Team FantPos  Age  GamesPlayed  GamesStarted  PassCmp  \
0   5  Justin Jefferson  MIN      WR   23           17            17        2   
1   8     Davante Adams  LVR      WR   30           17            17        0   
2  11       Tyreek Hill  MIA      WR   28           17            17        0   
3  13        A.J. Brown  PHI      WR   25           17            16        0   
4  14      Stefon Diggs  BUF      WR   29           16            16        0   

   PassAtt  PassYds  ...  Receiving_Drop  Receiving_Drop%  Receiving_Int  \
0        2       34  ...             5.0              2.7            5.0   
1        0        0  ...             7.0              3.9            6.0   
2        0        0  ...             8.0              4.7            6.0   
3        0        0  ...             5.0              3.4            3.0   
4        0        0  ...             8.0              5.2            1.0   

   Receiving_Rat                       Awards  Year  Tar

In [15]:
# Model inputs: per-game receiving production from the earlier season of each pair.
features = ['TargetsPerGame', 'RecsPerGame', 'RecYdsPerGame', 'RecTDsPerGame']

# Inner-join consecutive seasons so only players present in both remain.
# Overlapping columns keep their plain name for the earlier season and get a
# year suffix for the later one.
# NOTE(review): the join key is the player name alone; this assumes names are
# unique within each season file -- confirm, otherwise rows would be duplicated.
df_22_23_merged = df_22.merge(
    df_23,
    on='Player',
    how='inner',
    suffixes=('', '_23'),
)
df_23_24_merged = df_23.merge(
    df_24,
    on='Player',
    how='inner',
    suffixes=('', '_24'),
)

# Inputs are the unsuffixed feature columns; targets are the later season's PPR points.
X_train, y_train = df_22_23_merged[features], df_22_23_merged['FantasyPtsPPR_23']
X_test, y_test = df_23_24_merged[features], df_23_24_merged['FantasyPtsPPR_24']

# Preview in the order: test target, test inputs, train target, train inputs.
for preview in (y_test, X_test, y_train, X_train):
    print(preview.head())

0    263.4
1    218.2
2    316.2
3    240.4
4    206.6
Name: FantasyPtsPPR_24, dtype: float64
   TargetsPerGame  RecsPerGame  RecYdsPerGame  RecTDsPerGame
0       10.647059     7.941176     102.882353       0.705882
1       10.687500     7.437500     112.437500       0.812500
2       10.250000     7.437500      94.687500       0.625000
3        8.000000     4.647059      73.823529       0.764706
4        9.411765     6.176471      87.411765       0.352941
0    202.2
1    265.4
2    376.4
3    289.6
4    273.8
Name: FantasyPtsPPR_23, dtype: float64
   TargetsPerGame  RecsPerGame  RecYdsPerGame  RecTDsPerGame
0       10.823529     7.529412     106.411765       0.470588
1       10.588235     5.882353      89.176471       0.823529
2       10.000000     7.000000     100.588235       0.411765
3        8.529412     5.176471      88.000000       0.647059
4        9.625000     6.750000      89.312500       0.687500


In [16]:
# Standardize the features. The scaling statistics are learned from the training
# split only and then reused unchanged on the test split.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Show the first five scaled rows of each split.
for scaled_split in (X_train, X_test):
    print(scaled_split[:5])

[[2.3611031  2.54970558 2.99189454 1.47066387]
 [2.28184403 1.72497364 2.31620055 3.24644823]
 [2.08369635 2.28461318 2.76358838 1.17469981]
 [1.58832716 1.3715171  2.27007809 2.35855605]
 [1.95737721 2.15943065 2.32153346 2.56203134]]
[[2.3016588  2.75588857 2.85352717 2.65452011]
 [2.31528145 2.5036826  3.22812799 3.19095497]
 [2.16790912 2.5036826  2.53225543 2.24756953]
 [1.40999425 1.10642469 1.71430249 2.95048417]
 [1.88554868 1.87224721 2.24701686 0.87873575]]


In [17]:
# Sanity-check the split before fitting. Per the recorded output, the helper
# reports sample counts for both splits, the feature count, the
# samples-per-feature ratio, and the mean/std of each target.
from helpers import print_diagnostics
print_diagnostics(X_train, X_test, features, y_train, y_test)

Training samples: 181
Testing samples: 184
Features: 4
Samples per feature: 45.2
Training target stats: mean=81.1, std=89.3
Testing target stats: mean=84.1, std=85.4


In [18]:
# Fit an ordinary least-squares baseline on the scaled training features.
from sklearn.linear_model import LinearRegression
from helpers import evaluate_model

model = LinearRegression()
model.fit(X_train, y_train)

# Predict on both splits so the train/test gap is visible in the metrics.
train_preds, preds = model.predict(X_train), model.predict(X_test)

# Per the recorded output, this prints RMSE, R^2 and MAE for train and test.
evaluate_model(y_train, train_preds, y_test, preds)

Training RMSE: 49.01344761079798
Training R^2: 0.6969570189674076
Training MAE: 36.58444884666653
Testing RMSE: 60.109931504491705
Testing R^2: 0.5019045091869618
Testing MAE: 45.050065182237184


In [19]:
# Spot-check: first five test predictions followed by the matching actual targets.
print(preds[:5], y_test.head(), sep='\n')

[276.15623519 306.77244593 257.17538846 208.81184231 247.68235331]
0    263.4
1    218.2
2    316.2
3    240.4
4    206.6
Name: FantasyPtsPPR_24, dtype: float64


In [20]:
# Report the fitted linear model. The coefficients apply to the standardized
# features, so each one is the change in predicted points per one standard
# deviation of that feature.
print("Intercept:", model.intercept_)
print("Feature Coefficients:")

# Format every line first, then emit them in feature order.
coef_lines = [f"{feat}: {coef:.4f}" for feat, coef in zip(features, model.coef_)]
for coef_line in coef_lines:
    print(coef_line)

Intercept: 81.10220994475138
Feature Coefficients:
TargetsPerGame: 32.6678
RecsPerGame: -24.3558
RecYdsPerGame: 68.0933
RecTDsPerGame: -2.7576


In [21]:
from helpers import compute_rank_squared_error, build_results_df

# Build a per-player comparison table (actual vs. predicted points and ranks,
# per the recorded output) and reduce it to a single rank-error score.
player_names = df_23_24_merged['Player']
results = build_results_df(y_test, preds, player_names)
rank_squared_error = compute_rank_squared_error(results)

# Print the whole table (to_string avoids pandas' row truncation), then the score.
print(results.to_string())
print("Testing Rank Squared Error:", rank_squared_error)

                       Player  Actual   Predicted  ActualRank  PredictedRank  RankError
0                 CeeDee Lamb   263.4  276.156235           6              3         -3
1                 Tyreek Hill   218.2  306.772446          15              1        -14
2           Amon-Ra St. Brown   316.2  257.175388           3              5          2
3                  Mike Evans   240.4  208.811842          11             14          3
4                  Puka Nacua   206.6  247.682353          23              6        -17
5                  D.J. Moore   238.1  217.813404          13             12         -1
6                  A.J. Brown   216.9  240.143239          17              8         -9
7                Deebo Samuel   153.6  159.770274          39             35         -4
8                Nico Collins   210.6  229.334061          20              9        -11
9               Brandon Aiyuk    62.4  223.872790          91             11        -80
10               Keenan Allen   

In [22]:
import joblib
import os

# Bundle everything needed at inference time: the fitted model, the scaler that
# must be applied to the inputs first, and the expected feature column order.
model_package = dict(model=model, scaler=scaler, features=features)

# Make sure the output directory exists, then serialize the bundle into it.
output_dir = 'models'
os.makedirs(output_dir, exist_ok=True)
filepath = os.path.join(output_dir, 'wr_model.joblib')
joblib.dump(model_package, filepath)

print("Model saved as 'wr_model.joblib'")

Model saved as 'wr_model.joblib'
