In [14]:
import pandas as pd

In [15]:
# Seasons covered by the analysis: 2020 through 2024 inclusive.
years = list(range(2020, 2025))

# Per-year DataFrames, keyed by season; filled in by the loading cell.
rookie_data = dict()
season_data = dict()

In [16]:
# Read the rookie-stats and player-stats WR CSVs for every season, keyed by year.
for year in years:
    rookie_csv = f'data/{year}rookiestats_WR.csv'
    season_csv = f'data/{year}playerstats_WR.csv'
    rookie_data[year] = pd.read_csv(rookie_csv)
    season_data[year] = pd.read_csv(season_csv)

# Rookie WR Model

Features (derived from each rookie's college stats):
- games played
- receptions per game
- receiving yards per game
- receiving touchdowns per game

In [17]:
def _per_game(total, games):
    """Return ``total / games`` as float64, using 0.0 where the rate is undefined.

    The rate is undefined (and reported as 0.0) when ``games`` is zero or
    missing, or when ``total`` is missing.
    """
    # Mask zero game counts with a missing value so the division yields NaN
    # for them. Series.where keeps the column numeric, unlike
    # .replace(0, pd.NA), which falls back to object dtype.
    played = games.where(games != 0)
    return (total / played).fillna(0).astype('float64')


def compute_features(df):
    """Add the rookie-WR model features to ``df`` in place.

    Reads the columns ``Coll_games``, ``Coll_rec``, ``Coll_rec_yds_per_g``
    and ``Coll_rec_td`` and writes:

    - ``GamesPlayed``: ``Coll_games`` with missing values set to 0.
    - ``RecsPerGame``: ``Coll_rec / Coll_games``.
    - ``RecYdsPerGame``: ``Coll_rec_yds_per_g`` with missing values set to 0.
    - ``RecTDsPerGame``: ``Coll_rec_td / Coll_games``.

    Per-game rates are 0.0 when the game count is zero or missing, and are
    always float64 so they can be passed straight to numeric estimators.

    Returns:
        The same ``df`` object, to allow chaining. Callers that ignore the
        return value keep working because the frame is mutated in place.

    Raises:
        KeyError: if any of the four input columns is absent.
    """
    games = df['Coll_games']

    df['GamesPlayed'] = games.fillna(0)
    df['RecsPerGame'] = _per_game(df['Coll_rec'], games)
    df['RecYdsPerGame'] = df['Coll_rec_yds_per_g'].fillna(0)
    df['RecTDsPerGame'] = _per_game(df['Coll_rec_td'], games)
    return df

In [18]:
# Add the model feature columns to every season's rookie table (in place).
for year in years:
    rookie_df = rookie_data[year]
    compute_features(rookie_df)

In [19]:
from helpers import append_total_fantasy_points

# Pair each season's rookie table with that season's player stats.
# append_total_fantasy_points is a project helper (body not shown here);
# later cells read 'FantasyPtsPPR' and 'Player' from its result.
merged_data = {
    year: append_total_fantasy_points(rookie_data[year], season_data[year])
    for year in years
}

In [20]:
# Model inputs: the columns written by compute_features.
features = ['GamesPlayed', 'RecsPerGame', 'RecYdsPerGame', 'RecTDsPerGame']

# Hold out the most recent season for testing; train on every earlier one.
test_year = years[-1]
train_years = years[:-1]

# Stack the training seasons into one frame with a fresh 0..n-1 index.
train_frames = [merged_data[year] for year in train_years]
df_train = pd.concat(train_frames, ignore_index=True)
df_test = merged_data[test_year]

In [21]:
# Regression target: the 'FantasyPtsPPR' column of the merged frames.
target_col = 'FantasyPtsPPR'

# Separate model inputs (X) from the target (y) for each split.
X_train, y_train = df_train[features], df_train[target_col]
X_test, y_test = df_test[features], df_test[target_col]

In [22]:
from sklearn.preprocessing import StandardScaler
# Standardize the features using statistics from the training split only,
# then apply that same transform to both splits.
#
# Inputs are taken from df_train/df_test rather than X_train/X_test because
# this cell rebinds X_train and X_test to the scaled arrays. Reading the
# X_* names back in would make a re-run refit the scaler on already
# standardized data, and this scaler is the one persisted with the model.
train_features = df_train[features]
scaler = StandardScaler().fit(train_features)
X_train = scaler.transform(train_features)
X_test = scaler.transform(df_test[features])

In [23]:
from helpers import print_diagnostics
# Sanity-check the split before fitting. print_diagnostics is a project
# helper (body not shown here); per this cell's recorded output it reports
# sample counts, feature count, samples per feature and target mean/std.
print_diagnostics(X_train, X_test, features, y_train, y_test)

Training samples: 124
Testing samples: 34
Features: 4
Samples per feature: 31.0
Training target stats: mean=71.0, std=76.8
Testing target stats: mean=67.8, std=84.5


In [24]:
from sklearn.linear_model import LinearRegression
# Fit a linear regression on the scaled training features.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on both splits so training and held-out error can be compared.
preds = model.predict(X_test)
train_preds = model.predict(X_train)

from helpers import evaluate_model
# Project helper; per this cell's recorded output it prints RMSE, R^2 and
# MAE for the training and testing splits.
evaluate_model(y_train, train_preds, y_test, preds)

Training RMSE: 69.10185912148366
Training R^2: 0.18392930813930541
Training MAE: 54.4888707854187
Testing RMSE: 76.46479799175049
Testing R^2: 0.15666904986598496
Testing MAE: 62.110760125143344


In [25]:
from helpers import print_model_coefficients
# Show the fitted intercept and one coefficient per feature (see this
# cell's recorded output). The model was fit on scaled features, so the
# coefficients are in scaled-feature units, not raw stat units.
print_model_coefficients(model, features)

Intercept: 70.97016129032258
Feature Coefficients:
GamesPlayed: 0.3553
RecsPerGame: -16.8155
RecYdsPerGame: 24.0029
RecTDsPerGame: 23.6798


In [26]:
from helpers import compute_rank_squared_error, build_results_df
# Build the per-player comparison table for the held-out season; per the
# recorded output it holds actual vs. predicted points, ranks and rank error.
# build_results_df / compute_rank_squared_error are project helpers (bodies
# not shown here).
results = build_results_df(y_test, preds, df_test['Player'])
rank_squared_error = compute_rank_squared_error(results)

print(results.to_string())
# Label the metric with the actual held-out season rather than a hard-coded
# year, so the text stays correct if `years` is extended.
print(f"{test_year} Rank Squared Error:", rank_squared_error)

                 Player  Actual   Predicted  ActualRank  PredictedRank  RankError
0   Marvin Harrison Jr.   196.5  130.598237           4              1         -3
1          Malik Nabers   273.6  100.158932           2              5          3
2           Rome Odunze   144.9  105.228020           6              3         -3
3          Brian Thomas   284.0   91.925990           1              8          7
4         Xavier Worthy   187.2  102.562560           5              4         -1
5        Ricky Pearsall    93.5   43.237703          11             25         14
6        Xavier Legette   125.1   32.844319           8             30         22
7          Keon Coleman   111.5   74.919101           9             10          1
8         Ladd McConkey   240.9   52.851276           3             22         19
9          Ja'Lynn Polk    32.7   72.075302          17             14         -3
10      Adonai Mitchell    53.8   72.719264          14             13         -1
11       Malachi

In [None]:
import joblib
import os

# Bundle the fitted model with the scaler and feature list it was trained
# with, so all three are stored together in one file.
model_package = dict(model=model, scaler=scaler, features=features)

# Make sure the output directory exists, then write the bundle into it.
output_dir = 'models'
os.makedirs(output_dir, exist_ok=True)
filepath = os.path.join(output_dir, 'rookie_wr_model.joblib')
joblib.dump(model_package, filepath)

print("Model saved as 'rookie_wr_model.joblib'")

Model saved as 'rookie_wr_model.joblib'
