In [1]:
import pandas as pd

In [2]:
# Rookie-class stat files for 2022-2024 (one frame per class; these are the
# frames later passed to compute_features).
df_22, df_23, df_24 = map(pd.read_csv, (
    '2022rookiestats_WR.csv',
    '2023rookiestats_WR.csv',
    '2024rookiestats_WR.csv',
))

# Season stat files for the same years (later joined in via
# append_total_fantasy_points).
df_22_season, df_23_season, df_24_season = map(pd.read_csv, (
    '2022playerstats_WR.csv',
    '2023playerstats_WR.csv',
    '2024playerstats_WR.csv',
))

# Rookie WR Model:
- games played
- receptions per game
- receiving yards per game
- receiving touchdowns per game

In [3]:
def compute_features(df):
    """Add the rookie-WR model feature columns to ``df`` in place.

    Reads ``Coll_games``, ``Coll_rec``, ``Coll_rec_yds_per_g`` and
    ``Coll_rec_td`` and writes four new columns:

    * ``GamesPlayed``   - ``Coll_games`` with missing values set to 0.
    * ``RecsPerGame``   - ``Coll_rec / Coll_games``.
    * ``RecYdsPerGame`` - ``Coll_rec_yds_per_g`` with missing values set to 0.
    * ``RecTDsPerGame`` - ``Coll_rec_td / Coll_games``.

    A per-game rate is 0 whenever it cannot be computed (zero or missing
    games, or a missing numerator).

    Args:
        df: DataFrame holding the four ``Coll_*`` columns. It is modified in
            place; nothing is returned.
    """
    games = df['Coll_games']

    # Mask zero-game rows with NaN via ``where`` instead of
    # ``replace(0, pd.NA)``: the latter converts the column to object dtype,
    # which makes the derived rates object-typed as well (and relies on the
    # deprecated object downcast in ``fillna`` to recover floats).
    playable_games = games.where(games != 0)

    df['GamesPlayed'] = games.fillna(0)
    df['RecsPerGame'] = (df['Coll_rec'] / playable_games).fillna(0)
    df['RecYdsPerGame'] = df['Coll_rec_yds_per_g'].fillna(0)
    df['RecTDsPerGame'] = (df['Coll_rec_td'] / playable_games).fillna(0)

In [4]:
# compute_features writes its columns onto the frame it is given, so each
# rookie class is updated in place and nothing needs to be reassigned.
for rookie_class in (df_22, df_23, df_24):
    compute_features(rookie_class)

In [5]:
from helpers import append_total_fantasy_points

# Pair every rookie frame with the season frame of the same year and run the
# helper on each pair, in 2022 -> 2023 -> 2024 order.
df_22_merged, df_23_merged, df_24_merged = (
    append_total_fantasy_points(rookies, season)
    for rookies, season in (
        (df_22, df_22_season),
        (df_23, df_23_season),
        (df_24, df_24_season),
    )
)

In [6]:
# Model inputs: the four columns that compute_features adds to each frame.
features = ['GamesPlayed', 'RecsPerGame', 'RecYdsPerGame', 'RecTDsPerGame']

# Stack the 2022 and 2023 classes into one frame with a fresh 0..n-1 index.
df_22_23_merged = pd.concat(
    [df_22_merged, df_23_merged],
    ignore_index=True,
)

In [7]:
# Train on the pooled 2022-2023 classes; hold the 2024 class out for testing.
X_train, y_train = df_22_23_merged[features], df_22_23_merged['FantasyPtsPPR']
X_test, y_test = df_24_merged[features], df_24_merged['FantasyPtsPPR']

In [8]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that
# same transform to both splits so the test set does not influence scaling.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [9]:
from helpers import print_diagnostics
# Sanity-check the split before modelling. Per the cell output this prints the
# train/test sample counts, the feature count, the samples-per-feature ratio,
# and the mean/std of the target in each split.
print_diagnostics(X_train, X_test, features, y_train, y_test)

Training samples: 56
Testing samples: 34
Features: 4
Samples per feature: 14.0
Training target stats: mean=77.2, std=75.5
Testing target stats: mean=67.8, std=84.5


In [10]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on the scaled training features.
model = LinearRegression()
model.fit(X_train, y_train)

# In-sample predictions (train_preds) and held-out 2024 predictions (preds).
train_preds, preds = model.predict(X_train), model.predict(X_test)

from helpers import evaluate_model
# Reports RMSE, R^2 and MAE for the training and testing splits.
evaluate_model(y_train, train_preds, y_test, preds)

Training RMSE: 65.76967497544113
Training R^2: 0.22647069134929754
Training MAE: 52.97045785559616
Testing RMSE: 77.92355677630422
Testing R^2: 0.12418479033012098
Testing MAE: 65.23853117815518


In [11]:
from helpers import print_model_coefficients
# Prints the fitted intercept followed by one coefficient per entry in
# `features`. The model was fit on StandardScaler-transformed inputs, so each
# coefficient is expressed per one standard deviation of its feature.
print_model_coefficients(model, features)

Intercept: 77.15892857142856
Feature Coefficients:
GamesPlayed: 6.0706
RecsPerGame: -2.1542
RecYdsPerGame: -5.4151
RecTDsPerGame: 41.4825


In [12]:
from helpers import compute_rank_squared_error, build_results_df
# Build a per-player comparison table for the 2024 class. Per the printed
# output it has the columns Player, Actual, Predicted, ActualRank,
# PredictedRank and RankError.
results = build_results_df(y_test, preds, df_24_merged['Player'])
# NOTE(review): presumably aggregates the squared RankError values into one
# score - confirm against helpers.compute_rank_squared_error.
rank_squared_error = compute_rank_squared_error(results)

# to_string() renders every row and column instead of pandas' truncated repr.
print(results.to_string())
print("2024 Rank Squared Error:", rank_squared_error)

                 Player  Actual   Predicted  ActualRank  PredictedRank  RankError
0   Marvin Harrison Jr.   196.5  155.162469           4              1         -3
1          Malik Nabers   273.6   93.232136           2             12         10
2           Rome Odunze   144.9  103.367135           6              9          3
3          Brian Thomas   284.0  121.327668           1              3          2
4         Xavier Worthy   187.2  121.083457           5              4         -1
5        Ricky Pearsall    93.5   50.582790          11             25         14
6        Xavier Legette   125.1   47.965136           8             26         18
7          Keon Coleman   111.5  104.698746           9              8         -1
8         Ladd McConkey   240.9   64.480213           3             22         19
9          Ja'Lynn Polk    32.7   79.298773          17             16         -1
10      Adonai Mitchell    53.8   97.782618          14             10         -4
11       Malachi