In [1]:
import pandas as pd

In [2]:
# Inclusive range of seasons covered by the CSV files under data/.
FIRST_SEASON = 2020
LAST_SEASON = 2024

# Seasons in chronological order; later cells treat the last one as the
# hold-out test season.
years = list(range(FIRST_SEASON, LAST_SEASON + 1))

# Per-season DataFrames keyed by year; filled in by the loading cell.
rookie_data = {}
season_data = {}

In [3]:
# Read the two CSV files for every season into the per-year dictionaries:
# one with rookie stats and one with player stats (both TE files).
for year in years:
    rookie_csv = f'data/{year}rookiestats_TE.csv'
    season_csv = f'data/{year}playerstats_TE.csv'
    rookie_data[year] = pd.read_csv(rookie_csv)
    season_data[year] = pd.read_csv(season_csv)

# Rookie TE Model

Input features:
- games played
- receptions per game
- receiving yards per game
- receiving touchdowns per game


In [4]:
from helpers import compute_rookie_te_features

In [5]:
# Run the feature helper on each season's rookie frame.
# NOTE(review): the return value is discarded, so the helper presumably adds
# the feature columns to the DataFrame in place - confirm in helpers.py.
for year in years:
    rookie_frame = rookie_data[year]
    compute_rookie_te_features(rookie_frame)

In [6]:
from helpers import append_total_fantasy_points

# Append total fantasy points: one merged frame per season, built from that
# season's rookie frame and player-stats frame.
# NOTE(review): presumably this is where the 'FantasyPtsPPR' column used as
# the target later on gets attached - confirm in helpers.py.
merged_data = {
    year: append_total_fantasy_points(rookie_data[year], season_data[year])
    for year in years
}

In [7]:
# Columns fed to the model as inputs.
features = ['GamesPlayed', 'RecsPerGame', 'RecYdsPerGame', 'RecTDsPerGame']

# Chronological hold-out: the most recent season is the test set and every
# earlier season is used for training.
test_year = years[-1]
train_years = years[:-1]

# Stack the training seasons into a single frame with a fresh 0..n-1 index.
train_frames = [merged_data[season] for season in train_years]
df_train = pd.concat(train_frames, ignore_index=True)
df_test = merged_data[test_year]

In [8]:
# Separate each split into the model inputs (the `features` columns) and the
# regression target (the 'FantasyPtsPPR' column).
X_train = df_train[features]
y_train = df_train['FantasyPtsPPR']
X_test = df_test[features]
y_test = df_test['FantasyPtsPPR']

# Sanity-check row counts, dtypes and missing values of the inputs.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
X_train.info()
X_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53 entries, 0 to 52
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   GamesPlayed    53 non-null     float64
 1   RecsPerGame    53 non-null     float64
 2   RecYdsPerGame  53 non-null     float64
 3   RecTDsPerGame  53 non-null     float64
dtypes: float64(4)
memory usage: 1.8 KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   GamesPlayed    12 non-null     float64
 1   RecsPerGame    12 non-null     float64
 2   RecYdsPerGame  12 non-null     float64
 3   RecTDsPerGame  12 non-null     float64
dtypes: float64(4)
memory usage: 516.0 bytes
None


In [9]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to the test split so no test-set
# statistics leak into the scaling.
# Note: X_train and X_test are rebound here to the scaler's output (NumPy
# arrays by default) and no longer refer to the original DataFrames.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Print a quick size/target summary of the two splits before fitting. Judging
# by the recorded output, the helper reports the sample counts, the number of
# features, the samples-per-feature ratio, and the mean/std of each target.
# X_train and X_test are the standardized versions produced by the scaling cell.
from helpers import print_diagnostics
print_diagnostics(X_train, X_test, features, y_train, y_test)

Training samples: 53
Testing samples: 12
Features: 4
Samples per feature: 13.2
Training target stats: mean=41.4, std=50.2
Testing target stats: mean=49.6, std=73.0


In [11]:
from sklearn.linear_model import LinearRegression
from helpers import evaluate_model

# Ordinary least-squares regression on the standardized features.
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions for both splits, so training and test error can be compared.
preds = model.predict(X_test)
train_preds = model.predict(X_train)

# The recorded output shows RMSE, R^2 and MAE for each split.
evaluate_model(y_train, train_preds, y_test, preds)

Training RMSE: 43.17917614541477
Training R^2: 0.24636357056477742
Training MAE: 32.548648511895315
Testing RMSE: 61.26792318842506
Testing R^2: 0.23078414573817674
Testing MAE: 38.99092138463458


In [12]:
# Show the fitted intercept and one coefficient per entry in `features`.
# The model was fit on standardized inputs, so each coefficient is the change
# in predicted points per one training-set standard deviation of that feature,
# and the intercept equals the mean of the training target.
from helpers import print_model_coefficients
print_model_coefficients(model, features)

Intercept: 41.37735849056603
Feature Coefficients:
GamesPlayed: -9.1611
RecsPerGame: 13.8267
RecYdsPerGame: 11.9635
RecTDsPerGame: -7.3717


In [13]:
from helpers import compute_rank_squared_error, build_results_df

# Build the per-player comparison table for the hold-out season (the recorded
# output shows actual vs. predicted points, their ranks, and the rank error),
# then reduce it to a single rank-error score.
results = build_results_df(y_test, preds, df_test['Player'])
rank_squared_error = compute_rank_squared_error(results)

print(results.to_string())
# Label the score with the actual hold-out season instead of a hard-coded
# year, so the message stays correct when the list of seasons changes.
print(f"{test_year} Rank Squared Error:", rank_squared_error)


               Player  Actual  Predicted  ActualRank  PredictedRank  RankError
0        Brock Bowers   262.7  92.949462           1              1          0
1         Ben Sinnott    13.8  50.844292           7              4         -3
2          Tip Reiman     9.7  14.232226           9             10          1
3   Ja'Tavion Sanders    73.2  62.021318           3              3          0
4        Theo Johnson    68.1  29.354718           4              8          4
5           Erick All    35.8  43.193624           5              6          1
6           AJ Barner    78.5  23.925063           2              9          7
7         Cade Stover    34.3  29.532018           6              7          1
8         Jared Wiley     1.7   4.982863          11             12          1
9    Tanner McLachlan     0.0  90.828651          12              2        -10
10        Jaheim Bell     4.0  47.462693          10              5         -5
11         Devin Culp    13.8  11.470712           7

In [14]:
import os

import joblib

# Bundle everything needed to score new data later: the fitted regression,
# the scaler fitted on the training split, and the ordered feature names.
model_package = {'model': model, 'scaler': scaler, 'features': features}

# Make sure the output directory exists, then serialize the bundle into it.
model_dir = 'models'
os.makedirs(model_dir, exist_ok=True)
filepath = os.path.join(model_dir, 'rookie_te_model.joblib')
joblib.dump(model_package, filepath)

print("Model saved as 'rookie_te_model.joblib'")

Model saved as 'rookie_te_model.joblib'
