In [1]:
import pandas as pd

# Load the RB/FB player stats for each season.
# 2022 -> 2023 is the training pair, 2023 -> 2024 is the test pair.
df_22, df_23, df_24 = map(
    pd.read_csv,
    (
        'data/2022playerstats_RB_FB.csv',
        'data/2023playerstats_RB_FB.csv',
        'data/2024playerstats_RB_FB.csv',
    ),
)

# Quick look at the first rows of every season
for season_df in (df_22, df_23, df_24):
    print(season_df.head())

           Player FantPos  Age  GamesPlayed  GamesStarted  PassCmp  PassAtt  \
0   Kyle Juszczyk      FB   31           16            12        0        0   
1     Alec Ingold      FB   26           17            14        0        0   
2        C.J. Ham      FB   29           17             4        0        0   
3      Derek Watt      FB   30           17             3        0        0   
4  Zander Horvath      FB   24           15             2        0        0   

   PassYds  PassTD  PassInt  ...  Team_NWE  Team_NYG  Team_NYJ  Team_PHI  \
0        0       0        0  ...     False     False     False     False   
1        0       0        0  ...     False     False     False     False   
2        0       0        0  ...     False     False     False     False   
3        0       0        0  ...     False     False     False     False   
4        0       0        0  ...     False     False     False     False   

   Team_PIT  Team_SEA  Team_SFO  Team_TAM  Team_TEN  Team_WAS  
0   

# RB Model:
Input features (per-game averages from the earlier season of each pair):
- rush attempts per game
- rush yards per game
- rush TDs per game
- targets per game
- receptions per game
- receiving yards per game
- receiving TDs per game

In [2]:
from helpers import compute_rb_fb_features

In [3]:
# Add the per-game feature columns to the two seasons used as model inputs.
# The helper's return value is not used; the new columns show up on the
# frames themselves in the preview below.
for season_df in (df_22, df_23):
    compute_rb_fb_features(season_df)

for season_df in (df_22, df_23):
    print(season_df.head())

           Player FantPos  Age  GamesPlayed  GamesStarted  PassCmp  PassAtt  \
0   Kyle Juszczyk      FB   31           16            12        0        0   
1     Alec Ingold      FB   26           17            14        0        0   
2        C.J. Ham      FB   29           17             4        0        0   
3      Derek Watt      FB   30           17             3        0        0   
4  Zander Horvath      FB   24           15             2        0        0   

   PassYds  PassTD  PassInt  ...  Team_TAM  Team_TEN  Team_WAS  \
0        0       0        0  ...     False     False     False   
1        0       0        0  ...     False     False     False   
2        0       0        0  ...     False     False     False   
3        0       0        0  ...     False     False     False   
4        0       0        0  ...     False     False     False   

   RushAttPerGame  RushYdsPerGame  RushTDsPerGame  TargetsPerGame  \
0        0.437500        1.625000        0.062500        1.

In [4]:
# Per-game columns used as model inputs
features = [
    'RushAttPerGame', 'RushYdsPerGame', 'RushTDsPerGame',
    'TargetsPerGame', 'RecsPerGame', 'RecYdsPerGame', 'RecTDsPerGame',
]

# Inner-join consecutive seasons on player name so only players present in
# both seasons remain. Overlapping columns from the later season get a
# year suffix; the earlier season keeps its original column names.
df_22_23_merged = pd.merge(df_22, df_23, how='inner', on='Player', suffixes=('', '_23'))
df_23_24_merged = pd.merge(df_23, df_24, how='inner', on='Player', suffixes=('', '_24'))

# Inputs come from the earlier season, targets are the later season's PPR points
X_train, y_train = df_22_23_merged[features], df_22_23_merged['FantasyPtsPPR_23']
X_test, y_test = df_23_24_merged[features], df_23_24_merged['FantasyPtsPPR_24']

for preview in (y_test, X_test, y_train, X_train):
    print(preview.head())



0    57.6
1    26.3
2    17.2
3    29.3
4     0.0
Name: FantasyPtsPPR_24, dtype: float64
   RushAttPerGame  RushYdsPerGame  RushTDsPerGame  TargetsPerGame  \
0        0.294118        0.352941             0.0        1.000000   
1        0.117647        0.000000             0.0        0.941176   
2        0.000000        0.000000             0.0        0.411765   
3        0.411765        0.529412             0.0        0.176471   
4        0.500000        1.625000             0.0        0.375000   

   RecsPerGame  RecYdsPerGame  RecTDsPerGame  
0     0.823529       7.000000       0.117647  
1     0.764706       7.000000       0.000000  
2     0.294118       3.058824       0.058824  
3     0.176471       0.470588       0.000000  
4     0.187500       0.125000       0.000000  
0    38.5
1    24.9
2    10.2
3     1.3
4     1.2
Name: FantasyPtsPPR_23, dtype: float64
   RushAttPerGame  RushYdsPerGame  RushTDsPerGame  TargetsPerGame  \
0        0.437500        1.625000        0.062500       

In [5]:
# Normalize the data
from sklearn.preprocessing import StandardScaler

# Fit and transform from the merged frames instead of from X_train / X_test
# themselves. The original version overwrote X_train / X_test with their own
# scaled values, so re-running the cell refit the scaler on already
# standardized data. That left `scaler` as a near-identity transform, and
# `scaler` is later saved alongside the model. Reading from the merged frames
# makes this cell idempotent.
scaler = StandardScaler().fit(df_22_23_merged[features])

# The test set is transformed with the training statistics only (no refit),
# so no information from the test season leaks into the scaling.
X_train = scaler.transform(df_22_23_merged[features])
X_test = scaler.transform(df_23_24_merged[features])

print(X_train[:20])
print(X_test[:20])

[[-1.0366297  -0.99807488 -0.55448794 -0.14792951 -0.07924301  0.33177492
   0.22308562]
 [-1.05126177 -1.0411556  -0.5710241  -0.2014091  -0.32634382 -0.34301836
   0.17884378]
 [-1.07161943 -1.04335079 -0.30644553 -0.61064424 -0.56451327 -0.46228415
  -0.5290256 ]
 [-1.11233476 -1.05871716 -0.83560268 -0.63544636 -0.60897157 -0.51124589
   0.27322637]
 [-1.11233476 -1.05871716 -0.83560268 -0.67760998 -0.60897157 -0.2907088
  -0.5290256 ]
 [-1.04108293 -1.02359403 -0.83560268 -0.57344104 -0.51687938 -0.53760991
  -0.5290256 ]
 [-1.0614406  -1.04335079 -0.83560268 -0.94547298 -0.94558439 -0.93307016
  -0.5290256 ]
 [-1.04941107 -1.02818398 -0.83560268 -0.88459503 -0.82000413 -0.91480937
  -0.5290256 ]
 [-1.09070474 -1.040058   -0.83560268 -0.74085541 -0.93963015 -0.93542409
  -0.5290256 ]
 [-1.11233476 -1.05871716 -0.83560268 -0.8338634  -0.80268272 -0.9393473
  -0.5290256 ]
 [-1.10215593 -1.05432677 -0.83560268 -0.98267617 -0.99321828 -0.9519016
  -0.5290256 ]
 [-1.10151975 -1.0517199

In [6]:
# Summarize the train/test split before fitting. The helper's printed output
# reports sample counts, feature count, samples per feature, and the
# mean/std of each target vector.
from helpers import print_diagnostics
print_diagnostics(X_train, X_test, features, y_train, y_test)

Training samples: 127
Testing samples: 114
Features: 7
Samples per feature: 18.1
Training target stats: mean=79.2, std=89.1
Testing target stats: mean=95.6, std=96.5


In [7]:
# Train and evaluate
from sklearn.linear_model import LinearRegression
from helpers import evaluate_model

# Linear regression fit on the training split
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on both splits so train and test error can be compared
train_preds, preds = model.predict(X_train), model.predict(X_test)

evaluate_model(y_train, train_preds, y_test, preds)

Training RMSE: 63.56993819459673
Training R^2: 0.4868930615120072
Training MAE: 45.86003365603715
Testing RMSE: 68.95834810705766
Testing R^2: 0.4852090281226462
Testing MAE: 49.51246320371384


In [8]:
# Spot check: first five predictions next to the first five actual targets
print(preds[:5], y_test.head(), sep='\n')

[29.62625835 38.20544082 20.38470299  7.838018   13.5001959 ]
0    57.6
1    26.3
2    17.2
3    29.3
4     0.0
Name: FantasyPtsPPR_24, dtype: float64


In [9]:
# Show the fitted intercept, then one "<feature>: <coefficient>" line per input
print("Intercept:", model.intercept_)
print("Feature Coefficients:")
print(
    *(f"{feat}: {coef:.4f}" for feat, coef in zip(features, model.coef_)),
    sep="\n",
)

Intercept: 79.23543307086612
Feature Coefficients:
RushAttPerGame: -10.5874
RushYdsPerGame: 53.6804
RushTDsPerGame: -11.8215
TargetsPerGame: 50.6101
RecsPerGame: -69.8880
RecYdsPerGame: 56.9811
RecTDsPerGame: -5.3094


In [10]:
from helpers import compute_rank_squared_error, build_results_df

# Build a per-player results table from the test targets, the test
# predictions, and the player names of the 2023/2024 merged frame. The printed
# table has Actual, Predicted, ActualRank, PredictedRank and RankError columns.
results = build_results_df(y_test, preds, df_23_24_merged['Player'])
# Single ranking-quality number derived from the results table.
# NOTE(review): the exact formula lives in helpers.py and is not visible here;
# it is also unclear whether this call modifies `results` -- confirm.
rank_squared_error = compute_rank_squared_error(results)

# to_string() renders every row instead of pandas' truncated default view
print(results.to_string())
print("Testing Rank Squared Error:", rank_squared_error)

                    Player  Actual   Predicted  ActualRank  PredictedRank  RankError
0            Kyle Juszczyk    57.6   29.626258          57             86         29
1              Alec Ingold    26.3   38.205441          82             76         -6
2           Patrick Ricard    17.2   20.384703          89             95          6
3           Michael Burton    29.3    7.838018          76            114         38
4         Khari Blasingame     0.0   13.500196         108            101         -7
5                 C.J. Ham    13.5   12.310481          90            103         13
6            Jakob Johnson     0.0   15.192797         108             99         -9
7           Reggie Gilliam     0.7    8.635808         103            113         10
8            Adam Prentice     0.0   14.868949         108            100         -8
9      Christian McCaffrey    47.8  229.456562          63              1        -62
10          Raheem Mostert    70.9   86.953729          51       

In [11]:
import joblib
import os

# Bundle everything needed to score new data: the fitted model, the scaler
# that was applied to its inputs, and the ordered list of feature names.
model_package = dict(model=model, scaler=scaler, features=features)

# Make sure the output directory exists, then write the bundle into it
os.makedirs('models', exist_ok=True)
filepath = os.path.join('models', 'rb_fb_model.joblib')
joblib.dump(model_package, filepath)

print("Model saved as 'rb_fb_model.joblib'")

Model saved as 'rb_fb_model.joblib'
