In [53]:
import pandas as pd

In [54]:
# The 2022 and 2023 classes are the training data; the 2024 class is the test data.
# Each year contributes a rookie-stats table and a season player-stats table.
df_22, df_22_season = (
    pd.read_csv('data/2022rookiestats_RB_FB.csv'),
    pd.read_csv('data/2022playerstats_RB_FB.csv'),
)
df_23, df_23_season = (
    pd.read_csv('data/2023rookiestats_RB_FB.csv'),
    pd.read_csv('data/2023playerstats_RB_FB.csv'),
)
df_24, df_24_season = (
    pd.read_csv('data/2024rookiestats_RB_FB.csv'),
    pd.read_csv('data/2024playerstats_RB_FB.csv'),
)

# Rookie RB/FB Model:
- Games played
- Rush attempts per game
- Rush yards per game
- Rush TDs per game

In [55]:
def compute_features(df):
    """Add per-game production feature columns to ``df`` in place.

    Reads ``Coll_games``, ``Coll_rush_att``, ``Coll_rush_yds_per_g``,
    ``Coll_rush_td``, ``Coll_rec``, ``Coll_rec_yds_per_g`` and ``Coll_rec_td``
    and writes ``GamesPlayed``, ``RushAttPerGame``, ``RushYdsPerGame``,
    ``RushTDsPerGame``, ``RecsPerGame``, ``RecYdsPerGame`` and
    ``RecTDsPerGame``.

    Missing inputs are filled with 0, and every computed per-game rate is 0
    for rows whose game count is zero or missing.

    Args:
        df: DataFrame holding the ``Coll_*`` columns listed above.

    Returns:
        None. The new columns are assigned directly on ``df``.
    """
    games = df['Coll_games']
    # Mask zero-game rows with NaN (not pd.NA) so the division stays in a float
    # dtype; pd.NA in a NumPy-backed numeric column can force object dtype.
    games_nonzero = games.where(games != 0)

    def per_game(total_col):
        # Rate per game; rows with zero/missing games or missing totals become 0.
        return (df[total_col] / games_nonzero).fillna(0)

    df['GamesPlayed'] = games.fillna(0)

    df['RushAttPerGame'] = per_game('Coll_rush_att')
    # Yards-per-game columns are already per-game in the input; only fill gaps.
    df['RushYdsPerGame'] = df['Coll_rush_yds_per_g'].fillna(0)
    df['RushTDsPerGame'] = per_game('Coll_rush_td')

    df['RecsPerGame'] = per_game('Coll_rec')
    df['RecYdsPerGame'] = df['Coll_rec_yds_per_g'].fillna(0)
    df['RecTDsPerGame'] = per_game('Coll_rec_td')

In [56]:
# Add the engineered feature columns to every rookie table (each frame is modified in place).
for rookie_df in (df_22, df_23, df_24):
    compute_features(rookie_df)

In [57]:
from helpers import append_total_fantasy_points

# Pair each rookie table with the same year's season table and run the helper
# on each pair in year order. The helper lives in helpers.py (not shown here);
# the frames it returns carry the 'FantasyPtsPPR' column used as the target.
df_22_merged, df_23_merged, df_24_merged = (
    append_total_fantasy_points(rookie_df, season_df)
    for rookie_df, season_df in (
        (df_22, df_22_season),
        (df_23, df_23_season),
        (df_24, df_24_season),
    )
)

print(df_22_merged.head())

   Unnamed: 0  Pick   Tm              Player Pos  Age      To  AP1  PB  St  \
0          37    36  NYJ         Breece Hall  RB   21  2024.0    0   0   2   
1          42    41  SEA  Kenneth Walker III  RB   21  2024.0    0   0   3   
2          64    63  BUF          James Cook  RB   22  2024.0    0   2   2   
3          92    91  TAM       Rachaad White  RB   23  2024.0    0   0   2   
4          94    93  SFO  Tyrion Davis-Price  RB   21  2024.0    0   0   0   

   ...  Coll_pass_yds_per_g  Coll_pass_rating  GamesPlayed  RushAttPerGame  \
0  ...                  NaN               NaN         36.0       19.944444   
1  ...                  NaN               NaN         33.0       14.545455   
2  ...                  NaN               NaN         50.0        4.600000   
3  ...                  NaN               NaN         15.0       14.933333   
4  ...                  NaN               NaN         35.0       10.828571   

   RushYdsPerGame  RushTDsPerGame  RecsPerGame  RecYdsPerGame 

In [58]:
# Model inputs: games played plus the three rushing rates. The receiving
# columns produced by compute_features are deliberately left out here.
features = ['GamesPlayed', 'RushAttPerGame', 'RushYdsPerGame', 'RushTDsPerGame']

# Stack the 2022 and 2023 classes into a single training frame with a fresh index.
df_22_23_merged = pd.concat((df_22_merged, df_23_merged), ignore_index=True)

print(df_22_23_merged)

    Unnamed: 0  Pick   Tm              Player Pos  Age      To  AP1  PB  St  \
0           37    36  NYJ         Breece Hall  RB   21  2024.0    0   0   2   
1           42    41  SEA  Kenneth Walker III  RB   21  2024.0    0   0   3   
2           64    63  BUF          James Cook  RB   22  2024.0    0   2   2   
3           92    91  TAM       Rachaad White  RB   23  2024.0    0   0   2   
4           94    93  SFO  Tyrion Davis-Price  RB   21  2024.0    0   0   0   
5           99    98  WAS  Brian Robinson Jr.  RB   23  2024.0    0   0   3   
6          108   107  HOU       Dameon Pierce  RB   22  2024.0    0   0   1   
7          123   122  LVR         Zamir White  RB   22  2024.0    0   0   0   
8          124   123  LAC      Isaiah Spiller  RB   21  2023.0    0   0   0   
9          132   131  TEN      Hassan Haskins  RB   22  2024.0    0   0   0   
10         152   151  ATL      Tyler Allgeier  RB   22  2024.0    0   0   0   
11         155   154  JAX        Snoop Conner  RB   

In [59]:
# Train on the combined 2022-23 classes, test on the 2024 class.
# Target is the 'FantasyPtsPPR' column; inputs are the selected feature columns.
X_train, y_train = df_22_23_merged[features], df_22_23_merged['FantasyPtsPPR']
X_test, y_test = df_24_merged[features], df_24_merged['FantasyPtsPPR']

print(X_train.head())
print(y_train.head())

   GamesPlayed  RushAttPerGame  RushYdsPerGame  RushTDsPerGame
0         36.0       19.944444           109.3        1.388889
1         33.0       14.545455            84.7        1.060606
2         50.0        4.600000            30.1        0.280000
3         15.0       14.933333            95.1        1.333333
4         35.0       10.828571            49.8        0.428571
0    115.1
1    202.5
2    105.7
3    139.1
4      9.9
Name: FantasyPtsPPR, dtype: float64


In [60]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling parameters from the training features only, then apply the
# same fitted scaler to both splits. X_train / X_test are rebound to the
# scaled arrays from this point on.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print(X_train[:5])
print(X_test[:5])

[[-0.29129798  2.24247954  1.85749886  2.36747579]
 [-0.59526109  0.7161978   0.75035798  1.25980127]
 [ 1.12719653 -2.09535881 -1.7069547  -1.37407831]
 [-2.41903973  0.82585022  1.21841754  2.18002318]
 [-0.39261901 -0.33455631 -0.82034188 -0.87277648]]
[[-1.60847145 -0.47046189 -0.16775885  0.02839322]
 [-0.29129798 -0.91431086 -0.66282184 -0.06940815]
 [ 0.62059135  0.83841456  0.67384824  2.03006109]
 [-0.69658212 -0.82498625 -0.77983673 -0.3154397 ]
 [-0.49394005 -0.33598168 -0.01923995 -0.53252636]]


In [61]:
# Print a summary of the train/test split (sample counts, feature count,
# target statistics). print_diagnostics is defined in helpers.py, not shown here.
from helpers import print_diagnostics
print_diagnostics(X_train, X_test, features, y_train, y_test)

Training samples: 40
Testing samples: 17
Features: 4
Samples per feature: 10.0
Training target stats: mean=63.5, std=74.9
Testing target stats: mean=55.5, std=69.9


In [62]:
from sklearn.linear_model import LinearRegression
from helpers import evaluate_model

# Ordinary least-squares fit on the scaled training features.
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions on both splits so train and test error can be compared.
train_preds, preds = model.predict(X_train), model.predict(X_test)

evaluate_model(y_train, train_preds, y_test, preds)

Training RMSE: 70.45130843643301
Training R^2: 0.0916484442727089
Training MAE: 58.24357432939858
Testing RMSE: 69.06023051689476
Testing R^2: -0.03850449580994342
Testing MAE: 56.02697957959382


In [63]:
# Spot-check: the first five test-set predictions, followed by the first five
# actual target values for comparison.
print(preds[:5])
print(y_test.head())

[60.70220795 65.14183815 95.74396486 50.4090682  62.98741594]
0     7.5
1    47.0
2    33.5
3     2.8
4    26.7
Name: FantasyPtsPPR, dtype: float64


In [64]:
# Show the fitted intercept and one coefficient per entry in `features`.
# The model was fit on standardized inputs, so each coefficient is expressed
# per one standard deviation of its feature rather than in raw units.
# NOTE(review): opposite signs on RushAttPerGame and RushYdsPerGame may reflect
# collinearity between the two features — confirm before interpreting them.
from helpers import print_model_coefficients
print_model_coefficients(model, features)

Intercept: 63.5275
Feature Coefficients:
GamesPlayed: 8.0228
RushAttPerGame: -35.2277
RushYdsPerGame: 41.1337
RushTDsPerGame: 14.3125


In [65]:
# Build a per-player comparison of actual vs. predicted 2024 points and print
# it in full, followed by a single rank-error score.
# Both helpers come from helpers.py (not shown here); presumably
# build_results_df ranks players by actual and by predicted points and
# compute_rank_squared_error aggregates the squared rank differences — verify.
from helpers import compute_rank_squared_error, build_results_df
results = build_results_df(y_test, preds, df_24_merged['Player'])
rank_squared_error = compute_rank_squared_error(results)

print(results.to_string())
print("2024 Rank Squared Error:", rank_squared_error)

              Player  Actual  Predicted  ActualRank  PredictedRank  RankError
0    Jonathon Brooks     7.5  60.702208          12              8         -4
1        Trey Benson    47.0  65.141838           7              6         -1
2        Blake Corum    33.5  95.743965           8              1         -7
3     MarShawn Lloyd     2.8  50.409068          15             15          0
4      Jaylen Wright    26.7  62.987416          10              7         -3
5       Bucky Irving   244.4  67.001509           1              5          4
6       Will Shipley    15.7  59.017875          11              9         -2
7          Ray Davis   116.1  43.719739           3             16         13
8     Isaac Guerendo    94.2  52.612594           4             13          9
9      Braelon Allen    85.2  84.057153           5              2         -3
10     Audric Estime    48.7  76.033012           6              4         -2
11       Rasheen Ali     3.1  79.115470          14             