In [1]:
import pandas as pd

In [2]:
# Seasons covered by the analysis: 2020 through 2024 inclusive.
years = list(range(2020, 2025))

# Per-season DataFrames keyed by year; populated by the CSV-loading cell.
rookie_data, season_data = {}, {}

In [3]:
# Read the rookie-stats and player-stats CSV for every season, keyed by year.
for year in years:
    rookie_csv = f'data/{year}rookiestats_RB_FB.csv'
    season_csv = f'data/{year}playerstats_RB_FB.csv'
    rookie_data[year] = pd.read_csv(rookie_csv)
    season_data[year] = pd.read_csv(season_csv)

# Rookie RB/FB Model

Features, all computed from college statistics:

- Games played
- Rush attempts per game
- Rush yards per game
- Rush TDs per game

In [4]:
def compute_features(df):
    """Add per-game college production columns to ``df`` in place.

    Columns read:
        Coll_games, Coll_rush_att, Coll_rush_yds_per_g, Coll_rush_td,
        Coll_rec, Coll_rec_yds_per_g, Coll_rec_td.

    Columns written:
        GamesPlayed, RushAttPerGame, RushYdsPerGame, RushTDsPerGame,
        RecsPerGame, RecYdsPerGame, RecTDsPerGame.

    Rows with zero or missing games, or with a missing source value, get 0
    for the affected feature.

    Args:
        df: DataFrame holding the ``Coll_*`` columns listed above. It is
            modified in place.

    Returns:
        The same ``df`` object, so the call can be chained. Callers that
        ignore the return value keep working.
    """
    games = df['Coll_games']

    # Mask zero-game rows with NaN via ``where`` rather than
    # ``replace(0, pd.NA)``: pd.NA pushes an integer column into object
    # dtype, so the per-game columns would pass through object dtype and
    # rely on fillna's deprecated silent downcasting. ``where`` keeps
    # everything float64.
    nonzero_games = games.where(games != 0)

    def per_game(total_col):
        # Total divided by games played; undefined ratios become 0.
        return (df[total_col] / nonzero_games).fillna(0)

    df['GamesPlayed'] = games.fillna(0)

    df['RushAttPerGame'] = per_game('Coll_rush_att')
    df['RushYdsPerGame'] = df['Coll_rush_yds_per_g'].fillna(0)
    df['RushTDsPerGame'] = per_game('Coll_rush_td')

    df['RecsPerGame'] = per_game('Coll_rec')
    df['RecYdsPerGame'] = df['Coll_rec_yds_per_g'].fillna(0)
    df['RecTDsPerGame'] = per_game('Coll_rec_td')

    return df

In [5]:
# Add the engineered per-game columns to each season's rookie frame (in place).
for year in years:
    rookie_frame = rookie_data[year]
    compute_features(rookie_frame)

In [6]:
from helpers import append_total_fantasy_points

# One merged frame per season, built from that season's rookie and player stats.
# NOTE(review): presumably the helper attaches each rookie's season fantasy
# total (later cells read 'FantasyPtsPPR' from the result) - confirm in helpers.
merged_data = {
    year: append_total_fantasy_points(rookie_data[year], season_data[year])
    for year in years
}

In [7]:
# Keep only the features we want
# Model inputs: the subset of engineered columns used by this model.
features = ['GamesPlayed', 'RushAttPerGame', 'RushYdsPerGame', 'RushTDsPerGame']

# Hold out the most recent season for testing; train on every earlier season.
train_years, test_year = years[:-1], years[-1]

train_frames = [merged_data[year] for year in train_years]
df_train = pd.concat(train_frames, ignore_index=True)
df_test = merged_data[test_year]

In [8]:
# Split each frame into the feature matrix and the PPR fantasy-points target.
X_train, y_train = df_train[features], df_train['FantasyPtsPPR']
X_test, y_test = df_test[features], df_test['FantasyPtsPPR']

In [9]:
from sklearn.preprocessing import StandardScaler
# Standardise the features: the scaler is fitted on the training split only
# and then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Show the first five scaled rows of the training split, then the test split.
for scaled in (X_train, X_test):
    print(scaled[:5])

[[ 0.20260135 -0.5961655  -0.61520901 -0.33039307]
 [ 0.41330676 -0.33078189 -0.03082209 -0.64545813]
 [ 0.20260135  2.38260979  3.05755177  1.83404798]
 [-0.32416217  0.9970983   0.43890663  0.29088167]
 [ 0.30795406  1.21326956  1.41535058  0.79954441]]
[[-1.69374731 -0.30546237 -0.13438433  0.11225429]
 [-0.32416217 -0.65033587 -0.54123598  0.01698636]
 [ 0.62401217  0.71154304  0.55726347  2.0620714 ]
 [-0.74557298 -0.58093008 -0.63740091 -0.22267205]
 [-0.53486757 -0.20097039 -0.01232884 -0.43413534]]


In [10]:
from helpers import print_diagnostics
# Print sample counts, feature count and target mean/std for both splits
# (see the cell output); the helper comes from the project's `helpers` module.
print_diagnostics(X_train, X_test, features, y_train, y_test)

Training samples: 78
Testing samples: 17
Features: 4
Samples per feature: 19.5
Training target stats: mean=70.6, std=78.3
Testing target stats: mean=55.5, std=69.9


In [11]:
from sklearn.linear_model import LinearRegression
# Fit a plain linear regression on the standardised training features.
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions on the training split and on the held-out test split.
train_preds, preds = model.predict(X_train), model.predict(X_test)

from helpers import evaluate_model
# Report RMSE, R^2 and MAE for the training and test predictions (see the cell output).
evaluate_model(y_train, train_preds, y_test, preds)

Training RMSE: 73.17624447800044
Training R^2: 0.11450773797349012
Training MAE: 59.94638327505876
Testing RMSE: 70.20158229441802
Testing R^2: -0.07311467895351687
Testing MAE: 59.1455443426862


In [12]:
# Spot-check: the first five test predictions followed by the first five actual values.
print(preds[:5], y_test.head(), sep='\n')

[74.41817497 71.3903004  90.21659866 57.72691377 74.48331361]
0     7.5
1    47.0
2    33.5
3     2.8
4    26.7
Name: FantasyPtsPPR, dtype: float64


In [13]:
from helpers import print_model_coefficients
# Print the fitted intercept and one coefficient per feature (see the cell output).
# The inputs were standardised, so each coefficient is per standard deviation of its feature.
print_model_coefficients(model, features)

Intercept: 70.62564102564103
Feature Coefficients:
GamesPlayed: 3.3199
RushAttPerGame: -54.3891
RushYdsPerGame: 62.2788
RushTDsPerGame: 10.4331


In [14]:
from helpers import compute_rank_squared_error, build_results_df
# Build the per-player table of actual vs. predicted points and ranks for the
# held-out season, then score the ranking error with the project helper.
results = build_results_df(y_test, preds, df_test['Player'])
rank_squared_error = compute_rank_squared_error(results)

print(results.to_string())
# Label with test_year instead of a hard-coded season so the heading stays
# correct if the range of years (and therefore the held-out season) changes.
print(f"{test_year} Rank Squared Error:", rank_squared_error)

              Player  Actual  Predicted  ActualRank  PredictedRank  RankError
0    Jonathon Brooks     7.5  74.418175          12              7         -5
1        Trey Benson    47.0  71.390300           7              8          1
2        Blake Corum    33.5  90.216599           8              2         -6
3     MarShawn Lloyd     2.8  57.726914          15             11         -4
4      Jaylen Wright    26.7  74.483314          10              6         -4
5       Bucky Irving   244.4  77.200360           1              5          4
6       Will Shipley    15.7  61.347132          11             10         -1
7          Ray Davis   116.1  43.272026           3             17         14
8     Isaac Guerendo    94.2  57.596570           4             12          8
9      Braelon Allen    85.2  90.668945           5              1         -4
10     Audric Estime    48.7  81.838230           6              3         -3
11       Rasheen Ali     3.1  80.525738          14             