In [29]:
import pandas as pd

# Train with 2023 data, Test with 2024 data
df_train = pd.read_csv('2023playerstats_RB_FB.csv')
df_test = pd.read_csv('2024playerstats_RB_FB.csv')

# Preview each frame (train first, then test).
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
for frame in (df_train, df_test):
    print(frame.head())
    frame.info()

           Player FantPos  Age  GamesPlayed  GamesStarted  PassCmp  PassAtt  \
0     Andrew Beck      FB   27           15            12        0        0   
1   Kyle Juszczyk      FB   32           17            16        0        0   
2     Alec Ingold      FB   27           17            13        0        0   
3  Patrick Ricard      FB   29           17             9        0        0   
4     Nick Bawden      FB   27           16             2        0        0   

   PassYds  PassTD  PassInt  ...  Team_NWE  Team_NYG  Team_NYJ  Team_PHI  \
0        0       0        0  ...     False     False     False     False   
1        0       0        0  ...     False     False     False     False   
2        0       0        0  ...     False     False     False     False   
3        0       0        0  ...     False     False     False     False   
4        0       0        0  ...     False     False      True     False   

   Team_PIT  Team_SEA  Team_SFO  Team_TAM  Team_TEN  Team_WAS  
0   

# RB Model: input features
- rush attempts per game
- rushing yards per game
- rushing touchdowns per game
- targets per game
- receptions per game
- receiving yards per game
- receiving touchdowns per game

In [31]:
# Compute the features needed
# Maps each derived per-game column to the season-total column it comes from.
# Insertion order is the order in which the new columns are appended.
PER_GAME_SOURCES = {
    'RushAttPerGame': 'RushAtt',
    'RushYdsPerGame': 'RushYds',
    'RushTDsPerGame': 'RushTD',
    'TargetsPerGame': 'Targets',
    'RecsPerGame': 'Receptions',
    'RecYdsPerGame': 'RecYds',
    'RecTDsPerGame': 'RecTD',
}


def add_per_game_features(df, sources=PER_GAME_SOURCES, games_col='GamesPlayed'):
    """Add per-game rate columns to ``df`` in place and return it.

    Each new column is ``df[total] / df[games_col]``. Rows where the games
    count is 0 (or missing) get a rate of 0 instead of inf/NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``games_col`` and every total column in ``sources``.
    sources : dict[str, str]
        Mapping of new per-game column name -> existing total column name.
    games_col : str
        Name of the column holding the number of games played.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the per-game columns added.

    Raises
    ------
    KeyError
        If ``games_col`` or any total column is missing from ``df``.
    """
    games = df[games_col]
    # Mask zero-game rows to NaN so the division cannot produce inf.
    # Series.where keeps a numeric (float) dtype, whereas replacing 0 with
    # pd.NA can turn the denominator - and the derived columns - into object dtype.
    safe_games = games.where(games != 0)
    for per_game_col, total_col in sources.items():
        df[per_game_col] = (df[total_col] / safe_games).fillna(0.0)
    return df


# Same transformation for both seasons, so train and test features cannot drift apart.
for frame in (df_train, df_test):
    add_per_game_features(frame)

print(df_train.head(30))
print(df_train.tail())
print(df_test.head(10))
print(df_test.tail())

                 Player FantPos  Age  GamesPlayed  GamesStarted  PassCmp  \
0           Andrew Beck      FB   27           15            12        0   
1         Kyle Juszczyk      FB   32           17            16        0   
2           Alec Ingold      FB   27           17            13        0   
3        Patrick Ricard      FB   29           17             9        0   
4           Nick Bawden      FB   27           16             2        0   
5        Michael Burton      FB   31           17             3        0   
6      Khari Blasingame      FB   27           16             2        0   
7              C.J. Ham      FB   30           17             5        0   
8           Keith Smith      FB   31           13             6        0   
9         Jakob Johnson      FB   29           13             5        0   
10         Nick Bellore      FB   34           16             0        0   
11        Jason Cabinda      FB   27            4             0        0   
12       Reg

In [33]:
# Keep only the model inputs (per-game rates) and the prediction target
features = [
    'RushAttPerGame', 'RushYdsPerGame', 'RushTDsPerGame',
    'TargetsPerGame', 'RecsPerGame', 'RecYdsPerGame', 'RecTDsPerGame',
]
target = 'FantasyPtsPPR'

# Test split (read from df_test)
X_test, y_test = df_test[features], df_test[target]

# Training split (read from df_train)
X_train, y_train = df_train[features], df_train[target]

# Sanity-check the selections.
# NOTE: to_string() renders every row of the training target, not just a preview.
print(y_test.head())
print(X_test.head())
print(y_train.to_string())
print(X_train.head())



0    57.6
1    29.3
2    26.3
3    17.2
4    13.5
Name: FantasyPtsPPR, dtype: float64
   RushAttPerGame  RushYdsPerGame  RushTDsPerGame  TargetsPerGame  \
0        0.294118        1.529412        0.058824        1.823529   
1        0.411765        0.470588        0.058824        0.588235   
2        0.666667        1.133333        0.066667        0.800000   
3        0.000000        0.000000        0.000000        0.294118   
4        0.117647        0.588235        0.058824        0.352941   

   RecsPerGame  RecYdsPerGame  RecTDsPerGame  
0     1.117647      11.764706       0.117647  
1     0.588235       3.823529       0.058824  
2     0.733333       6.400000       0.000000  
3     0.176471       1.294118       0.058824  
4     0.294118       2.058824       0.000000  
0       38.8
1       38.5
2       24.9
3       16.2
4       11.5
5        6.7
6        5.8
7       10.2
8        6.3
9        2.2
10       0.0
11       0.0
12       1.3
13       0.0
14       0.0
15       2.4
16       

In [34]:
# Standardize the features.
# The scaler is fitted on the training features only; the same fitted
# transform is then applied to both the training and the test features.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X_train)

# Both right-hand sides are evaluated before either name is rebound.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Show the first 20 scaled rows of each set (train, then test)
for scaled in (X_train, X_test):
    print(scaled[:20])

[[-0.92178181 -0.89701252 -0.35320522 -0.41683441 -0.3545666  -0.5155091
   1.20133014]
 [-0.92897814 -0.89069324 -0.63536795 -0.32555543 -0.27567788 -0.13075918
   1.00215584]
 [-0.96136162 -0.9052762  -0.63536795 -0.36582557 -0.32712704 -0.13075918
  -0.49165146]
 [-0.98295061 -0.9052762  -0.63536795 -0.72825681 -0.7387204  -0.58566938
   0.25525219]
 [-0.96001231 -0.8949466  -0.37084039 -0.88178672 -0.83197202 -0.78723873
  -0.49165146]
 [-0.90738915 -0.88340176 -0.63536795 -0.88933737 -0.84161874 -0.88441637
  -0.49165146]
 [-0.89119741 -0.83813382 -0.63536795 -0.75342565 -0.83197202 -0.92430589
  -0.49165146]
 [-0.97215612 -0.88826274 -0.63536795 -0.64771654 -0.63582206 -0.7689914
  -0.49165146]
 [-0.98295061 -0.9052762  -0.63536795 -0.85216493 -0.79412719 -0.64573215
  -0.49165146]
 [-0.98295061 -0.9052762  -0.63536795 -0.90482588 -0.92868656 -0.83218788
  -0.49165146]
 [-0.98295061 -0.9052762  -0.63536795 -1.01014778 -0.99596624 -0.93873401
  -0.49165146]
 [-0.93707401 -0.905276

In [35]:
# Fit a linear regression on the training set, then score it on both sets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

model = LinearRegression()
model.fit(X_train, y_train)

# Predictions on the data the model was fitted on, and on the other season's data
train_preds = model.predict(X_train)
preds = model.predict(X_test)

# (label, value) pairs, printed in the order listed
report = [
    ("Training MSE:", mean_squared_error(y_train, train_preds)),
    ("Training R^2:", r2_score(y_train, train_preds)),
    ("Test MSE:", mean_squared_error(y_test, preds)),
    ("Test R^2:", r2_score(y_test, preds)),
]
for label, value in report:
    print(label, value)

Training MSE: 683.4590376352056
Training R^2: 0.9059387636116892
Test MSE: 854.998056567208
Test R^2: 0.8976719884168238


In [36]:
# Compare the first few test predictions with the corresponding actual values
n_preview = 5
print(preds[:n_preview])
print(y_test.head(n_preview))

[46.73084651 25.07032218 20.32171988  7.59593296  4.80731159]
0    57.6
1    29.3
2    26.3
3    17.2
4    13.5
Name: FantasyPtsPPR, dtype: float64


In [37]:
# Report the fitted intercept and one coefficient per input feature.
# The model was fitted on standardized inputs, so each coefficient is
# expressed per one standard deviation of its feature.
print("Intercept:", model.intercept_)
print("Feature Coefficients:")

# Pair each feature name with its coefficient, in training column order
coef_by_feature = zip(features, model.coef_)
for feat, coef in coef_by_feature:
    print(f"{feat}: {coef:.4f}")

Intercept: 70.45240963855422
Feature Coefficients:
RushAttPerGame: 13.0480
RushYdsPerGame: 22.7538
RushTDsPerGame: 13.7392
TargetsPerGame: -17.1432
RecsPerGame: 38.9607
RecYdsPerGame: 7.0071
RecTDsPerGame: 14.7021
