In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from scipy.stats import norm



In [64]:
# Read the 2024 RB fantasy stats CSV into `df`; later cells reference this
# frame by that name.
df = pd.read_csv('../files/2024_fantasy_rb_stats.csv')

RB - Linear Regression (Touchdowns Not Used as a Feature)

In [91]:
# Volume-only feature set: no touchdown column is included, so the residual
# computed below captures the scoring that these volume stats do not explain.
features = ['Rush_ATT', 'TGT', 'GP', 'Rec_YDS', 'Rush_YDS']

X = df[features]
y = df['PTS']

model = LinearRegression()
model.fit(X, y)

# Predict once and reuse the result for both the new column and the metrics
# (the fitted model is deterministic, so repeated predict() calls were redundant).
y_pred = model.predict(X)
df['ML_exp_fantasy'] = y_pred

# Positive delta: the model expected more points than the player scored.
df['deltas'] = df['ML_exp_fantasy'] - df['PTS']

# NOTE: these metrics are computed on the same rows the model was fit on
# (no train/test split), so they describe fit quality, not held-out accuracy.
r2 = r2_score(y, y_pred)
mae = mean_absolute_error(y, y_pred)
rmse = np.sqrt(mean_squared_error(y, y_pred))


print("=== RB Volume Model Summary ===")
print(f"R^2 Score:       {r2:.3f}")
print(f"Mean Absolute Error (MAE): {mae:.2f} fantasy points")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f} fantasy points")





=== RB Volume Model Summary ===
R^2 Score:       0.981
Mean Absolute Error (MAE): 9.76 fantasy points
Root Mean Squared Error (RMSE): 13.14 fantasy points


In [68]:
# Show the fitted intercept, then one row per feature with its learned weight.
print("Intercept (baseline points):", model.intercept_)

# Pair each feature name with its coefficient, then flatten the labelled
# Series into a two-column frame with a default 0..n-1 index.
weights = pd.Series(model.coef_, index=X.columns, name="Coefficient")
coef_table = weights.rename_axis("Feature").reset_index()

print("\nFeature Weights (Fantasy Points contribution per unit):")
print(coef_table)


Intercept (baseline points): 0.8922112957745156

Feature Weights (Fantasy Points contribution per unit):
    Feature  Coefficient
0  Rush_ATT    -0.013451
1       TGT     0.290379
2        GP    -0.186239
3   Rec_YDS     0.205159
4  Rush_YDS     0.153592


In [70]:
# Write `df` to a separate "_with_ML" CSV in the same directory as the input
# file; index=False keeps the row index out of the output.
df.to_csv('../files/2024_fantasy_rb_stats_with_ML.csv', index=False)

WR - Linear Regression

In [71]:
# Read the 2024 WR fantasy stats CSV into `df_wr`.
df_wr = pd.read_csv('../files/2024_fantasy_wr_stats.csv')


In [72]:
# Volume-only feature set (no touchdown column), fit on the WR frame.
features = ['Rush_ATT', 'TGT', 'GP', 'Rec_YDS', 'Rush_YDS']

X = df_wr[features]
y = df_wr['PTS']

model = LinearRegression()
model.fit(X, y)

# Predict once and reuse the result for both the new column and the metrics.
y_pred = model.predict(X)
df_wr['ML_exp_fantasy'] = y_pred

# Positive delta: the model expected more points than the player scored.
df_wr['deltas'] = df_wr['ML_exp_fantasy'] - df_wr['PTS']

# NOTE: these metrics are computed on the same rows the model was fit on
# (no train/test split), so they describe fit quality, not held-out accuracy.
r2 = r2_score(y, y_pred)
mae = mean_absolute_error(y, y_pred)
rmse = np.sqrt(mean_squared_error(y, y_pred))


# Header names the position actually modelled in this cell (was "RB").
print("=== WR Volume Model Summary ===")
print(f"R^2 Score:       {r2:.3f}")
print(f"Mean Absolute Error (MAE): {mae:.2f} fantasy points")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f} fantasy points")


=== RB Volume Model Summary ===
R^2 Score:       0.982
Mean Absolute Error (MAE): 7.83 fantasy points
Root Mean Squared Error (RMSE): 11.43 fantasy points


In [73]:
# Show the fitted intercept, then one row per feature with its learned weight.
print("Intercept (baseline points):", model.intercept_)

# Pair each feature name with its coefficient, then flatten the labelled
# Series into a two-column frame with a default 0..n-1 index.
weights = pd.Series(model.coef_, index=X.columns, name="Coefficient")
coef_table = weights.rename_axis("Feature").reset_index()

print("\nFeature Weights (Fantasy Points contribution per unit):")
print(coef_table)


Intercept (baseline points): -0.3570337207069656

Feature Weights (Fantasy Points contribution per unit):
    Feature  Coefficient
0  Rush_ATT    -0.088165
1       TGT     0.414457
2        GP    -0.070945
3   Rec_YDS     0.171835
4  Rush_YDS     0.110492


In [75]:
# Write `df_wr` to a separate "_with_ML" CSV in the same directory as the
# input file; index=False keeps the row index out of the output.
df_wr.to_csv('../files/2024_fantasy_wr_stats_with_ML.csv', index=False)

TE - Linear Regression

In [76]:
# Read the 2024 TE fantasy stats CSV and fit a volume-only model on it.
# The feature list has no rushing or touchdown columns.
df_te = pd.read_csv('../files/2024_fantasy_te_stats.csv')
features = ['TGT', 'GP', 'Rec_YDS']

X = df_te[features]
y = df_te['PTS']

model = LinearRegression()
model.fit(X, y)

# Predict once and reuse the result for both the new column and the metrics.
y_pred = model.predict(X)
df_te['ML_exp_fantasy'] = y_pred

# Positive delta: the model expected more points than the player scored.
df_te['deltas'] = df_te['ML_exp_fantasy'] - df_te['PTS']

# NOTE: these metrics are computed on the same rows the model was fit on
# (no train/test split), so they describe fit quality, not held-out accuracy.
r2 = r2_score(y, y_pred)
mae = mean_absolute_error(y, y_pred)
rmse = np.sqrt(mean_squared_error(y, y_pred))


# Header names the position actually modelled in this cell (was "RB").
print("=== TE Volume Model Summary ===")
print(f"R^2 Score:       {r2:.3f}")
print(f"Mean Absolute Error (MAE): {mae:.2f} fantasy points")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f} fantasy points")


=== RB Volume Model Summary ===
R^2 Score:       0.980
Mean Absolute Error (MAE): 6.35 fantasy points
Root Mean Squared Error (RMSE): 8.83 fantasy points


In [78]:
# Write `df_te` to a separate "_with_ML" CSV in the same directory as the
# input file; index=False keeps the row index out of the output.
df_te.to_csv('../files/2024_fantasy_te_stats_with_ML.csv', index=False)

QB - Linear Regression

In [79]:
# Read the 2024 QB fantasy stats CSV into `df_qb`.
df_qb = pd.read_csv('../files/2024_fantasy_qb_stats.csv')

In [80]:
# Volume-only feature set for QBs (passing and rushing volume plus GP);
# no touchdown column is included.
features = ['CMP', 'Pass_YDS', 'Rush_ATT', 'Rush_YDS', 'GP']

X = df_qb[features]
y = df_qb['PTS']

model = LinearRegression()
model.fit(X, y)

# Predict once and reuse the result for both the new column and the metrics.
y_pred = model.predict(X)
df_qb['ML_exp_fantasy'] = y_pred

# Positive delta: the model expected more points than the player scored.
df_qb['deltas'] = df_qb['ML_exp_fantasy'] - df_qb['PTS']

# NOTE: these metrics are computed on the same rows the model was fit on
# (no train/test split), so they describe fit quality, not held-out accuracy.
r2 = r2_score(y, y_pred)
mae = mean_absolute_error(y, y_pred)
rmse = np.sqrt(mean_squared_error(y, y_pred))


# Header names the position actually modelled in this cell (was "RB").
print("=== QB Volume Model Summary ===")
print(f"R^2 Score:       {r2:.3f}")
print(f"Mean Absolute Error (MAE): {mae:.2f} fantasy points")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f} fantasy points")

=== RB Volume Model Summary ===
R^2 Score:       0.945
Mean Absolute Error (MAE): 19.07 fantasy points
Root Mean Squared Error (RMSE): 22.88 fantasy points


In [82]:
# Write `df_qb` to a separate "_with_ML" CSV in the same directory as the
# input file; index=False keeps the row index out of the output.
df_qb.to_csv('../files/2024_fantasy_qb_stats_with_ML.csv', index=False)

Normalize the residuals into a relative regression-risk score (-1 to 1) that indicates how strongly a player is expected to see positive or negative TD regression.

In [None]:
def add_regression_risk(df, delta_col="deltas"):
    """
    Add a z-score and a signed regression-risk score for a residual column.

    The residuals are standardized with their own mean and sample standard
    deviation. The absolute z-score is mapped to a 0..1 "extremeness" value
    via the two-sided normal probability ``2 * Phi(|z|) - 1``, and that value
    is given the sign of the raw residual.

    The frame is modified in place (two columns are added or overwritten) and
    the same object is returned, so both ``df = add_regression_risk(df)`` and
    a bare ``add_regression_risk(df)`` work.

    Args:
        df (pd.DataFrame): The dataframe (RB/WR/TE/QB).
        delta_col (str): The name of the column holding residuals (exp - actual).

    Returns:
        pd.DataFrame: The same ``df`` with added 'z_score' and
        'rel_regression_risk' columns.
    """
    deltas = df[delta_col]
    mean_delta = deltas.mean()
    std_delta = deltas.std()

    if not np.isfinite(std_delta) or std_delta == 0:
        # No measurable spread (constant residuals, a single row, or no data):
        # dividing would give NaN/inf, so no row is treated as extreme.
        # Rows whose residual is itself missing stay NaN.
        df["z_score"] = np.where(deltas.isna(), np.nan, 0.0)
    else:
        df["z_score"] = (deltas - mean_delta) / std_delta

    # Extremeness -> probability-like score in [0, 1).
    risk = 2 * norm.cdf(df["z_score"].abs()) - 1

    # Signed risk (-1 to 1): direction comes from the raw residual,
    # magnitude from how unusual it is relative to the other rows.
    df["rel_regression_risk"] = np.sign(deltas) * risk

    return df

# === Usage for each position ===
# Score the RB, WR, TE and QB frames in that order and rebind each name to
# the frame returned for it.
df, df_wr, df_te, df_qb = (
    add_regression_risk(position_frame, delta_col="deltas")
    for position_frame in (df, df_wr, df_te, df_qb)
)


Write the final CSVs, which now include the ML expected-points columns and the relative regression-risk score.

In [89]:
# Write each position's frame to its own CSV under ../updated_files,
# leaving the row index out of the output.
for frame, out_path in (
    (df, '../updated_files/2024_fantasy_rb_stats_with_ML.csv'),
    (df_wr, '../updated_files/2024_fantasy_wr_stats_with_ML.csv'),
    (df_te, '../updated_files/2024_fantasy_te_stats_with_ML.csv'),
    (df_qb, '../updated_files/2024_fantasy_qb_stats_with_ML.csv'),
):
    frame.to_csv(out_path, index=False)