In [24]:
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [25]:
# Location of the source CSV; edit this single line to point at another copy.
path = '/data/NFL_Data.csv'

# Read the whole file into a DataFrame with pandas' default parsing options.
data = pd.read_csv(filepath_or_buffer=path)

In [26]:
# Preview the loaded DataFrame: a bare last expression is rendered with the
# notebook's rich display instead of plain-text print().
data

Unnamed: 0,Team_Season,Team,QB,RB,WR,TE,OL,Offense,IDL,EDGE,...,OL_P,IDL_P,EDGE_P,LB_P,S_P,CB_P,Defense_P,Offense_P,Dead_Open_Specials,W
0,Cardinals 2022,Cardinals,16348925,6213758,24621833,7121287,44681251,98987054,23503243,9842220,...,0.214607,0.112888,0.047273,0.073820,0.094174,0.034218,0.362372,0.475442,0.162186,4
1,Falcons 2022,Falcons,6303695,4894510,9535858,10678128,32848444,64260635,18104464,6077102,...,0.157774,0.086957,0.029189,0.023618,0.021461,0.067402,0.228627,0.308649,0.462725,7
2,Ravens 2022,Ravens,24100655,8179654,7616822,12307075,38678276,90882482,11195023,14877531,...,0.185775,0.053771,0.071458,0.037714,0.057958,0.154194,0.375094,0.436515,0.188390,10
3,Bills 2022,Bills,20230481,7299886,18982654,4501683,29350967,80365671,16458562,12078022,...,0.140975,0.079052,0.058012,0.115715,0.087184,0.126374,0.466336,0.386002,0.147662,13
4,Panthers 2022,Panthers,20461726,3829960,12882679,5253310,33291588,75719263,11405978,9462303,...,0.159902,0.054784,0.045448,0.111972,0.032674,0.080464,0.325342,0.363685,0.310973,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,Patriots 2013,Patriots,14603953,3682367,8749814,4150882,22775490,53962506,15169500,6420702,...,0.184268,0.122731,0.051947,0.087388,0.059763,0.069731,0.391560,0.436590,0.171850,12
316,Packers 2013,Packers,13013000,2429420,10223980,11341695,17211503,54219598,15831195,12650974,...,0.139252,0.128084,0.102354,0.073631,0.040204,0.112922,0.457196,0.438670,0.104135,8
317,Steelers 2013,Steelers,15393138,2549641,8799844,9207474,10836255,46786352,12586770,12823812,...,0.087672,0.101835,0.103753,0.074072,0.133337,0.089562,0.502558,0.378530,0.118912,8
318,Commanders 2013,Commanders,5972967,2212882,19735321,4645357,20025614,52592141,14227749,9152416,...,0.162020,0.115111,0.074049,0.084763,0.037376,0.057463,0.368762,0.425503,0.205735,3


In [27]:
# Model inputs: the twelve "*_P" columns of `data`.
# NOTE(review): these look like per-group fractions of a team total (values in
# the preview lie between 0 and 1) -- confirm against the data dictionary.
features = [
    'QB_P', 'RB_P', 'WR_P', 'TE_P', 'OL_P',
    'IDL_P', 'EDGE_P', 'LB_P', 'S_P', 'CB_P',
    'Defense_P', 'Offense_P',
]

# Regression target column.
target = 'W'

# Plain NumPy arrays for scikit-learn: X is the 2-D feature matrix, y the 1-D target.
y = data[target].values
X = data[features].values

In [28]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [29]:
# Learn the per-feature mean/std from the training split only, then apply the
# same transformation to both splits so no test-set statistics leak into it.
# NOTE: this cell rebinds X_train / X_test to their scaled versions, so it is
# not idempotent -- re-run the split cell before re-running this one, otherwise
# the scaler is re-fitted on already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [30]:
# Random forest regressor: 50 trees, each limited to depth 10, with a fixed
# seed so repeated runs build the same forest.
rf_model = RandomForestRegressor(
    n_estimators=50,
    max_depth=10,
    random_state=42,
)

# Fit on the training split (kept as the last expression so the fitted
# estimator is displayed as the cell output).
rf_model.fit(X_train, y_train)

In [31]:

# Predict the target for both splits with the fitted forest; the training
# predictions are kept so train and test error can be compared later.
y_train_pred, y_test_pred = (
    rf_model.predict(split) for split in (X_train, X_test)
)

In [32]:
# Mean squared error on each split (units: squared target units).
# A test error far above the training error points to overfitting.
train_mse = mean_squared_error(y_true=y_train, y_pred=y_train_pred)
test_mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred)

print(f"Training MSE: {train_mse}")
print(f"Test MSE: {test_mse}")

Training MSE: 1.65128838441168
Test MSE: 12.601258696294865


In [33]:
# Coefficient of determination (R^2) on each split.
# R^2 = 1 is a perfect fit; a value at or below 0 means the model does no
# better than always predicting the mean of the true values.
train_r2 = r2_score(y_true=y_train, y_pred=y_train_pred)
test_r2 = r2_score(y_true=y_test, y_pred=y_test_pred)

print(f"Training R^2: {train_r2}")
print(f"Test R^2: {test_r2}")

Training R^2: 0.8130961306911064
Test R^2: -0.0059002888217916105


In [34]:
import joblib

In [35]:
# Persist the fitted model and the fitted scaler side by side in ./models so
# they can be reloaded together later.
# joblib.dump does not create missing parent directories, so make sure the
# target folder exists first; exist_ok=True keeps re-runs of this cell harmless.
Path("models").mkdir(parents=True, exist_ok=True)

joblib.dump(rf_model, "models/rf_model.pkl")  # the trained random forest
# Last expression of the cell: its return value (the written file name) is
# shown as the cell output.
joblib.dump(scaler, "models/scaler.pkl")  # the scaler fitted alongside the model

['models/scaler.pkl']

In [36]:
# Sanity-check the target: its extremes and the distinct values it takes.
print("Min W:", np.min(y))
print("Max W:", np.max(y))
print("Unique W Values:", np.unique(y))


Min W: 0
Max W: 15
Unique W Values: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]


In [37]:
# Importance scores of the fitted forest, one per input column, listed in the
# same order as `features` and printed to four decimal places.
feature_importances = rf_model.feature_importances_
for feat_name, feat_weight in zip(features, feature_importances):
    print(f"{feat_name}: {feat_weight:.4f}")


QB_P: 0.0717
RB_P: 0.0948
WR_P: 0.0568
TE_P: 0.0716
OL_P: 0.0733
IDL_P: 0.0741
EDGE_P: 0.1063
LB_P: 0.0839
S_P: 0.1225
CB_P: 0.0827
Defense_P: 0.0929
Offense_P: 0.0694
