In [None]:
# CSE 404 Machine Learning Project: predicting Hall of Fame (HOF) percentage from NFL combine data
# Group Members: Pranav Premchand, Daphne Martin, Zeeshan Naeem, Pranesh Muthukumar

# Data: stored in the combine data folder; this notebook loads it as qb_combine_data.csv

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.impute import SimpleImputer
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.models import Sequential

# Step 0: Seed the random number generators.
# The LSTM's weight initialisation, dropout and batch shuffling are random; without
# a seed the reported LSTM error changes on every "Restart & Run All".
# (Seeding makes runs repeatable on a given setup; some GPU ops may still vary.)
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Step 1: Load and prepare the data
data = pd.read_csv("qb_combine_data.csv")  # Load the QB combine data

# Preprocess height column to separate feet and inches.
# The parsing below expects 'Ht' strings shaped like 6'4" (feet, apostrophe, inches, optional quote).
data['Height_Feet'] = data['Ht'].apply(lambda x: int(x.split("'")[0]))
data['Height_Inches'] = data['Ht'].apply(lambda x: int(x.split("'")[1].replace('"', '')))

X = data[['Height_Feet', 'Height_Inches', 'Wt', '40yd', 'Vertical', 'Broad Jump', '3Cone', 'Shuttle']]  # Features
y = data['Hof']  # Target variable (continuous)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Step 2.1: Handle missing values with mean imputation.
# The imputer is fitted on the training split only, then reused on the test split,
# so no test-set statistics leak into training.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Normalize the features (statistics again come from the training split only).
# These scaled arrays feed the LSTM; `scaler` is also reused at prediction time.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Reshape input data for LSTM: (samples, 1 timestep, features)
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

# Step 3: Build and train the support vector regression model.
# SVMs are not scale-invariant, so the SVR is wrapped in a pipeline that standardises
# its inputs first. The pipeline still takes the *imputed, unscaled* features, so every
# existing `svr_model.predict(<imputed data>)` call keeps working unchanged.
svr_model = make_pipeline(StandardScaler(), SVR())  # Default SVR on standardised features
svr_model.fit(X_train_imputed, y_train)

# Step 4: Build and train the Ridge (L2 regularization) model
# NOTE(review): Ridge/Lasso penalties also depend on feature scale. They are left on the
# unscaled features so the alpha values below keep their current meaning; consider
# standardising and re-tuning alpha together.
ridge_model = Ridge(alpha=0.1)  # You can adjust the alpha parameter for tuning the strength of regularization
ridge_model.fit(X_train_imputed, y_train)

# Step 5: Build and train the Lasso (L1 regularization) model
lasso_model = Lasso(alpha=0.1)  # You can adjust the alpha parameter for tuning the strength of regularization
lasso_model.fit(X_train_imputed, y_train)

# Step 6: Build and train the LSTM model
lstm_model = Sequential([
    LSTM(64, input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])),  # LSTM layer
    Dropout(0.2),  # Dropout layer to prevent overfitting
    Dense(1)  # Output layer
])

lstm_model.compile(optimizer='adam', loss='mean_squared_error')

lstm_model.fit(X_train_reshaped, y_train, epochs=20, batch_size=64, validation_split=0.2)

# Step 7: Evaluate the models on the held-out test split
svr_mse = mean_squared_error(y_test, svr_model.predict(X_test_imputed))
ridge_mse = mean_squared_error(y_test, ridge_model.predict(X_test_imputed))
lasso_mse = mean_squared_error(y_test, lasso_model.predict(X_test_imputed))
# The network outputs one column per sample; flatten it so it lines up with y_test explicitly.
lstm_mse = mean_squared_error(y_test, lstm_model.predict(X_test_reshaped).ravel())

print("Support Vector Regression Mean Squared Error:", svr_mse)
print("Ridge Regression Mean Squared Error:", ridge_mse)
print("Lasso Regression Mean Squared Error:", lasso_mse)
print("LSTM Mean Squared Error:", lstm_mse)

# Step 8: Define function to predict HOF percentage for a single player's combine data
def predict_hof_percentage(height_feet, height_inches, weight, forty_yard, vertical_jump, broad_jump, three_cone, shuttle):
    """Predict the HOF target for one player's combine measurements with all four models.

    The measurements are placed in a one-row DataFrame whose columns follow the
    training feature order, then passed through the already-fitted module-level
    ``imputer`` and ``scaler``. The SVR, Ridge and Lasso models receive the imputed
    features; the LSTM receives the scaled features reshaped to (1, 1, n_features).

    Args:
        height_feet: Feet part of the player's height.
        height_inches: Inches part of the player's height.
        weight: Weight (the 'Wt' feature).
        forty_yard: 40-yard dash result (the '40yd' feature).
        vertical_jump: Vertical jump (the 'Vertical' feature).
        broad_jump: Broad jump (the 'Broad Jump' feature).
        three_cone: Three-cone drill result (the '3Cone' feature).
        shuttle: Shuttle result (the 'Shuttle' feature).

    Returns:
        A tuple ``(svr, ridge, lasso, lstm)`` of plain Python floats, one prediction
        per model, on the same scale as the training target (not multiplied by 100).
    """
    feature_names = ['Height_Feet', 'Height_Inches', 'Wt', '40yd', 'Vertical', 'Broad Jump', '3Cone', 'Shuttle']
    new_data = pd.DataFrame(
        [[height_feet, height_inches, weight, forty_yard, vertical_jump, broad_jump, three_cone, shuttle]],
        columns=feature_names,
    )
    new_data_imputed = imputer.transform(new_data)
    new_data_scaled = scaler.transform(new_data_imputed)
    new_data_reshaped = new_data_scaled.reshape(1, 1, new_data_scaled.shape[1])

    svr_prediction = float(svr_model.predict(new_data_imputed)[0])
    ridge_prediction = float(ridge_model.predict(new_data_imputed)[0])
    lasso_prediction = float(lasso_model.predict(new_data_imputed)[0])
    # The network returns a 2-D array (one row per sample, one output column).
    # Indexing with [0] alone hands back a 1-element array, which prints as "[15.68...]";
    # flatten first so the LSTM result is a scalar like the other three.
    lstm_prediction = float(lstm_model.predict(new_data_reshaped).ravel()[0])

    return svr_prediction, ridge_prediction, lasso_prediction, lstm_prediction

# Step 9: Example usage of the prediction function
# Combine measurements for one sample quarterback.
height_feet, height_inches = 6, 4
weight = 211          # pounds
forty_yard = 5.28     # seconds
vertical_jump = 24.5  # inches
broad_jump = 99       # inches
three_cone = 7.2      # seconds
shuttle = 4.38        # seconds

# Ask every trained model for its prediction; results come back in SVR, Ridge, Lasso, LSTM order.
(
    svr_hof_percentage,
    ridge_hof_percentage,
    lasso_hof_percentage,
    lstm_hof_percentage,
) = predict_hof_percentage(
    height_feet=height_feet,
    height_inches=height_inches,
    weight=weight,
    forty_yard=forty_yard,
    vertical_jump=vertical_jump,
    broad_jump=broad_jump,
    three_cone=three_cone,
    shuttle=shuttle,
)

# Predictions are on the target's own scale; multiply by 100 to report them as percentages.
print("Predicted Hall of Fame Percentage (SVR):", 100 * svr_hof_percentage)
print("Predicted Hall of Fame Percentage (Ridge):", 100 * ridge_hof_percentage)
print("Predicted Hall of Fame Percentage (Lasso):", 100 * lasso_hof_percentage)
print("Predicted Hall of Fame Percentage (LSTM):", 100 * lstm_hof_percentage)




Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20




Support Vector Regression Mean Squared Error: 0.06390573231342837
Ridge Regression Mean Squared Error: 0.0669366343869932
Lasso Regression Mean Squared Error: 0.0672040207656592
LSTM Mean Squared Error: 0.07118060810619492
Predicted Hall of Fame Percentage (SVR): 10.019855615576006
Predicted Hall of Fame Percentage (Ridge): 5.565339933277684
Predicted Hall of Fame Percentage (Lasso): 5.073319327731092
Predicted Hall of Fame Percentage (LSTM): [15.684931]
