In [None]:
# CSE 404 Machine Learning Project NFL combine data to HOF percentage
# Group Members: Pranav Premchand, Daphne Martin, Zeeshan Naeem, Pranesh Muthukumar

# Data in combine data folder named as qb combine data.csv

In [16]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
from sklearn.svm import SVR
from sklearn.linear_model import Ridge, Lasso
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

# Step 1: Load and prepare the data
data = pd.read_csv("qb_combine_data.csv")  # Load the QB combine data

# Preprocess height column to separate feet and inches
data['Height_Feet'] = data['Ht'].apply(lambda x: int(x.split("'")[0]))
data['Height_Inches'] = data['Ht'].apply(lambda x: int(x.split("'")[1].replace('"', '')))

X = data[['Height_Feet', 'Height_Inches', 'Wt', '40yd', 'Vertical', 'Broad Jump', '3Cone', 'Shuttle']]  # Features
y = data['Hof']  # Target variable (continuous)

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2.1: Handle missing values with mean imputation
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

# Reshape input data for LSTM
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

# Step 3: Build and train the support vector regression model
svr_model = SVR()  # Default SVR
svr_model.fit(X_train_imputed, y_train)

# Step 4: Build and train the Ridge (L2 regularization) model
ridge_model = Ridge(alpha=0.1)  # You can adjust the alpha parameter for tuning the strength of regularization
ridge_model.fit(X_train_imputed, y_train)

# Step 5: Build and train the Lasso (L1 regularization) model
lasso_model = Lasso(alpha=0.1)  # You can adjust the alpha parameter for tuning the strength of regularization
lasso_model.fit(X_train_imputed, y_train)

# Step 6: Build and train the LSTM model
lstm_model = Sequential([
    LSTM(64, input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])),  # LSTM layer
    Dropout(0.2),  # Dropout layer to prevent overfitting
    Dense(1)  # Output layer
])

lstm_model.compile(optimizer='adam', loss='mean_squared_error')

lstm_model.fit(X_train_reshaped, y_train, epochs=20, batch_size=64, validation_split=0.2)

# Step 7: Evaluate the models
svr_mse = mean_squared_error(y_test, svr_model.predict(X_test_imputed))
ridge_mse = mean_squared_error(y_test, ridge_model.predict(X_test_imputed))
lasso_mse = mean_squared_error(y_test, lasso_model.predict(X_test_imputed))
lstm_mse = mean_squared_error(y_test, lstm_model.predict(X_test_reshaped))

print("Support Vector Regression Mean Squared Error:", svr_mse)
print("Ridge Regression Mean Squared Error:", ridge_mse)
print("Lasso Regression Mean Squared Error:", lasso_mse)
print("LSTM Mean Squared Error:", lstm_mse)

# Define a fixed random seed
random.seed(42)

# Step 8: Define function to predict HOF percentage for a single player's combine data
def predict_hof_percentage(height_feet, height_inches, weight, forty_yard, vertical_jump, three_cone, shuttle):
    # Count the number of missing inputs
    missing_inputs = [forty_yard, vertical_jump, three_cone, shuttle].count(None)

    if missing_inputs >= 3:
        # If 3 or more inputs are missing, apply a flat 30% boost to all Hall of Fame scores
        boost = 1.3  # 30%
    else:
        boost = 0

    # Apply weights to features
    weights = {
        '40yd': 0.4,
        'Vertical': 0.3,
        'Height_Feet': 0.1,
        'Wt': 0.1,
        '3Cone': 0.05,
        'Shuttle': 0.05
    }

    # Fill None values with averages
    imputed_forty_yard = X_train['40yd'].mean() if forty_yard is None else forty_yard
    imputed_vertical_jump = X_train['Vertical'].mean() if vertical_jump is None else vertical_jump
    imputed_three_cone = X_train['3Cone'].mean() if three_cone is None else three_cone
    imputed_shuttle = X_train['Shuttle'].mean() if shuttle is None else shuttle

    # Scale the features based on weights
    scaled_forty_yard = imputed_forty_yard * weights['40yd']
    scaled_vertical_jump = imputed_vertical_jump * weights['Vertical']
    scaled_height_weight = ((height_feet * 12 + height_inches) / 100 + weight / 300) * weights['Height_Feet']
    scaled_three_cone = imputed_three_cone * weights['3Cone']
    scaled_shuttle = imputed_shuttle * weights['Shuttle']


    # Calculate the predicted Hall of Fame percentage using SVR
    scaled_features = np.array([[scaled_height_weight, scaled_forty_yard, scaled_vertical_jump, weight, imputed_forty_yard, imputed_vertical_jump, imputed_three_cone, imputed_shuttle]])
    scaled_features_imputed = imputer.transform(scaled_features)
    scaled_features_scaled = scaler.transform(scaled_features_imputed)
    scaled_features_reshaped = scaled_features_scaled.reshape(1, 1, scaled_features_scaled.shape[1])

    # Predict Hall of Fame percentage using each model
    svr_hof_percentage = svr_model.predict(scaled_features_imputed)[0]
    ridge_hof_percentage = ridge_model.predict(scaled_features_imputed)[0]
    lasso_hof_percentage = lasso_model.predict(scaled_features_imputed)[0]
    lstm_hof_percentage = lstm_model.predict(scaled_features_reshaped)[0]

    # Add 30% boost to all Hall of Fame scores
    svr_hof_percentage += boost * svr_hof_percentage
    ridge_hof_percentage += boost * ridge_hof_percentage
    lasso_hof_percentage += boost * lasso_hof_percentage
    lstm_hof_percentage += boost * lstm_hof_percentage

    return svr_hof_percentage, ridge_hof_percentage, lasso_hof_percentage, lstm_hof_percentage

# Inputs for the hof prediction function
height_feet = 6
height_inches = 1
weight = 216 # pounds
forty_yard = None # seconds
vertical_jump = None # inches
three_cone = None # seconds
shuttle = None # seconds

svr_hof_percentage, ridge_hof_percentage, lasso_hof_percentage, lstm_hof_percentage = predict_hof_percentage(height_feet, height_inches, weight, forty_yard, vertical_jump, three_cone, shuttle)
print("Predicted Hall of Fame Percentage (SVR):", svr_hof_percentage*100)
print("Predicted Hall of Fame Percentage (Ridge):", ridge_hof_percentage*100)
print("Predicted Hall of Fame Percentage (Lasso):", lasso_hof_percentage*100)
print("Predicted Hall of Fame Percentage (LSTM):", lstm_hof_percentage*100)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Support Vector Regression Mean Squared Error: 0.06390573231342837
Ridge Regression Mean Squared Error: 0.0669366343869932
Lasso Regression Mean Squared Error: 0.0672040207656592
LSTM Mean Squared Error: 0.06735296571694052
Predicted Hall of Fame Percentage (SVR): 25.438747551824285
Predicted Hall of Fame Percentage (Ridge): -560.0231942275033
Predicted Hall of Fame Percentage (Lasso): 11.668634453781513
Predicted Hall of Fame Percentage (LSTM): [93.53237]


