In [3]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif

# Load the main dataset
data = pd.read_csv("/content/nba_stats.csv")  # Ensure this path is correct for your setup

# Separate features and target
X = data.drop("Pos", axis=1)  # Features
y = data["Pos"]               # Target variable

# Convert categorical columns to numeric (e.g., "Tm" for team) using one-hot encoding
X = pd.get_dummies(X, columns=["Tm"])

# Feature Selection: Select the top 20 features based on ANOVA F-value
selector = SelectKBest(f_classif, k=20)
X = selector.fit_transform(X, y)

# Standardize the feature data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Define a parameter grid for GridSearchCV
param_grid = {
    'hidden_layer_sizes': [(50, 30), (100,), (50, 30, 10)],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01],
    'learning_rate': ['constant', 'adaptive']
}

# Initialize the MLPClassifier and GridSearchCV
# Early stopping was causing issues, so it has been turned off.
mlp = MLPClassifier(max_iter=1000, random_state=0, early_stopping=False, validation_fraction=0.1)
grid_search = GridSearchCV(mlp, param_grid, cv=5, n_jobs=-1, verbose=0)

# Fit the model using grid search
grid_search.fit(X_train, y_train)

# Get the best model from grid search
best_mlp = grid_search.best_estimator_

# Make predictions with the best model
train_predictions = best_mlp.predict(X_train)
test_predictions = best_mlp.predict(X_test)

# Evaluate the model on training and test sets
train_accuracy = accuracy_score(y_train, train_predictions)
test_accuracy = accuracy_score(y_test, test_predictions)
train_conf_matrix = confusion_matrix(y_train, train_predictions)
test_conf_matrix = confusion_matrix(y_test, test_predictions)

print(f"\nTraining Accuracy: {train_accuracy:.2f}")
print("Training Confusion Matrix:")
print(train_conf_matrix)

print(f"\nTest Accuracy: {test_accuracy:.2f}")
print("Test Confusion Matrix:")
print(test_conf_matrix)

# Cross-validation using the best model
cv_scores = cross_val_score(best_mlp, X, y, cv=10)
print("\nCross-validation Scores:", cv_scores)
print("Average Cross-validation Accuracy:", np.mean(cv_scores))

# Load the dummy test set
dummy_test = pd.read_csv("/content/dummy_test.csv")  # Ensure this path is correct for your setup
y_dummy = dummy_test["Pos"]  # Extract target variable from dummy test set

# Preprocess the dummy test data
dummy_test = dummy_test.drop("Pos", axis=1)  # Drop target column from dummy test set
dummy_test = pd.get_dummies(dummy_test, columns=["Tm"])  # Ensure same encoding as training data
dummy_test = selector.transform(dummy_test)  # Select same features as training data
dummy_test = scaler.transform(dummy_test)    # Apply scaling

# Evaluate the model on the dummy test set
dummy_predictions = best_mlp.predict(dummy_test)
dummy_accuracy = accuracy_score(y_dummy, dummy_predictions)
dummy_conf_matrix = confusion_matrix(y_dummy, dummy_predictions)

print(f"\nDummy Test Accuracy: {dummy_accuracy:.2f}")
print("Dummy Test Confusion Matrix:")
print(dummy_conf_matrix)




Training Accuracy: 0.94
Training Confusion Matrix:
[[127   1   0   1   0]
 [  2 132   2   2   0]
 [  0   1 130   2   1]
 [  1   5   1 114  10]
 [  0   1   2   7 142]]

Test Accuracy: 0.60
Test Confusion Matrix:
[[21  8  0  0  0]
 [ 8 15  3 13  2]
 [ 0  0 28  2  2]
 [ 1  3  2 14  9]
 [ 0  1  8  6 25]]





Cross-validation Scores: [0.60465116 0.56976744 0.55813953 0.56976744 0.58139535 0.51764706
 0.56470588 0.62352941 0.44705882 0.52941176]
Average Cross-validation Accuracy: 0.5566073871409029

Dummy Test Accuracy: 0.80
Dummy Test Confusion Matrix:
[[15  1  0  0  0]
 [ 1 16  1  2  0]
 [ 0  0 16  1  1]
 [ 1  2  1 17  6]
 [ 0  0  1  3 18]]


