# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

# Load dataset
df = pd.read_csv('/content/heart_disease_uci.csv')

# Column groups: numeric features are imputed with the median; categorical
# features are imputed with the most frequent value and then integer-encoded.
numeric_cols = ['age', 'trestbps', 'chol', 'thalch', 'oldpeak']
categorical_cols = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']

# Target variable - assuming 'num' is the target (0 = no disease, >0 = disease)
df['target'] = (df['num'] > 0).astype(int)

# Select features and target
features = numeric_cols + categorical_cols
X = df[features]
y = df['target']

# Split BEFORE fitting any preprocessing step so that statistics learned for
# imputation and scaling come from the training rows only. Fitting the imputers
# on the full dataset would leak test-set information into training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Work on explicit copies so the column assignments below never write into
# (or warn about) slices of df. NOTE: df itself keeps its raw, un-imputed values.
X_train = X_train.copy()
X_test = X_test.copy()

# Impute missing values: fit on the training split, apply to both splits
imputer_num = SimpleImputer(strategy='median')
X_train[numeric_cols] = imputer_num.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = imputer_num.transform(X_test[numeric_cols])

imputer_cat = SimpleImputer(strategy='most_frequent')
X_train[categorical_cols] = imputer_cat.fit_transform(X_train[categorical_cols])
X_test[categorical_cols] = imputer_cat.transform(X_test[categorical_cols])

# Encode categorical variables
# The label -> integer mapping is built from the labels present in both splits:
# it is only a vocabulary (no statistic is learned from the values), and it
# guarantees that transform() never meets an unseen label in the test split.
label_encoders = {}
for col in categorical_cols:
    # Convert to string before encoding so mixed-type columns sort consistently
    train_labels = X_train[col].astype(str)
    test_labels = X_test[col].astype(str)

    le = LabelEncoder()
    le.fit(pd.concat([train_labels, test_labels]))
    X_train[col] = le.transform(train_labels)
    X_test[col] = le.transform(test_labels)
    label_encoders[col] = le

# Standardize features (scaler statistics come from the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Function to evaluate different configurations
def evaluate_model(hidden_layer_sizes, activation, max_iter=500):
    """Fit one MLP configuration and score it on the held-out split.

    The classifier is trained on the module-level ``X_train`` / ``y_train``
    and evaluated on the module-level ``X_test`` / ``y_test``.

    Args:
        hidden_layer_sizes: Passed through to ``MLPClassifier`` as
            ``hidden_layer_sizes``.
        activation: Passed through to ``MLPClassifier`` as ``activation``.
        max_iter: Passed through to ``MLPClassifier`` as ``max_iter``.

    Returns:
        A ``(accuracy, loss_curve)`` tuple: the test-set accuracy and the
        fitted classifier's ``loss_curve_`` attribute.
    """
    # random_state is pinned so repeated calls with the same arguments
    # produce the same fitted model.
    clf = MLPClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        max_iter=max_iter,
        random_state=42,
    ).fit(X_train, y_train)

    test_accuracy = accuracy_score(y_test, clf.predict(X_test))
    return test_accuracy, clf.loss_curve_

# Test different configurations
configurations = [
    {"hidden": (10,), "activation": "relu", "name": "Single(10) - ReLU"},
    {"hidden": (30,), "activation": "tanh", "name": "Single(30) - Tanh"},
    {"hidden": (10, 10), "activation": "relu", "name": "Double(10,10) - ReLU"},
    {"hidden": (20, 10), "activation": "logistic", "name": "Double(20,10) - Logistic"},
    {"hidden": (10, 5, 3), "activation": "relu", "name": "Triple(10,5,3) - ReLU"},
    {"hidden": (20, 15, 10), "activation": "tanh", "name": "Triple(20,15,10) - Tanh"},
]

results = []      # one summary row per configuration, in `configurations` order
loss_curves = []  # (configuration name, training loss curve) pairs

for config in configurations:
    # Fit each configuration exactly once and reuse the fitted model for the
    # accuracy, the loss curve and the detailed reports. Training a second,
    # identical model (same hyperparameters, same random_state) only to print
    # the reports would double the runtime without changing any number.
    model = MLPClassifier(hidden_layer_sizes=config["hidden"],
                          activation=config["activation"],
                          max_iter=500,
                          random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    loss_curve = model.loss_curve_

    results.append({
        "Configuration": config["name"],
        "Accuracy": accuracy,
        "Activation": config["activation"],
        "Architecture": str(config["hidden"])
    })
    loss_curves.append((config["name"], loss_curve))

    # Print detailed report for each configuration
    print(f"\nConfiguration: {config['name']}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

# Display results
# sort_values returns a sorted copy; results_df keeps the original row order.
results_df = pd.DataFrame(results)
print("\nSummary of Results:")
print(results_df.sort_values(by="Accuracy", ascending=False))

# Visualization
plt.figure(figsize=(14, 6))

# Accuracy comparison
plt.subplot(1, 2, 1)
plt.barh(results_df['Configuration'], results_df['Accuracy'], color='skyblue')
plt.title('Test Accuracy by Configuration')
plt.xlabel('Accuracy')
# Zoom the x-axis around the measured accuracies instead of a fixed window:
# a hard-coded range hides any bar that falls below it and clips any bar that
# exceeds it. The 0.05 padding is clamped to the valid accuracy range [0, 1].
acc_lo = results_df['Accuracy'].min()
acc_hi = results_df['Accuracy'].max()
plt.xlim(max(0.0, acc_lo - 0.05), min(1.0, acc_hi + 0.05))

# Training loss curves
plt.subplot(1, 2, 2)
for name, curve in loss_curves:
    plt.plot(curve, label=name)
plt.title('Training Loss Across Configurations')
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.legend()
plt.grid()

plt.tight_layout()
plt.show()

# Feature importance analysis (using weights from the best model)
# results_df rows are in the same order as `configurations`, so the positional
# index of the highest accuracy selects the matching configuration.
best_config_idx = int(results_df['Accuracy'].to_numpy().argmax())
best_config = configurations[best_config_idx]
best_model = MLPClassifier(hidden_layer_sizes=best_config["hidden"],
                           activation=best_config["activation"],
                           max_iter=500,
                           random_state=42)
best_model.fit(X_train, y_train)

# Get the weights from input to first hidden layer.
# After a successful fit, coefs_ always holds at least the input-layer weight
# matrix, so no "weights not available" fallback is needed.
input_weights = best_model.coefs_[0]
# One row per input feature: average the absolute outgoing weights per feature.
# NOTE(review): this is a rough heuristic, not a calibrated importance measure.
feature_importance = np.mean(np.abs(input_weights), axis=1)

plt.figure(figsize=(10, 6))
plt.barh(features, feature_importance)
plt.title('Feature Importance (Average Absolute Weight)')
plt.xlabel('Importance')
plt.tight_layout()
plt.show()