# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer

# ---------------------------------------------------------------------------
# Data loading and preprocessing
#
# Reads the heart-disease CSV, fills missing values, integer-encodes the
# categorical columns, derives a binary target from `num`, then produces a
# standardized 70/30 train/test split.
# ---------------------------------------------------------------------------
df = pd.read_csv('/content/heart_disease_uci.csv')

# Feature columns, grouped by how they are imputed below.
numeric_cols = ['age', 'trestbps', 'chol', 'thalch', 'oldpeak']
categorical_cols = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']
features = numeric_cols + categorical_cols

# Fill gaps: median for numeric columns, most frequent value for categorical.
# NOTE(review): both imputers are fit on the full frame before the split
# further down, so test rows contribute to the fill values -- confirm this
# is acceptable for the experiment.
imputer_num = SimpleImputer(strategy='median')
imputer_cat = SimpleImputer(strategy='most_frequent')
df[numeric_cols] = imputer_num.fit_transform(df[numeric_cols])
df[categorical_cols] = imputer_cat.fit_transform(df[categorical_cols])

# Replace each categorical column with integer codes; values are cast to str
# first so every column is encoded from a single, uniform type.
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))

# Binary target: 1 where `num` is greater than zero, otherwise 0.
df['target'] = df['num'].gt(0).astype(int)

# Feature matrix / target vector, then a seeded 70/30 hold-out split.
X, y = df[features], df['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize using statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

## Experiment 10: Regularization Techniques

# Best architecture from Experiment 9; passed as `hidden_layer_sizes` to every
# MLPClassifier built in this experiment so that only the regularization
# settings differ between runs.
best_architecture = (20, 10)

def evaluate_regularized_model(regularization_params):
    """Fit an MLP with the given regularization settings and score it.

    Args:
        regularization_params: Mapping of MLPClassifier keyword arguments.
            Two keys get special treatment: ``"name"`` is a display label and
            is dropped; ``"dropout"`` is not an MLPClassifier option, so it is
            converted to ``alpha = dropout * 10`` with ``solver="adam"``.
            The input mapping itself is not modified.

    Returns:
        A ``(accuracy, loss_curve)`` tuple: accuracy of the fitted model on
        the module-level ``X_test`` / ``y_test``, and the model's
        ``loss_curve_`` attribute.
    """
    # Work on a filtered copy so the caller's dict keeps its "name" entry.
    mlp_kwargs = {
        key: value
        for key, value in regularization_params.items()
        if key != "name"
    }

    # Stand-in for dropout: a stronger L2 penalty (alpha) in its place.
    if "dropout" in mlp_kwargs:
        mlp_kwargs.update(alpha=mlp_kwargs.pop("dropout") * 10, solver="adam")

    classifier = MLPClassifier(
        hidden_layer_sizes=best_architecture,
        activation='relu',
        max_iter=500,
        random_state=42,
        **mlp_kwargs,
    ).fit(X_train, y_train)

    test_accuracy = accuracy_score(y_test, classifier.predict(X_test))
    return test_accuracy, classifier.loss_curve_

# Regularization configurations to compare. Each entry holds a display
# "name" plus the keyword arguments forwarded to MLPClassifier; the pseudo
# option "dropout" is translated into an alpha value by the evaluation code.
# NOTE(review): MLPClassifier's default solver is documented as "adam", so
# the last entry presumably trains the same model as "L2 + Early Stopping"
# -- confirm whether a different solver was intended.
regularization_tests = [
    {"name": "No Regularization", "alpha": 0},
    {"name": "L2 (alpha=0.0001)", "alpha": 0.0001},
    {"name": "L2 (alpha=0.001)", "alpha": 0.001},
    {"name": "L2 (alpha=0.01)", "alpha": 0.01},
    {"name": "Early Stopping", "early_stopping": True, "validation_fraction": 0.2},
    {"name": "L2 + Early Stopping", "alpha": 0.001, "early_stopping": True},
    {"name": "Simulated Dropout (alpha=2)", "dropout": 0.2},
    {
        "name": "L2 + Early Stop + Adam",
        "alpha": 0.001,
        "early_stopping": True,
        "solver": "adam",
    },
]

# Run every regularization configuration: record its test accuracy and loss
# curve for the summary/plots, and print a per-class report for each one.
results = []
loss_curves = []

for reg_test in regularization_tests:
    accuracy, loss_curve = evaluate_regularized_model(reg_test)
    results.append({
        "Regularization": reg_test["name"],
        "Accuracy": accuracy
    })
    loss_curves.append((reg_test["name"], loss_curve))

    print(f"\nRegularization: {reg_test['name']}")
    print("Classification Report:")
    # evaluate_regularized_model returns only the accuracy and loss curve, so
    # the model is refit here with the same settings and seed to obtain the
    # predictions needed for the report and confusion matrix.
    model_params = {k: v for k, v in reg_test.items() if k not in ("name", "dropout")}
    if "dropout" in reg_test:
        # Mirror the helper's dropout handling: scaled alpha plus Adam.
        model_params["alpha"] = reg_test["dropout"] * 10
        model_params["solver"] = "adam"
    # Fixed: this previously referenced the undefined name `st_architecture`,
    # which raised NameError on the first iteration.
    model = MLPClassifier(hidden_layer_sizes=best_architecture,
                          activation='relu',
                          max_iter=500,
                          random_state=42,
                          **model_params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_pred))

# Display results: one row per configuration, best accuracy first.
results_df = pd.DataFrame(results)
print("\nSummary of Regularization Results:")
print(results_df.sort_values(by="Accuracy", ascending=False))

# Visualization: accuracy bars on the left, loss curves on the right.
plt.figure(figsize=(14, 6))

# Accuracy comparison
plt.subplot(1, 2, 1)
plt.barh(results_df['Regularization'], results_df['Accuracy'], color='lightgreen')
plt.title('Test Accuracy by Regularization Technique')
plt.xlabel('Accuracy')
# Keep the 0.7-0.9 zoom as the default view, but widen it (with a small
# margin, clamped to the valid accuracy range [0, 1]) whenever a result falls
# outside that window; a fixed window would hide or clip those bars.
acc_low = min(0.7, results_df['Accuracy'].min() - 0.02)
acc_high = max(0.9, results_df['Accuracy'].max() + 0.02)
plt.xlim(max(0.0, acc_low), min(1.0, acc_high))

# Training loss curves, one line per configuration.
plt.subplot(1, 2, 2)
for name, curve in loss_curves:
    plt.plot(curve, label=name)
plt.title('Training Loss with Regularization')
plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.legend()
plt.grid()

plt.tight_layout()
plt.show()