In [3]:
#!pip install pandas numpy scikit-learn matplotlib keras tensorflow tqdm deap simanneal --quiet

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve
from keras.models import Sequential
from keras.layers import Dense

# Load and Preprocess the Data

In [6]:
# Read the Kaggle heart-disease CSV from the working directory into a DataFrame
df = pd.read_csv(filepath_or_buffer='heart_statlog_cleveland_hungary_final.csv')

# Preview the leading rows (five, which is also head()'s default)
df.head(n=5)

Unnamed: 0,age,sex,chest pain type,resting bp s,cholesterol,fasting blood sugar,resting ecg,max heart rate,exercise angina,oldpeak,ST slope,target
0,40,1,2,140,289,0,0,172,0,0.0,1,0
1,49,0,3,160,180,0,0,156,0,1.0,2,1
2,37,1,2,130,283,0,1,98,0,0.0,1,0
3,48,0,4,138,214,0,0,108,1,1.5,2,1
4,54,1,3,150,195,0,0,122,0,0.0,1,0


In [8]:
# Define features and target.
# NOTE(review): the preview of `df` lists an 'Unnamed: 0' column that simply counts rows.
# If the CSV really carries it, it is presumably a saved row index rather than a clinical
# measurement, so it is dropped when present (errors='ignore' makes this a no-op otherwise).
# TODO confirm against the raw file.
X = df.drop(columns=['target', 'Unnamed: 0'], errors='ignore')
y = df['target']

# Split the data into training and testing sets (80/20, stratified on the target so both
# splits keep the same class balance; fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Standardize the data: fit the scaler on the training split only and reuse its statistics
# for the test split, so no information from the test rows enters the preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define Neural Network Models

In [9]:
# Base Model with Gradient Descent
def build_model(input_dim=None):
    """Build and compile the baseline feed-forward binary classifier.

    Architecture: Dense(16, relu) -> Dense(8, relu) -> Dense(1, sigmoid), compiled with
    the 'adam' optimizer, binary cross-entropy loss and an accuracy metric.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. When omitted, falls back to ``X_train.shape[1]``
        (the notebook-level training matrix), matching the zero-argument call.

    Returns
    -------
    The compiled, untrained ``Sequential`` model.
    """
    # Local import: the notebook's import cell only brings in Dense from keras.layers.
    from keras.layers import Input

    if input_dim is None:
        input_dim = X_train.shape[1]

    model = Sequential()
    # Declare the input shape with an explicit Input layer rather than passing
    # `input_dim` to the first Dense layer, which recent Keras versions warn about.
    model.add(Input(shape=(input_dim,)))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Instantiate the baseline network and train it on the standardized training split
model_gd = build_model()
model_gd.fit(x=X_train, y=y_train, batch_size=10, epochs=50, verbose=1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5985 - loss: 0.6686
Epoch 2/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7951 - loss: 0.4992
Epoch 3/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8371 - loss: 0.4127
Epoch 4/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8516 - loss: 0.3722
Epoch 5/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8469 - loss: 0.3610
Epoch 6/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8291 - loss: 0.3921
Epoch 7/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8240 - loss: 0.3845
Epoch 8/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8448 - loss: 0.3665
Epoch 9/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x21f2db0cf70>

# Define Optimization Techniques

In [19]:
# Genetic Algorithm Implementation
from deap import base, creator, tools, algorithms
import random
from sklearn.neural_network import MLPClassifier
from tqdm import tqdm

def genetic_algorithm(X_train, y_train, X_test, y_test,
                      n_generations=5, pop_size=10, low=5, up=50):
    """Search the two hidden-layer sizes of an MLPClassifier with a genetic algorithm.

    Each individual is a list of two genes, one per hidden layer. Fitness is the
    accuracy of an MLPClassifier trained on ``X_train``/``y_train`` and scored on
    ``X_test``/``y_test``.

    NOTE(review): because fitness is measured on the data passed as ``X_test``, any
    later metric computed on that same data is optimistic. Pass a separate validation
    split here if the reported test metrics must be unbiased.

    Parameters
    ----------
    X_train, y_train : training features and labels used to fit every candidate.
    X_test, y_test : features and labels used to score every candidate.
    n_generations : int, number of generations to run (default 5).
    pop_size : int, number of individuals in the population (default 10).
    low, up : int, inclusive bounds for a layer size (defaults 5 and 50).

    Returns
    -------
    MLPClassifier
        A new classifier with the best layer sizes found, fitted on ``X_train``.
    """
    def decode(individual):
        # cxBlend replaces genes with float blends that can land outside [low, up],
        # including zero or negative values that MLPClassifier rejects. Truncate to
        # int as before, then clamp into the valid range.
        return tuple(min(up, max(low, int(gene))) for gene in individual)

    def eval_nn(individual):
        clf = MLPClassifier(hidden_layer_sizes=decode(individual), max_iter=10000)
        clf.fit(X_train, y_train)
        accuracy = accuracy_score(y_test, clf.predict(X_test))
        return (accuracy,)  # DEAP expects fitness values as a tuple

    # creator.create() registers classes globally; remove earlier definitions so that
    # re-running the cell does not stack duplicate class registrations.
    if 'FitnessMax' in creator.__dict__:
        del creator.FitnessMax
    if 'Individual' in creator.__dict__:
        del creator.Individual

    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()
    toolbox.register("attr_int", random.randint, low, up)
    toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_int, n=2)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    toolbox.register("mate", tools.cxBlend, alpha=0.5)
    toolbox.register("mutate", tools.mutUniformInt, low=low, up=up, indpb=0.2)
    toolbox.register("select", tools.selTournament, tournsize=3)
    toolbox.register("evaluate", eval_nn)

    population = toolbox.population(n=pop_size)

    # Progress bar
    for _ in tqdm(range(n_generations), desc="Genetic Algorithm Progress"):
        offspring = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.2)
        # Train only individuals without a valid fitness (new, mated or mutated);
        # untouched clones keep the score they already earned.
        needs_eval = [ind for ind in offspring if not ind.fitness.valid]
        fits = toolbox.map(toolbox.evaluate, needs_eval)
        for fit, ind in zip(fits, needs_eval):
            ind.fitness.values = fit
        population = toolbox.select(offspring, k=len(population))

    best_ind = tools.selBest(population, 1)[0]
    best_model = MLPClassifier(hidden_layer_sizes=decode(best_ind), max_iter=10000)
    best_model.fit(X_train, y_train)

    return best_model

# Run the genetic search and keep the classifier refitted with the best layer sizes
model_ga = genetic_algorithm(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

Genetic Algorithm Progress: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [02:01<00:00, 24.26s/it]


In [34]:
from simanneal import Annealer
from tqdm import tqdm
import random
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np

class NNAnnealer(Annealer):
    """Annealer whose state is a list of MLPClassifier hidden-layer sizes.

    ``move`` perturbs one layer size, and ``energy`` trains a fresh MLPClassifier on
    ``X_train``/``y_train`` and returns the negated accuracy on ``X_test``/``y_test``,
    so that minimising energy maximises accuracy.

    A tqdm progress bar tracks the run. It is created when ``anneal`` starts (unless
    the caller assigned one to ``progress_bar`` beforehand) and is always closed when
    ``anneal`` finishes, including on error.
    """

    def __init__(self, state, X_train, y_train, X_test, y_test):
        """Store the initial state and the data used to fit and score candidates."""
        super(NNAnnealer, self).__init__(state)
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.steps = 50
        # Built lazily in anneal(): callers may change `steps` or install their own bar
        # after construction, and an eagerly created bar would be drawn and never closed.
        self.progress_bar = None

    def move(self):
        """Shift one randomly chosen layer size by +/-1..5, never going below 5."""
        # Randomly change one layer size
        idx = random.randint(0, len(self.state) - 1)
        change = random.choice([-1, 1]) * random.randint(1, 5)
        self.state[idx] = max(5, self.state[idx] + change)  # Ensure layer size is at least 5

    def energy(self):
        """Return the negative accuracy of an MLP built from the current state."""
        # Evaluate the current state
        hidden_layer_sizes = tuple(self.state)
        clf = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, max_iter=50, early_stopping=True, n_iter_no_change=5)
        clf.fit(self.X_train, self.y_train)
        predictions = clf.predict(self.X_test)
        accuracy = accuracy_score(self.y_test, predictions)
        return -accuracy  # Negative because we want to maximize accuracy

    def update(self, step, T, E, acceptance, improvement):
        """Progress callback: move the bar to the step reported by the annealer."""
        bar = self.progress_bar
        if bar is None:
            return
        # Sync to the reported step instead of adding 1 per callback: the callback count
        # need not equal the step count (the recorded run ended at 51 of 50).
        target = step if bar.total is None else min(step, bar.total)
        delta = target - bar.n
        if delta > 0:
            bar.update(delta)

    def anneal(self):
        """Run the annealing schedule and return ``(best_state, best_energy)``."""
        if self.progress_bar is None:
            self.progress_bar = tqdm(total=self.steps, desc="Simulated Annealing Progress")
        try:
            return super(NNAnnealer, self).anneal()
        finally:
            # Close the bar even if training raises, and clear it so that a later
            # anneal() call starts with a fresh one.
            self.progress_bar.close()
            self.progress_bar = None

def simulated_annealing(X_train, y_train, X_test, y_test, steps=50, subset_fraction=0.05):
    """Tune two MLP hidden-layer sizes with simulated annealing, then refit on all data.

    The search runs on stratified subsets (``subset_fraction`` of each split, fixed
    ``random_state=42``) to keep each candidate evaluation cheap. The returned model
    is refitted on the full ``X_train``/``y_train``.

    NOTE(review): candidates are scored on a subset of ``X_test``, so later metrics on
    ``X_test`` are not fully independent of the search. With the default 5% the scoring
    subset is also very small, which makes the accuracy signal noisy.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : features and labels from which the scoring subset is drawn.
    steps : int, number of annealing steps (default 50).
    subset_fraction : float, fraction of each split used during the search
        (default 0.05).

    Returns
    -------
    MLPClassifier
        Classifier with the best layer sizes found, fitted on ``X_train``.
    """
    # Use a small stratified subset of the data for optimization
    X_train_small, _, y_train_small, _ = train_test_split(X_train, y_train, train_size=subset_fraction, random_state=42, stratify=y_train)
    X_test_small, _, y_test_small, _ = train_test_split(X_test, y_test, train_size=subset_fraction, random_state=42, stratify=y_test)

    # Initial state (starting point): two hidden layers of 5-20 units each
    init_state = [random.randint(5, 20) for _ in range(2)]

    # Create an instance of the NNAnnealer class
    annealer = NNAnnealer(init_state, X_train_small, y_train_small, X_test_small, y_test_small)

    # Set the annealing schedule
    annealer.steps = steps  # Total steps in the annealing process
    annealer.Tmax = 1.0  # Starting temperature
    annealer.Tmin = 0.01  # Ending temperature

    # Make sure exactly one progress bar with the right total exists. Reuse the
    # annealer's own bar when it fits; otherwise close it before installing a new one,
    # so no orphaned bar stays on screen.
    existing_bar = getattr(annealer, "progress_bar", None)
    if existing_bar is None or existing_bar.total != annealer.steps:
        if existing_bar is not None:
            existing_bar.close()
        annealer.progress_bar = tqdm(total=annealer.steps, desc="Simulated Annealing Progress")

    # Perform the annealing process
    state, _best_energy = annealer.anneal()

    # Train the final model with the best found state
    best_hidden_layer_sizes = tuple(state)
    best_model = MLPClassifier(hidden_layer_sizes=best_hidden_layer_sizes, max_iter=50, early_stopping=True, n_iter_no_change=5)
    best_model.fit(X_train, y_train)

    return best_model

# Run the simulated-annealing search and keep the refitted best classifier
model_sa = simulated_annealing(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

Simulated Annealing Progress:   0%|                                                                                                 | 0/50 [00:00<?, ?it/s]
Simulated Annealing Progress:   0%|                                                                                                 | 0/50 [00:00<?, ?it/s][A

Simulated Annealing Progress:  24%|████████████████████▉                                                                  | 12/50 [00:00<00:00, 116.85it/s][A
Simulated Annealing Progress:  48%|██████████████████████████████████████████▏                                             | 24/50 [00:00<00:00, 90.35it/s][A
Simulated Annealing Progress:  68%|███████████████████████████████████████████████████████████▊                            | 34/50 [00:00<00:00, 84.13it/s][A
Simulated Annealing Progress: 51it [00:00, 91.50it/s]                                                                                                      [A


In [35]:
def randomized_hill_climbing(X_train, y_train, X_test, y_test, iterations=100, subset_fraction=0.05):
    """Tune two MLP hidden-layer sizes by hill climbing, then refit on all data.

    Starting from random layer sizes, each iteration perturbs one layer size and keeps
    the neighbour only if it scores strictly higher. The search runs on stratified
    subsets (``subset_fraction`` of each split, fixed ``random_state=42``). The returned
    model is refitted on the full ``X_train``/``y_train``.

    NOTE(review): candidates are scored on a subset of ``X_test``, so later metrics on
    ``X_test`` are not fully independent of the search.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : features and labels from which the scoring subset is drawn.
    iterations : int, number of neighbours to try (default 100).
    subset_fraction : float, fraction of each split used during the search
        (default 0.05).

    Returns
    -------
    MLPClassifier
        Classifier with the best layer sizes found, fitted on ``X_train``.
    """
    # Use a subset of the data for optimization
    X_train_small, _, y_train_small, _ = train_test_split(X_train, y_train, train_size=subset_fraction, random_state=42, stratify=y_train)
    X_test_small, _, y_test_small, _ = train_test_split(X_test, y_test, train_size=subset_fraction, random_state=42, stratify=y_test)

    def subset_accuracy(state):
        # Train a fresh MLP with these layer sizes and score it on the small subsets.
        clf = MLPClassifier(hidden_layer_sizes=tuple(state), max_iter=50, early_stopping=True, n_iter_no_change=5)
        clf.fit(X_train_small, y_train_small)
        return accuracy_score(y_test_small, clf.predict(X_test_small))

    # Initial state (starting point). Only improvements are accepted, so the current
    # state is always the best state and one variable tracks both.
    best_state = [random.randint(5, 20) for _ in range(2)]
    best_score = subset_accuracy(best_state)

    # The context manager closes the bar even if a fit raises part-way through.
    with tqdm(total=iterations, desc="Randomized Hill Climbing Progress") as progress_bar:
        for _ in range(iterations):
            # Generate a neighboring state by randomly modifying one layer size
            next_state = best_state[:]
            idx = random.randint(0, len(next_state) - 1)
            change = random.choice([-1, 1]) * random.randint(1, 5)
            next_state[idx] = max(5, next_state[idx] + change)  # Ensure layer size is at least 5

            # Accept the neighbor only if it strictly improves the score
            next_score = subset_accuracy(next_state)
            if next_score > best_score:
                best_score = next_score
                best_state = next_state

            progress_bar.update(1)

    # Train the final model with the best found state
    best_model = MLPClassifier(hidden_layer_sizes=tuple(best_state), max_iter=50, early_stopping=True, n_iter_no_change=5)
    best_model.fit(X_train, y_train)

    return best_model

# Run the hill-climbing search and keep the refitted best classifier
model_rhc = randomized_hill_climbing(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

Randomized Hill Climbing Progress: 100%|█████████████████████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 87.00it/s]


# Evaluate Models

In [36]:
def evaluate_model(model, X_test, y_test):
    """Compute accuracy, sensitivity, specificity and ROC AUC for a binary classifier.

    Works for both model kinds used in this notebook: estimators exposing
    ``predict_proba`` (the positive-class column is used) and models whose ``predict``
    already returns scores (the Keras sigmoid network).

    Parameters
    ----------
    model : fitted model with ``predict`` and, optionally, ``predict_proba``.
    X_test : feature matrix to score.
    y_test : true binary labels (0/1).

    Returns
    -------
    tuple
        ``(accuracy, sensitivity, specificity, auc)``.
    """
    # Get a continuous score per sample. Hard labels are only a last resort, because
    # ROC AUC computed from thresholded 0/1 predictions collapses the curve to a single
    # operating point and misreports the ranking quality.
    if hasattr(model, "predict_proba"):
        scores = np.asarray(model.predict_proba(X_test))
    else:
        scores = np.asarray(model.predict(X_test))

    # (n, 2+) class-probability matrix -> positive-class column; (n, 1) or (n,) -> flat
    if scores.ndim == 2 and scores.shape[1] > 1:
        scores = scores[:, 1]
    scores = scores.ravel()

    y_pred = (scores > 0.5).astype("int32")
    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    sensitivity = cm[1,1] / (cm[1,1] + cm[1,0])  # TP / (TP + FN)
    specificity = cm[0,0] / (cm[0,0] + cm[0,1])  # TN / (TN + FP)
    auc = roc_auc_score(y_test, scores)
    return accuracy, sensitivity, specificity, auc

# Score every trained model on the same test split, in the original order
metrics_gd, metrics_ga, metrics_sa, metrics_rhc = (
    evaluate_model(fitted_model, X_test, y_test)
    for fitted_model in (model_gd, model_ga, model_sa, model_rhc)
)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 


# Compile Results into a Table

In [37]:
# One row per optimisation method; each metrics tuple is ordered as
# (accuracy, sensitivity, specificity, auc).
metric_columns = ['Accuracy', 'Sensitivity', 'Specificity', 'AUC']
metrics_by_method = {
    'Gradient Descent': metrics_gd,
    'Genetic Algorithm': metrics_ga,
    'Simulated Annealing': metrics_sa,
    'Randomized Hill Climbing': metrics_rhc,
}

results = pd.DataFrame(
    [
        {'Method': method, **dict(zip(metric_columns, scores))}
        for method, scores in metrics_by_method.items()
    ],
    columns=['Method'] + metric_columns,
)

# Persist the comparison table without the DataFrame index
results.to_csv('comparison_table.csv', index=False)