Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.svm import SVC
import random


Loading the Cleveland Heart Disease Dataset

In [2]:
# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
column_names = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak",
    "slope", "ca", "thal", "target"
]
data = pd.read_csv(url, names=column_names, na_values='?')

# Display the first few rows of the dataset
print(data.head())


    age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
0  63.0  1.0  1.0     145.0  233.0  1.0      2.0    150.0    0.0      2.3   
1  67.0  1.0  4.0     160.0  286.0  0.0      2.0    108.0    1.0      1.5   
2  67.0  1.0  4.0     120.0  229.0  0.0      2.0    129.0    1.0      2.6   
3  37.0  1.0  3.0     130.0  250.0  0.0      0.0    187.0    0.0      3.5   
4  41.0  0.0  2.0     130.0  204.0  0.0      2.0    172.0    0.0      1.4   

   slope   ca  thal  target  
0    3.0  0.0   6.0       0  
1    2.0  3.0   3.0       2  
2    2.0  2.0   7.0       1  
3    3.0  0.0   3.0       0  
4    1.0  0.0   3.0       0  


Data Pre-Processing

In [3]:
# Handle missing values by dropping rows with missing values
data.dropna(inplace=True)

# Split the data into features and target
X = data.drop('target', axis=1)
y = data['target'].apply(lambda x: 1 if x > 0 else 0)  # Convert target to binary

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


JellyFish Optimization Algorithm

In [6]:
class JellyfishOptimizationAlgorithm:
    def __init__(self, population_size, dimensions, lower_bound, upper_bound, max_iter):
        self.population_size = population_size
        self.dimensions = dimensions
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound
        self.max_iter = max_iter
        self.population = self.initialize_population()

    def initialize_population(self):
        return np.random.uniform(self.lower_bound, self.upper_bound, (self.population_size, self.dimensions))

    def fitness(self, solution):
        # Ensure gamma is a positive value
        C = solution[0]
        gamma = abs(solution[1])  # Taking absolute value to ensure gamma is positive
        model = SVC(C=C, gamma=gamma, kernel='rbf')
        model.fit(X_train, y_train)
        predictions = model.predict(X_train)
        accuracy = accuracy_score(y_train, predictions)
        return accuracy

    def optimize(self):
        best_solution = None
        best_fitness = -1

        for iteration in range(self.max_iter):
            for individual in self.population:
                fitness_value = self.fitness(individual)
                if fitness_value > best_fitness:
                    best_fitness = fitness_value
                    best_solution = individual

            # Update population
            for i in range(self.population_size):
                if random.random() < 0.5:
                    self.population[i] = best_solution + np.random.uniform(-1, 1, self.dimensions)
                else:
                    self.population[i] = np.random.uniform(self.lower_bound, self.upper_bound, self.dimensions)

        return best_solution, best_fitness


Using SVM with JOA to predict heart diseases

In [7]:
# Define the bounds for C and gamma parameters of the SVM
lower_bound = [0.1, 0.001]
upper_bound = [100, 1]

# Instantiate the Jellyfish Optimization Algorithm
joa = JellyfishOptimizationAlgorithm(
    population_size=30, dimensions=2, lower_bound=lower_bound, upper_bound=upper_bound, max_iter=50
)

# Optimize the SVM parameters using JOA
best_solution, best_fitness = joa.optimize()
print(f"Best solution (C, gamma): {best_solution}, Best fitness: {best_fitness}")

# Train the SVM model with the best parameters
model = SVC(C=best_solution[0], gamma=best_solution[1], kernel='rbf')
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(f"Precision: {precision_score(y_test, y_pred)}")
print(f"Recall: {recall_score(y_test, y_pred)}")
print(f"F1 Score: {f1_score(y_test, y_pred)}")
print(f"ROC AUC Score: {roc_auc_score(y_test, y_pred)}")


Best solution (C, gamma): [34.74119704  0.76053058], Best fitness: 1.0
Accuracy: 0.9333333333333333
Precision: 0.8846153846153846
Recall: 0.9583333333333334
F1 Score: 0.92
ROC AUC Score: 0.9375000000000001
