The Butterfly Optimization Algorithm (BOA) is a nature-inspired metaheuristic that mimics the food-foraging and mating behavior of butterflies, which locate food and partners by sensing fragrance. It can be applied to a wide range of optimization problems, including machine-learning tasks such as feature selection and hyperparameter tuning. To apply BOA to the Cleveland Heart Disease dataset, follow these general steps:

## 1) Import necessary libraries and load the dataset:

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Fetch the processed Cleveland Heart Disease data from the UCI repository.
# The file has no header row, so column names are supplied explicitly, and
# "?" entries are read as missing values; rows containing any are dropped.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
column_names = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal",
    "target",
]
heart_data = pd.read_csv(url, header=None, names=column_names, na_values="?").dropna()

# Label vector is the "target" column; the feature matrix is everything else
y = heart_data["target"]
X = heart_data.drop(columns="target")

# Hold out 20% of the rows for testing (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
from sklearn.preprocessing import StandardScaler
# Normalize the data (zero mean, unit variance per feature).
# The scaler's statistics are learned from the training rows only, so nothing
# about the held-out test rows leaks into preprocessing. The training rows are
# re-derived here from the raw feature matrix with the same test_size and
# random_state used by every split in this notebook; train_test_split picks
# rows from the sample count and the seed alone, so this selects exactly the
# rows that end up in X_train.
X_train_raw, _ = train_test_split(X, test_size=0.2, random_state=42)
scaler = StandardScaler()
scaler.fit(X_train_raw)
X_normalized = scaler.transform(X)

In [None]:
# Partition the standardized features and the labels into an 80% training
# part and a 20% test part; the fixed seed makes the partition reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X_normalized,
    y,
    test_size=0.2,
    random_state=42,
)

## 2) Define the fitness function:

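In this example, the objective defined below scores a candidate weight vector by the test-set accuracy of a back-propagation neural network (`MLPClassifier`), negated because BOA minimizes its objective. The feature-selection run in step 3 instead expects a `fitness_function` based on the accuracy of a Random Forest Classifier on the selected features; that function is not defined in the cell below.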

In [None]:
# Define the objective function for BOA
def boa_objective_function(weights):
    """Score one candidate weight vector for a one-hidden-layer neural network.

    The candidate is a flat vector holding, in order, every weight matrix
    followed by every bias vector of an ``MLPClassifier`` with a single hidden
    layer of 10 units. The vector is installed into the network and the
    network is scored as-is, so the optimizer (not back-propagation) decides
    the weights.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Args:
        weights: 1-D array-like of length
            ``n_features * 10 + 10 * n_outputs + 10 + n_outputs``.

    Returns:
        The negated accuracy on the test split (lower is better).

    Raises:
        ValueError: If ``weights`` does not contain exactly as many values as
            the network has parameters.
    """
    import warnings

    from sklearn.exceptions import ConvergenceWarning
    from sklearn.neural_network import MLPClassifier

    # coefs_ / intercepts_ only exist on a fitted estimator, so run a single
    # training iteration purely to make scikit-learn allocate the layers,
    # output activation and class labels. The values it learns are discarded.
    bpnn = MLPClassifier(hidden_layer_sizes=(10,), max_iter=1, random_state=42)
    with warnings.catch_warnings():
        # One iteration never converges; that warning is expected here.
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        bpnn.fit(X_train, y_train)

    flat = np.asarray(weights, dtype=float).ravel()
    templates = list(bpnn.coefs_) + list(bpnn.intercepts_)
    expected = sum(t.size for t in templates)
    if flat.size != expected:
        raise ValueError(
            f"expected {expected} weights for this network, got {flat.size}"
        )

    # Cut the flat vector into arrays shaped like the fitted parameters
    pieces = []
    offset = 0
    for template in templates:
        pieces.append(flat[offset:offset + template.size].reshape(template.shape))
        offset += template.size

    n_weight_matrices = len(bpnn.coefs_)
    bpnn.coefs_ = pieces[:n_weight_matrices]
    bpnn.intercepts_ = pieces[n_weight_matrices:]

    # Make predictions on the test set with the candidate weights
    y_pred = bpnn.predict(X_test)

    # Calculate accuracy (you can replace this with your desired performance metric)
    accuracy = accuracy_score(y_test, y_pred)

    # Since BOA tries to minimize the objective function, we negate accuracy
    return -accuracy

# Butterfly Optimization Algorithm
def butterfly_optimization_algorithm(objective_function, dim, population_size, max_iterations,
                                     alpha=0.5, beta=0.1, lower=-1.0, upper=1.0):
    """Minimize ``objective_function`` with a simplified butterfly search.

    Every butterfly except the current leader moves a fraction ``alpha`` of
    the way towards the leader and is shifted by a random scalar offset scaled
    by ``beta``; positions are then clipped to ``[lower, upper]``. The best
    position over all evaluated populations (including the initial one) is
    returned. Randomness comes from NumPy's global random state.

    Args:
        objective_function: Callable taking a 1-D array of length ``dim`` and
            returning a value to minimize.
        dim: Number of decision variables.
        population_size: Number of butterflies (must be at least 1).
        max_iterations: Number of move-and-evaluate rounds; 0 returns the best
            member of the initial population.
        alpha: Attraction strength towards the leader.
        beta: Scale of the random offset.
        lower: Lower bound of the search space.
        upper: Upper bound of the search space.

    Returns:
        A 1-D NumPy array of length ``dim`` with the best position found.

    Raises:
        ValueError: If ``population_size`` is smaller than 1.
    """
    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    population = np.random.uniform(lower, upper, size=(population_size, dim))
    fitness = [objective_function(candidate) for candidate in population]
    best_index = int(np.argmin(fitness))
    best_position = population[best_index].copy()
    best_fitness = fitness[best_index]

    for _ in range(max_iterations):
        # The leader itself stays put, so a good solution is never lost
        leader = population[best_index].copy()
        for i in range(population_size):
            if i != best_index:
                r1, r2 = np.random.rand(), np.random.rand()
                population[i] = population[i] + alpha * (leader - population[i]) + beta * (r1 - r2)

        # Keep every butterfly inside the search space
        population = np.clip(population, lower, upper)

        fitness = [objective_function(candidate) for candidate in population]
        best_index = int(np.argmin(fitness))
        if fitness[best_index] < best_fitness:
            best_fitness = fitness[best_index]
            best_position = population[best_index].copy()

    return best_position

# Set the dimensions for the optimization (number of weights in the neural network).
# The network scored by the objective has one hidden layer of 10 units, so its
# parameter count is (inputs -> hidden weights + hidden biases) plus
# (hidden -> output weights + output biases). scikit-learn's MLPClassifier
# uses a single output unit for two classes and one unit per class otherwise.
n_features = X_train.shape[1]
n_hidden = 10
n_classes = len(np.unique(y_train))
n_outputs = 1 if n_classes == 2 else n_classes
num_weights = (n_features * n_hidden + n_hidden) + (n_hidden * n_outputs + n_outputs)

population_size = 20
max_iterations = 50

# Run BOA to optimize neural network weights
best_weights = butterfly_optimization_algorithm(boa_objective_function, dim=num_weights, population_size=population_size, max_iterations=max_iterations)

# Print the best weights found by BOA
print("Best Weights:", best_weights)

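Note: Step 3 relies on a third-party Butterfly Optimization library (`butterfly_optimization`). Make sure it is installed before running that step. When the command below is run from a notebook cell it must be prefixed with `%` or `!` (for example `%pip install butterfly-optimization`):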

In [None]:
pip install butterfly-optimization

## 3) Apply Butterfly Optimization:

In [None]:
# Feature-selection run: search for a vector with one entry per feature and
# keep the features whose entry exceeds 0.5.

# Define the number of butterflies and the maximum number of iterations
num_butterflies = 20
max_iterations = 50

# Create an instance of the ButterflyOptimization class
# NOTE(review): neither `ButterflyOptimization` nor `fitness_function` is
# imported or defined in the cells visible above; presumably the class comes
# from the third-party package installed in the previous cell and
# `fitness_function` must be supplied by the user. Confirm both before running.
bo = ButterflyOptimization(fitness_function, num_variables=X.shape[1], num_butterflies=num_butterflies)

# Run the optimization
# NOTE(review): the second returned value is discarded; presumably it is the
# best fitness. Verify against the library's documentation.
best_solution, _ = bo.run(max_iterations)

# Extract the selected features from the best solution
# (indices of the entries of best_solution that are greater than 0.5)
selected_features = np.where(best_solution > 0.5)[0]

# Print the selected feature indices and the matching entries of the solution
# vector (presumably interpreted as importance scores; confirm)
print("Selected Features:", selected_features)
print("Best Solution:", best_solution[selected_features])


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.datasets import load_breast_cancer

def initialize_population(population_size, dimension):
    """Create the starting swarm.

    Returns a ``(population_size, dimension)`` array whose entries are drawn
    uniformly from ``[0, 1)`` using NumPy's global random state; each row is
    one butterfly.
    """
    swarm_shape = (population_size, dimension)
    return np.random.rand(*swarm_shape)

def butterfly_optimization_algorithm(objective_function, population_size, dimension, iterations, alpha=0.5, beta=0.1):
    """Minimize ``objective_function`` over the unit hypercube ``[0, 1]^dimension``.

    Each round, every butterfly except the current leader moves a fraction
    ``alpha`` of the way towards the leader and is shifted by a random scalar
    offset scaled by ``beta``; positions are then clipped back into
    ``[0, 1]``. The population is evaluated before the first move and after
    every move, and the best solution over all of those evaluations is
    returned, so ``iterations=0`` yields the best member of the initial
    population and the final move is never wasted. Randomness comes from
    NumPy's global random state.

    Args:
        objective_function: Callable taking a 1-D array of length
            ``dimension`` and returning a value to minimize.
        population_size: Number of butterflies (must be at least 1).
        dimension: Number of decision variables.
        iterations: Number of move-and-evaluate rounds.
        alpha: Attraction strength towards the leader.
        beta: Scale of the random offset.

    Returns:
        A tuple ``(best_butterfly, best_fitness)``: the best position found
        (1-D array) and the objective value recorded for it.

    Raises:
        ValueError: If ``population_size`` is smaller than 1.
    """
    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    # Initialize population uniformly in [0, 1)
    population = np.random.rand(population_size, dimension)

    # Evaluate the starting swarm so a best solution exists even with 0 iterations
    fitness_values = [objective_function(individual) for individual in population]
    best_index = int(np.argmin(fitness_values))
    best_butterfly = population[best_index].copy()
    best_fitness = fitness_values[best_index]

    for _ in range(iterations):
        # The leader of this round stays where it is (minimization problem)
        leader = population[best_index].copy()

        # Update position of every other butterfly
        for i in range(population_size):
            if i != best_index:
                r1, r2 = np.random.rand(), np.random.rand()
                population[i] = population[i] + alpha * (leader - population[i]) + beta * (r1 - r2)

        # Clip positions to ensure they are within the search space
        population = np.clip(population, 0, 1)

        # Evaluate the moved swarm and remember any improvement; copying keeps
        # the stored best independent of later in-place updates
        fitness_values = [objective_function(individual) for individual in population]
        best_index = int(np.argmin(fitness_values))
        if fitness_values[best_index] < best_fitness:
            best_fitness = fitness_values[best_index]
            best_butterfly = population[best_index].copy()

    # Reuse the recorded fitness instead of calling the objective again
    return best_butterfly, best_fitness

def objective_function(params):
    """Score one hyperparameter candidate for a Random Forest classifier.

    Reads the module-level ``X`` and ``y``, trains on a fixed 80/20 split and
    evaluates on the held-out 20%.

    Args:
        params: Sequence of at least two numbers. ``params[0]`` is scaled by
            100 to give the number of trees and ``params[1]`` is scaled by 10
            (plus 1) to give the maximum tree depth.

    Returns:
        The negated F1 score on the held-out split, so that minimizing this
        value maximizes F1.
    """
    # Extract hyperparameters from the individual.
    # int() truncates, so params[0] < 0.01 would ask for 0 trees, which
    # RandomForestClassifier rejects; clamp to at least one tree.
    n_estimators = max(1, int(params[0] * 100))
    max_depth = int(params[1] * 10) + 1  # Adjusted to ensure max_depth is at least 1

    # Split the data (fixed seed: every candidate is scored on the same split)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a Random Forest Classifier
    clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    clf.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = clf.predict(X_test)

    # Evaluate the model using F1 score (you can choose other metrics)
    f1 = f1_score(y_test, y_pred)

    # Since we want to maximize the F1 score, return its negative value
    return -f1

# Load the breast cancer dataset bundled with scikit-learn
# (this cell does not use the Cleveland Heart Disease data loaded earlier)
data = load_breast_cancer()
X = data.data  # Features
y = data.target  # Target

# Optimization parameters
population_size = 20
dimension = 2  # one entry for n_estimators, one for max_depth
iterations = 100
alpha = 0.5
beta = 0.1

# Run Butterfly Optimization Algorithm
best_solution, best_fitness = butterfly_optimization_algorithm(
    objective_function, population_size, dimension, iterations, alpha, beta
)

# Extract the best hyperparameters.
# int() truncates, so an entry below 0.01 would decode to 0 trees, which
# RandomForestClassifier rejects; clamp to at least one tree.
best_n_estimators = max(1, int(best_solution[0] * 100))
best_max_depth = int(best_solution[1] * 10) + 1  # Adjusted to ensure max_depth is at least 1

# Train a model with the best hyperparameters on the full dataset
best_clf = RandomForestClassifier(n_estimators=best_n_estimators, max_depth=best_max_depth, random_state=42)
best_clf.fit(X, y)

# Make predictions on the entire dataset
# NOTE: these are the same rows the model was just trained on, so the metrics
# below are in-sample scores and will look better than performance on unseen data.
y_pred = best_clf.predict(X)

# Evaluate the model's performance
accuracy = accuracy_score(y, y_pred)
precision = precision_score(y, y_pred)
recall = recall_score(y, y_pred)
f1 = f1_score(y, y_pred)

# Display results
print("Best Hyperparameters:")
print("Number of Estimators:", best_n_estimators)
print("Max Depth:", best_max_depth)
print("\nPerformance Metrics:")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
