In [1]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Load the MNIST dataset
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist.data, mnist.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

  warn(


In [10]:
def initialize_fireflies(num_fireflies, num_features, num_selected_features):
    fireflies = np.zeros((num_fireflies, num_features), dtype=bool)
    
    for i in range(num_fireflies):
        selected_indices = np.random.choice(num_features, num_selected_features, replace=False)
        fireflies[i, selected_indices] = 1
    
    return fireflies

def evaluate_fireflies(X_train, y_train, X_test, y_test, fireflies):
    accuracies = []

    for firefly in fireflies:
        selected_features = np.where(firefly)[0]
        X_train_selected = X_train[:, selected_features]
        X_test_selected = X_test[:, selected_features]

        classifier = RandomForestClassifier(n_estimators=5, random_state=42)
        classifier.fit(X_train_selected, y_train)
        
        y_pred = classifier.predict(X_test_selected)
        accuracy = accuracy_score(y_test, y_pred)
        accuracies.append(accuracy)

    return np.array(accuracies)

def move_fireflies(fireflies, attractiveness_matrix, beta=1.0):
    num_fireflies = fireflies.shape[0]
    num_features = fireflies.shape[1]
    new_fireflies = fireflies.copy()

    for i in range(num_fireflies):
        for j in range(num_fireflies):
            if attractiveness_matrix[i, j] > attractiveness_matrix[i, i]:
                epsilon = np.random.uniform(0, 1, size=num_features)
                new_fireflies[i] += beta * (fireflies[j] ^ fireflies[i]) + epsilon

    new_fireflies[new_fireflies > 1] = 1
    new_fireflies[new_fireflies < 0] = 0

    return new_fireflies

def firefly_algorithm_feature_selection(X_train, y_train, X_test, y_test, num_fireflies, num_iterations, num_selected_features, beta=1.0):
    num_features = X_train.shape[1]
    fireflies = initialize_fireflies(num_fireflies, num_features, num_selected_features)
    global_best = None
    global_best_accuracy = 0.0

    # Inside the firefly_algorithm_feature_selection function

    for iteration in range(1, num_iterations + 1):
        print("Iteration:", iteration)

        # Evaluate fireflies
        firefly_accuracies = evaluate_fireflies(X_train, y_train, X_test, y_test, fireflies)
        print("Firefly Accuracies:", firefly_accuracies)

        # Find the index of the best firefly
        best_firefly_index = np.argmax(firefly_accuracies)
        # print("Best Firefly Index:", best_firefly_index)
        
        if firefly_accuracies[best_firefly_index] > global_best_accuracy:
            global_best = fireflies[best_firefly_index].copy()
            global_best_accuracy = firefly_accuracies[best_firefly_index]

        # Update fireflies' positions
        attractiveness_matrix = np.exp(-beta * np.linalg.norm(fireflies ^ fireflies[:, np.newaxis], axis=2))
        # print("Attractiveness Matrix:", attractiveness_matrix)
        
        new_fireflies = move_fireflies(fireflies, attractiveness_matrix, beta=beta)
        # print("New Fireflies:", new_fireflies)
        
        fireflies = new_fireflies

    # Select top num_selected_features based on the global_best
    selected_indices = np.where(global_best)[0]
    print("Selected Feature Indices:", selected_indices)
    return selected_indices


In [11]:
num_selected_features = 28
result = firefly_algorithm_feature_selection(np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test), num_fireflies=10, num_iterations=10, num_selected_features=num_selected_features, beta=5.0)

Iteration: 1
Firefly Accuracies: [0.69814286 0.47109524 0.73842857 0.62209524 0.65390476 0.64266667
 0.60119048 0.67885714 0.64438095 0.66952381]
Iteration: 2
Firefly Accuracies: [0.69814286 0.47109524 0.73842857 0.62209524 0.65390476 0.64266667
 0.60119048 0.67885714 0.64438095 0.66952381]
Iteration: 3
Firefly Accuracies: [0.69814286 0.47109524 0.73842857 0.62209524 0.65390476 0.64266667
 0.60119048 0.67885714 0.64438095 0.66952381]
Iteration: 4
Firefly Accuracies: [0.69814286 0.47109524 0.73842857 0.62209524 0.65390476 0.64266667
 0.60119048 0.67885714 0.64438095 0.66952381]
Iteration: 5
Firefly Accuracies: [0.69814286 0.47109524 0.73842857 0.62209524 0.65390476 0.64266667
 0.60119048 0.67885714 0.64438095 0.66952381]
Iteration: 6
Firefly Accuracies: [0.69814286 0.47109524 0.73842857 0.62209524 0.65390476 0.64266667
 0.60119048 0.67885714 0.64438095 0.66952381]
Iteration: 7
Firefly Accuracies: [0.69814286 0.47109524 0.73842857 0.62209524 0.65390476 0.64266667
 0.60119048 0.67885714 0

In [15]:
X_train_subset = X_train.iloc[:, result]
X_test_subset = X_test.iloc[:, result]

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_subset, y_train)
y_pred = clf.predict(X_test_subset)
acc = accuracy_score(y_test, y_pred)

In [16]:
print(acc)

0.814
