In [1]:
import math

import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Load the MNIST dataset.
# as_frame=True pins the return type to pandas objects instead of relying on the
# installed scikit-learn's default: the final evaluation cell slices X_train /
# X_test with `.iloc`, which only exists on DataFrames.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)
X, y = mnist.data, mnist.target
# 70/30 split; the fixed random_state keeps the partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

  warn(


In [None]:
def initialize_nests(num_nests, num_features):
    """Create the initial population of random binary feature masks.

    Parameters
    ----------
    num_nests : int
        Number of candidate solutions (rows).
    num_features : int
        Number of features each mask covers (columns).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_nests, num_features)`` whose entries are drawn
        independently from ``[0, 1]`` using NumPy's global random state.
    """
    population_shape = (num_nests, num_features)
    bit_values = [0, 1]
    return np.random.choice(bit_values, size=population_shape, replace=True)

def evaluate_nests(X_train, y_train, X_test, y_test, nests, n_estimators=5, random_state=42):
    """Score every nest by the accuracy of a random forest restricted to its features.

    For each nest, a fresh ``RandomForestClassifier`` is fitted on the columns
    of ``X_train`` whose mask entry is non-zero, and ``accuracy_score`` is
    computed from its predictions on the same columns of ``X_test``.

    Parameters
    ----------
    X_train, X_test : array-like, 2-D
        Feature matrices with features along axis 1. They are converted with
        ``np.asarray`` so that pandas DataFrames work as well as ndarrays.
    y_train, y_test : array-like
        Labels matching the rows of ``X_train`` / ``X_test``.
    nests : iterable of 1-D array-like
        One feature mask per nest; non-zero entries mark selected features.
    n_estimators : int, default 5
        Number of trees in each forest.
    random_state : int, default 42
        Seed handed to every forest, which makes a nest's score repeatable.

    Returns
    -------
    numpy.ndarray
        One accuracy per nest, in input order. A nest that selects no feature
        scores ``0.0``.
    """
    X_train = np.asarray(X_train)
    X_test = np.asarray(X_test)

    accuracies = []

    for nest in nests:
        selected_features = np.where(nest)[0]

        # The classifier cannot be fitted on a zero-column matrix; treat an
        # empty mask as the worst possible solution instead of crashing.
        if selected_features.size == 0:
            accuracies.append(0.0)
            continue

        X_train_selected = X_train[:, selected_features]
        X_test_selected = X_test[:, selected_features]

        classifier = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
        classifier.fit(X_train_selected, y_train)

        y_pred = classifier.predict(X_test_selected)
        accuracies.append(accuracy_score(y_test, y_pred))

    return np.array(accuracies)

def levy_flight(scale, size, beta=1.5):
    """Draw Lévy-flight step lengths using Mantegna's algorithm.

    Parameters
    ----------
    scale : float
        Multiplier applied to every generated step.
    size : int or tuple of int
        Shape of the returned sample, passed to ``np.random.normal``.
    beta : float, default 1.5
        Lévy stability exponent; must satisfy ``0 < beta < 2``.

    Returns
    -------
    numpy.ndarray
        ``scale * u / |v| ** (1 / beta)`` with ``u ~ N(0, sigma_u)`` and
        ``v ~ N(0, 1)`` drawn from NumPy's global random state.

    Raises
    ------
    ValueError
        If ``beta`` lies outside ``(0, 2)``.
    """
    if not 0 < beta < 2:
        raise ValueError("beta must satisfy 0 < beta < 2")

    # The exponent is a distribution parameter and must not be tied to the
    # sample count: feeding a large `size` into gamma() overflows.
    numerator = math.gamma(1 + beta) * math.sin(math.pi * beta / 2)
    denominator = math.gamma((1 + beta) / 2) * beta * 2 ** ((beta - 1) / 2)
    sigma_u = (numerator / denominator) ** (1 / beta)
    sigma_v = 1

    u = np.random.normal(0, sigma_u, size)
    v = np.random.normal(0, sigma_v, size)

    step = u / (np.abs(v) ** (1 / beta))

    return scale * step

def cuckoo_search_feature_selection(X_train, y_train, X_test, y_test, num_nests, num_iterations, pa, alpha):
    """Search for a binary feature mask by perturbing nests with Lévy steps.

    A population of ``num_nests`` random 0/1 masks is scored with
    ``evaluate_nests``. In every iteration one randomly chosen nest is
    perturbed by a Lévy step and, with probability ``pa``, overwritten by the
    perturbed mask. The best-scoring mask seen in any evaluation is kept.

    Parameters
    ----------
    X_train, X_test : 2-D feature matrices
        ``X_train.shape[1]`` is read as the number of features; both matrices
        are forwarded unchanged to ``evaluate_nests``.
    y_train, y_test : array-like
        Labels forwarded unchanged to ``evaluate_nests``.
    num_nests : int
        Population size.
    num_iterations : int
        Number of perturbation rounds.
    pa : float
        Probability that the perturbed mask replaces the nest it came from.
    alpha : float
        Scale passed to ``levy_flight``.

    Returns
    -------
    numpy.ndarray
        Indices of the features switched on in the best mask found; empty if
        no nest ever scored above 0.

    Notes
    -----
    Nest fitness is the accuracy measured on ``X_test`` / ``y_test``, so that
    split takes part in choosing the features.
    """
    num_features = X_train.shape[1]
    nests = initialize_nests(num_nests, num_features)
    current_best_nest = None
    current_best_accuracy = 0.0

    # Scores are cached across iterations: evaluate_nests seeds every forest
    # identically, so an unchanged nest always gets the same accuracy and only
    # a replaced nest needs a new fit.
    nest_accuracies = None

    for iteration in range(num_iterations):
        print("Iteration:", iteration + 1)

        if nest_accuracies is None:
            # First round: score the whole initial population once.
            nest_accuracies = evaluate_nests(X_train, y_train, X_test, y_test, nests)
            max_accuracy_index = np.argmax(nest_accuracies)
            if nest_accuracies[max_accuracy_index] > current_best_accuracy:
                current_best_nest = nests[max_accuracy_index].copy()
                current_best_accuracy = nest_accuracies[max_accuracy_index]
        print("Best Accuracy in Iteration:", np.max(nest_accuracies))

        # Get a new solution via Levy flight
        new_nest_index = np.random.randint(num_nests)
        step_size = levy_flight(alpha, num_features)
        # Explicit binarisation: a bit is set only where the perturbed value
        # reaches 1. This is the same outcome as clipping to [0, 1] and letting
        # the integer population array truncate the floats on assignment.
        new_nest = (nests[new_nest_index] + step_size >= 1).astype(nests.dtype)

        # Replace the old nest with the new one based on probability pa.
        # A mask with no feature selected is never admitted, because it cannot
        # be used to fit a classifier.
        if np.random.rand() < pa and new_nest.any():
            new_accuracy = evaluate_nests(
                X_train, y_train, X_test, y_test, new_nest[np.newaxis, :]
            )[0]
            nests[new_nest_index] = new_nest
            nest_accuracies[new_nest_index] = new_accuracy
            # Scoring immediately means a replacement made in the final
            # iteration still competes for the overall best.
            if new_accuracy > current_best_accuracy:
                current_best_nest = new_nest.copy()
                current_best_accuracy = new_accuracy

    # Report the features switched on in the best mask found.
    if current_best_nest is None:
        selected_indices = np.array([], dtype=int)
    else:
        selected_indices = np.where(current_best_nest)[0]
    print("Selected Feature Indices:", selected_indices)
    return selected_indices

In [None]:
# Example usage
# The search draws its initial population, Lévy steps and acceptance decisions
# from NumPy's global RNG; seed it so a re-run selects the same features.
np.random.seed(42)

num_nests = 20       # population size
num_iterations = 10  # perturbation rounds
pa = 0.25            # probability that a perturbed nest replaces the original
alpha = 1.5          # scale of the Lévy steps
result = cuckoo_search_feature_selection(
    np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test),
    num_nests=num_nests, num_iterations=num_iterations, pa=pa, alpha=alpha,
)

In [None]:
# Re-fit on only the columns chosen by the search and report the accuracy.
# `result` holds positional column indices, hence `.iloc`.
X_train_subset = X_train.iloc[:, result]
X_test_subset = X_test.iloc[:, result]

# A 100-tree forest, larger than the 5-tree forests used while scoring nests.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_subset, y_train)
y_pred = clf.predict(X_test_subset)
acc = accuracy_score(y_test, y_pred)

# NOTE(review): X_test / y_test were also passed to the search that produced
# `result`, so this number is measured on data that influenced the feature
# choice and is likely optimistic; a separate validation split would avoid that.
print(acc)