In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml

# Load the MNIST dataset
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist.data, mnist.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


  warn(


In [26]:
def initialize_harmony_memory(num_harmonies, num_features, n_selected_features):
    harmonies = np.zeros((num_harmonies, num_features), dtype=bool)
    
    for i in range(num_harmonies):
        selected_indices = np.random.choice(num_features, n_selected_features, replace=False)
        harmonies[i, selected_indices] = 1
    
    return harmonies

def evaluate_harmonies(X_train, y_train, X_test, y_test, harmonies):
    accuracies = []

    for harmony in harmonies:
        selected_features = np.where(harmony)[0]
        X_train_selected = X_train[:, selected_features]
        X_test_selected = X_test[:, selected_features]

        classifier = RandomForestClassifier(n_estimators=5, random_state=42)
        classifier.fit(X_train_selected, y_train)

        y_pred = classifier.predict(X_test_selected)
        accuracy = accuracy_score(y_test, y_pred)
        accuracies.append(accuracy)

    return np.array(accuracies)

def update_harmony_memory(harmonies, best_harmony, worst_harmony, harmony_rate, pitch_adjustment_rate):
    for i in range(harmonies.shape[0]):
        rand = np.random.rand(harmonies.shape[1])

        # Adjust pitch
        pitch_adjustment = np.random.rand()
        pitch_mask = rand < pitch_adjustment_rate
        harmonies[i, pitch_mask] = best_harmony[pitch_mask]

        # Adjust value
        value_adjustment = np.random.rand()
        value_mask = rand < harmony_rate
        harmonies[i, value_mask] = best_harmony[value_mask] if np.random.rand() < 0.5 else worst_harmony[value_mask]

    return harmonies

def harmony_search_feature_selection(X_train, y_train, X_test, y_test, num_harmonies, num_iterations, n_selected_features, harmony_rate, pitch_adjustment_rate):
    num_features = X_train.shape[1]
    harmonies = initialize_harmony_memory(num_harmonies, num_features, n_selected_features)
    global_best = None
    global_best_accuracy = 0.0

    for iteration in range(1, num_iterations + 1):
        print("Iteration:", iteration)

        # Evaluate harmonies
        harmony_accuracies = evaluate_harmonies(X_train, y_train, X_test, y_test, harmonies)
        print("Harmony Accuracies:", harmony_accuracies)

        # Find the index of the best harmony
        best_harmony_index = np.argmax(harmony_accuracies)
        # print("Best Harmony Index:", best_harmony_index)

        if harmony_accuracies[best_harmony_index] > global_best_accuracy:
            global_best = harmonies[best_harmony_index].copy()
            global_best_accuracy = harmony_accuracies[best_harmony_index]

        # Update harmony memory
        worst_harmony_index = np.argmin(harmony_accuracies)
        harmonies = update_harmony_memory(harmonies, harmonies[best_harmony_index], harmonies[worst_harmony_index], harmony_rate, pitch_adjustment_rate)

        # Stop if the desired number of features is reached
        if np.sum(global_best) >= n_selected_features:
            break

    # Select top n_selected_features based on the global_best
    selected_indices = np.where(global_best)[0][:n_selected_features]
    print("Selected Feature Indices:", selected_indices)
    return selected_indices

In [27]:
# Example usage
num_iterations = 10
n_selected_features = 28
harmony_rate = 0.7
pitch_adjustment_rate = 0.1
result = harmony_search_feature_selection(np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test), num_harmonies=10, num_iterations=num_iterations, n_selected_features=n_selected_features, harmony_rate=harmony_rate, pitch_adjustment_rate=pitch_adjustment_rate)

Iteration: 1
Harmony Accuracies: [0.62752381 0.69514286 0.60528571 0.69814286 0.439      0.68414286
 0.65819048 0.70852381 0.62842857 0.63609524]
Selected Feature Indices: [ 96  98 121 125 127 128 246 278 279 281 295 302 313 351 453 455 458 480
 488 493 494 568 634 685 692 715 718 754]


In [28]:
X_train_subset = X_train.iloc[:, result]
X_test_subset = X_test.iloc[:, result]

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_subset, y_train)
y_pred = clf.predict(X_test_subset)
acc = accuracy_score(y_test, y_pred)

print(acc)

0.7645238095238095
