In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml

In [2]:
# Load the MNIST dataset with parser='auto'
mnist = fetch_openml('mnist_784', version=1, parser='auto')
X, y = mnist.data, mnist.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [3]:
def objective(features, X_train, X_test, y_train, y_test):
    X_train_subset = X_train.iloc[:, features]
    X_test_subset = X_test.iloc[:, features]

    clf = RandomForestClassifier(n_estimators=5, random_state=42)
    clf.fit(X_train_subset, y_train)
    y_pred = clf.predict(X_test_subset)
    acc = accuracy_score(y_test, y_pred)

    return acc

def generate_solutions(pheromones, num_particles, num_selected_features):
    solutions = []

    for _ in range(num_particles):
        selected_indices = np.random.choice(len(pheromones), size=num_selected_features, replace=False, p=pheromones / np.sum(pheromones))
        solutions.append(selected_indices.tolist())

    return solutions

def local_search(current_solution, X_train, X_test, y_train, y_test):
    # Implement local search using a simple hill climbing strategy
    current_fitness = objective(current_solution, X_train, X_test, y_train, y_test)

    for _ in range(5):  # Perform 5 local search iterations
        new_solution = current_solution.copy()
        swap_indices = np.random.choice(len(new_solution), size=2, replace=False)
        new_solution[swap_indices[0]], new_solution[swap_indices[1]] = new_solution[swap_indices[1]], new_solution[swap_indices[0]]

        new_fitness = objective(new_solution, X_train, X_test, y_train, y_test)

        if new_fitness > current_fitness:
            current_solution = new_solution
            current_fitness = new_fitness

    return current_solution

def update_pheromones(pheromones, solutions, fitness_values):
    pheromone_update = np.zeros_like(pheromones)

    elite_indices = np.argsort(fitness_values)[-len(fitness_values) // 2:]
    for idx in elite_indices:
        pheromone_update[solutions[idx]] += fitness_values[idx] * 2.0

    return pheromone_update

def aco_feature_selection(X_train, X_test, y_train, y_test, num_particles, num_iterations, num_selected_features):
    num_features = X_train.shape[1]

    pheromones = np.ones(num_features)

    for iteration in range(num_iterations):
        solutions = generate_solutions(pheromones, num_particles, num_selected_features)

        # Apply local search to improve solutions
        solutions = [local_search(solution, X_train, X_test, y_train, y_test) for solution in solutions]

        fitness_values = [objective(features, X_train, X_test, y_train, y_test) for features in solutions]
        print(fitness_values)

        pheromones *= 0.5  # Evaporation
        pheromones += update_pheromones(pheromones, solutions, fitness_values)

    best_features = np.argsort(pheromones)[-num_selected_features:]

    return best_features

In [4]:
# Example usage with parameters
num_particles = 4
num_iterations = 3
num_selected_features = 28

selected_features = aco_feature_selection(X_train, X_test, y_train, y_test, num_particles, num_iterations, num_selected_features)
print("Selected Features:", selected_features)

[0.5606190476190476, 0.7118571428571429, 0.6614761904761904, 0.7440952380952381]
[0.6834285714285714, 0.5823809523809523, 0.687952380952381, 0.7188095238095238]
[0.665952380952381, 0.6299523809523809, 0.5285238095238095, 0.5991904761904762]
Selected Features: [395 319 775  62 403 601 517  42  59 379 267 550 531 259 723  52  99 554
 735 536 195 243 338 515 698  55 613 176]


In [5]:
X_train_subset = X_train.iloc[:, selected_features]
X_test_subset = X_test.iloc[:, selected_features]

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_subset, y_train)
y_pred = clf.predict(X_test_subset)
acc = accuracy_score(y_test, y_pred)

In [6]:
print(acc)

0.7322857142857143
