In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

In [3]:
# Load the MNIST dataset
mnist = fetch_openml('mnist_784', version=1, parser='auto')
X, y = mnist.data, mnist.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [17]:
def objective(features):
    X_train_subset = X_train.iloc[:, features]
    X_test_subset = X_test.iloc[:, features]

    clf = RandomForestClassifier(n_estimators=5, random_state=42)
    clf.fit(X_train_subset, y_train)
    y_pred = clf.predict(X_test_subset)
    acc = accuracy_score(y_test, y_pred)

    return acc

# Simulated Annealing
def simulated_annealing(n_features, max_iterations=1000, initial_temperature=1.0, cooling_rate=0.95):
    current_solution = np.random.choice(X.shape[1], n_features, replace=False)
    current_score = objective(current_solution)

    best_solution = np.copy(current_solution)
    best_score = current_score

    for iteration in range(max_iterations):

        if iteration % 10 == 0:
            print(best_score)

        temperature = initial_temperature * cooling_rate**iteration

        # Propose a new solution by randomly changing one feature
        new_solution = np.copy(current_solution)
        index_to_change = np.random.randint(0, n_features)
        new_feature = np.random.randint(0, X.shape[1])
        while new_feature in new_solution:
            new_feature = np.random.randint(0, X.shape[1])
        new_solution[index_to_change] = new_feature

        # Calculate the objective function value for the new solution
        new_score = objective(new_solution)

        # Accept the new solution if it's better or with a certain probability if it's worse
        if new_score > current_score or np.random.rand() < np.exp((new_score - current_score) / temperature):
            current_solution = np.copy(new_solution)
            current_score = new_score

        # Update the best solution if needed
        if current_score > best_score:
            best_solution = np.copy(current_solution)
            best_score = current_score

    return best_solution

In [None]:
# Set the number of features to select
n_feature = 28

# Run simulated annealing
selected_features = simulated_annealing(n_feature)

# Print the selected features
print("Selected Features:", selected_features)

In [19]:
X_train_subset = X_train.iloc[:, selected_features]
X_test_subset = X_test.iloc[:, selected_features]

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_subset, y_train)
y_pred = clf.predict(X_test_subset)
acc = accuracy_score(y_test, y_pred)

In [20]:
print(acc)

0.9250952380952381


In [30]:
print(selected_features)

[432 318 327 484 184 182 353 372 434 625 265 515 350 481 518 656 237 241
 595 289 244 130 154 439 541 321 324 149]


In [11]:
current_solution = np.random.choice(X.shape[1], n_feature, replace=False)

In [12]:
X_train_subset = X_train.iloc[:, current_solution]
X_test_subset = X_test.iloc[:, current_solution]

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_subset, y_train)
y_pred = clf.predict(X_test_subset)
acc = accuracy_score(y_test, y_pred)

In [13]:
print(acc)

0.5840952380952381


In [16]:
print(current_solution)

[677  72 761 639 137 196 552 700 511 275 481 279 218 722 504 152 399 782
 241 648 526 281 122 656   2  26 153 548]


In [21]:
X_train_subset = X_train
X_test_subset = X_test

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_subset, y_train)
y_pred = clf.predict(X_test_subset)
acc = accuracy_score(y_test, y_pred)

In [22]:
print(acc)

0.9667619047619047
