In [4]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tqdm import tqdm

In [3]:
from sklearn.datasets import fetch_openml

# Fetch the OpenML "mnist_784" dataset (version 1).
mnist = fetch_openml(name='mnist_784', version=1, parser='auto')

# Expose the feature table and the label column under the short names
# that the later cells use.
X = mnist.data
y = mnist.target

In [3]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Search configuration and initial population for the feature-selection run.
# Every particle is a vector of `col_len` integer column indices, each in
# the range [0, totalfeat - 1].
num_features = 28            # number of column indices held by one particle
max_iter = 10                # number of optimisation iterations

num_particles = 16
col_len = num_features
totalfeat = 784              # total number of selectable feature columns
max_value = totalfeat - 1    # largest valid column index
std_dev = 150
mean = max_value / 2         # centre the initial indices on the middle column
pop_size = (num_particles, col_len)   # (particles, indices per particle)

# A dedicated, seeded generator makes the initial population reproducible
# under Restart & Run All without touching NumPy's global random state.
rng = np.random.default_rng(42)

# Draw indices around the middle column, truncate to integers, then clamp
# the tails of the normal distribution into the valid index range.
population = rng.normal(mean, std_dev, pop_size).astype(int)
population = np.clip(population, 0, max_value)

In [4]:
# Fitness function for RandomForestClassifier
def fitness_rf_classifier(particle, X_train, X_test, y_train, y_test):
    """Score a set of feature columns by random-forest test accuracy.

    Parameters
    ----------
    particle : sequence of int
        Positional column indices, used as ``.iloc[:, particle]`` on both
        feature tables.
    X_train, X_test
        Feature tables; they must support ``.iloc`` column selection.
    y_train, y_test
        Labels matching the rows of ``X_train`` and ``X_test``.

    Returns
    -------
    The value of ``accuracy_score(y_test, predictions)`` for a
    10-tree forest (``random_state=42``) fitted on the selected columns.
    """
    # Restrict both tables to the columns named by the particle.
    train_subset = X_train.iloc[:, particle]
    test_subset = X_test.iloc[:, particle]

    model = RandomForestClassifier(n_estimators=10, random_state=42)
    model.fit(train_subset, y_train)

    return accuracy_score(y_test, model.predict(test_subset))

In [5]:
def whale_optimization_algorithm(pop, X_train, X_test, y_train, y_test, pop_size, totalfeat, max_iterations):
    """Greedy, simplified whale-optimisation search over feature-index vectors.

    For every whale ``i`` and every iteration a candidate is built as
    ``population[l] - A * |C * population[p] - population[i]|`` where ``p``
    and ``l`` are randomly chosen whales and ``A``, ``C`` are drawn uniformly
    from [-1, 1). The candidate is clipped to ``[0, totalfeat - 1]``, truncated
    to integers, and replaces whale ``i`` only if its fitness is strictly
    higher.

    Parameters
    ----------
    pop : array-like of int, shape (n_whales, n_indices)
        Initial population. It is copied; the caller's array is not modified.
    X_train, X_test, y_train, y_test
        Data forwarded unchanged to ``fitness_rf_classifier``.
    pop_size : int
        Number of whales (leading rows of ``pop``) updated per iteration.
    totalfeat : int
        Number of available feature columns; indices are clipped to
        ``totalfeat - 1``.
    max_iterations : int
        Number of passes over the population.

    Returns
    -------
    numpy.ndarray
        The index vector with the highest fitness after the last iteration.

    Raises
    ------
    ValueError
        If ``pop`` contains no individuals.
    """
    # Work on an integer copy so re-running the calling cell starts from the
    # same initial population instead of an already-optimised one.
    population = np.array(pop, dtype=int)
    n_whales = len(population)
    if n_whales == 0:
        raise ValueError("pop must contain at least one individual")

    # Cache one fitness value per whale. This assumes fitness_rf_classifier
    # returns the same score for the same particle and data (it is expected
    # to use a fixed random_state), so re-fitting unchanged whales on every
    # step would only repeat work.
    fitness = [
        fitness_rf_classifier(ind, X_train, X_test, y_train, y_test)
        for ind in population
    ]

    for iteration in tqdm(range(max_iterations)):
        for i in range(pop_size):
            A, C = np.random.uniform(-1, 1, 2)
            # The reference whales must be valid row indices. Truncating a
            # draw from uniform(-1, 1) with int() always yields 0, which
            # pinned every update to the first whale.
            p, l = np.random.randint(0, n_whales, 2)

            D = np.abs(C * population[p] - population[i])
            X_rand = population[l] - A * D
            new_solution = np.clip(X_rand, 0, totalfeat - 1).astype(int)

            fitness_new = fitness_rf_classifier(new_solution, X_train, X_test, y_train, y_test)

            # Greedy acceptance: keep the candidate only on strict improvement.
            if fitness_new > fitness[i]:
                population[i] = new_solution
                fitness[i] = fitness_new

    best_individual = population[int(np.argmax(fitness))]
    return best_individual

In [11]:
# Display the current contents of `population`.
# NOTE(review): the saved output below is a single 28-element vector, while
# the setup cell builds a (num_particles, col_len) array — the output looks
# stale from a different kernel state; re-run to confirm.
population

array([722, 469, 277, 565, 357, 309, 267, 355, 384, 295, 433, 268, 374,
       245, 583, 465,  47, 345, 337,  68, 403, 623, 206, 379, 369, 617,
       128, 368])

In [None]:
# Run the WOA search, starting from the initial population.
best_solution = whale_optimization_algorithm(
    pop=population,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    pop_size=pop_size[0],   # number of particles (first entry of the shape tuple)
    totalfeat=totalfeat,
    max_iterations=max_iter,
)

# Show the winning index vector, then re-score it with the same fitness
# function that drove the search.
print("\nBest solution found:")
print(best_solution)
accuracy = fitness_rf_classifier(best_solution, X_train, X_test, y_train, y_test)
print("Accuracy of best solution = %.6f" % accuracy)

In [1]:
from sklearn.metrics import classification_report

In [6]:
# Split again with a 30% hold-out. This rebinds X_train / X_test / y_train /
# y_test, so every cell below works on this 70/30 split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [9]:
# Hard-coded vector of 28 positional column indices to evaluate.
new_population = [
    132, 240, 321, 301, 327, 408, 291, 220, 222, 432, 238, 284, 573, 147,
    289, 409, 405, 381, 483, 514, 307, 281, 601, 516, 459, 368, 271, 491,
]

# Restrict both splits to the selected columns.
X_train_selected = X_train.iloc[:, new_population]
X_test_selected = X_test.iloc[:, new_population]

# Fit a 100-tree forest on the selected columns only.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_selected, y_train)

# Score the held-out rows.
y_pred = rf_classifier.predict(X_test_selected)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8739523809523809


In [10]:
# Per-class precision / recall / F1 of `y_pred` against `y_test`.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.93      0.96      0.94      2058
           1       0.93      0.97      0.95      2364
           2       0.84      0.84      0.84      2133
           3       0.88      0.84      0.86      2176
           4       0.81      0.85      0.83      1936
           5       0.85      0.85      0.85      1915
           6       0.92      0.94      0.93      2088
           7       0.90      0.84      0.87      2248
           8       0.85      0.84      0.84      1992
           9       0.82      0.80      0.81      2090

    accuracy                           0.87     21000
   macro avg       0.87      0.87      0.87     21000
weighted avg       0.87      0.87      0.87     21000

