In [2]:
from sklearn.datasets import fetch_openml

In [3]:
# Load version 1 of the 'mnist_784' dataset through scikit-learn's OpenML fetcher,
# then split the returned bunch into the feature table and the label column.
mnist = fetch_openml(
    'mnist_784',
    version=1,
    parser='auto',
)
X = mnist.data
y = mnist.target

In [4]:
import numpy as np
import random

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import xgboost as xgb
from sklearn.metrics import accuracy_score

In [70]:
def cal_pop_fitness(pop, X_train, y_train, X_test, y_test):
    """Score every individual in ``pop`` with a small random forest.

    Each individual is a sequence of positional column indices. Duplicates are
    removed with ``np.unique`` and the remaining indices select columns from
    ``X_train`` / ``X_test`` through ``.iloc``.

    Args:
        pop: indexable collection of individuals (``len(pop)`` individuals).
        X_train, X_test: feature tables exposing ``.iloc`` (presumably pandas
            DataFrames -- confirm against the caller).
        y_train, y_test: labels matching the rows of the feature tables.

    Returns:
        list: one accuracy score per individual, in population order.
    """

    def _accuracy_for(individual):
        # Duplicate genes would only repeat a column, so drop them first.
        feature_idx = np.unique(individual)
        model = RandomForestClassifier(n_estimators=5, random_state=42)
        model.fit(X_train.iloc[:, feature_idx], y_train)
        predicted = model.predict(X_test.iloc[:, feature_idx])
        return accuracy_score(y_test, predicted)

    return [_accuracy_for(pop[position]) for position in range(len(pop))]

def fit_svm(pop, X_train, y_train, X_test, y_test):
    """Score every individual in ``pop`` with a linear-kernel SVM (``C=1.0``).

    The unique gene values of an individual are used as positional column
    indices into ``X_train`` / ``X_test`` via ``.iloc``.

    Returns:
        list: test-set accuracy of each individual, in population order.
    """
    scores = []
    position = 0
    while position < len(pop):
        selected = np.unique(pop[position])

        train_view = X_train.iloc[:, selected]
        model = SVC(kernel='linear', C=1.0)
        model.fit(train_view, y_train)

        test_view = X_test.iloc[:, selected]
        scores.append(accuracy_score(y_test, model.predict(test_view)))
        position += 1

    return scores

def fit_xgboost(pop, X_train, y_train, X_test, y_test):
    """Score every individual in ``pop`` with a default ``xgb.XGBClassifier``.

    An individual's unique gene values are treated as positional column
    indices; the classifier is trained on those columns of ``X_train`` and
    evaluated on the same columns of ``X_test``.

    Returns:
        list: accuracy per individual, in population order.
    """
    accuracies = []
    for member in map(pop.__getitem__, range(len(pop))):
        cols = np.unique(member)

        booster = xgb.XGBClassifier()
        booster.fit(X_train.iloc[:, cols], y_train)

        guesses = booster.predict(X_test.iloc[:, cols])
        accuracies.append(accuracy_score(y_test, guesses))

    return accuracies

def select_mating_pool(pop, fitness, num_parents):
    """Return the ``num_parents`` fittest individuals of the current generation.

    Individuals are ordered by descending fitness; ties keep their original
    population order. The caller's ``fitness`` sequence is left untouched.

    Args:
        pop: 2-D array-like of shape (n_individuals, n_genes).
        fitness: sequence with one score per individual (any real values,
            including negative ones).
        num_parents: number of individuals to keep.

    Returns:
        np.ndarray: float array of shape (num_parents, n_genes).

    Raises:
        ValueError: if ``num_parents`` is negative or exceeds the number of
            scored individuals.
    """
    scores = np.asarray(fitness, dtype=float)
    if num_parents < 0 or num_parents > scores.shape[0]:
        raise ValueError(
            f"num_parents must be between 0 and {scores.shape[0]}, got {num_parents}"
        )

    # A stable sort on the negated scores ranks best-first and breaks ties by
    # lowest index, without needing a sentinel value written into `fitness`.
    ranked = np.argsort(-scores, kind="stable")[:num_parents]

    # Keep the float dtype that callers have always received from this function.
    return np.asarray(pop)[ranked].astype(float)

def crossover(parents, offspring_size):
    """Build offspring by one-point crossover of consecutive parents.

    Child ``k`` copies the genes before a random cut point from parent
    ``k % n_parents`` and the genes from the cut point onwards from parent
    ``(k + 1) % n_parents``. The cut is drawn from ``[1, n_genes - 1]``, so
    both parents always contribute at least one gene.

    Args:
        parents: 2-D array of shape (n_parents, n_genes).
        offspring_size: ``(n_offspring, n_genes)`` shape of the result.

    Returns:
        np.ndarray: float array of shape ``offspring_size``.
    """
    children = np.empty(offspring_size)
    child = 0
    while child < offspring_size[0]:
        cut = np.random.randint(1, parents.shape[1])
        head_parent = child % parents.shape[0]
        tail_parent = (child + 1) % parents.shape[0]
        children[child, :cut] = parents[head_parent, :cut]
        children[child, cut:] = parents[tail_parent, cut:]
        child += 1

    return children

def crossover_2(parents, offspring_size, method='two_point'):
    """Build offspring from consecutive parents by two-point or uniform crossover.

    For child ``k`` the primary parent is ``k % n_parents`` and the secondary
    parent is ``(k + 1) % n_parents``.

    * ``'two_point'``: the genes between two distinct random cut points come
      from the secondary parent, everything else from the primary parent.
    * ``'uniform'``: a random boolean mask decides gene by gene; ``True`` takes
      the primary parent's gene, ``False`` the secondary's.

    Args:
        parents: 2-D array of shape (n_parents, n_genes).
        offspring_size: ``(n_offspring, n_genes)`` shape of the result.
        method: ``'two_point'`` (default) or ``'uniform'``.

    Returns:
        np.ndarray: float array of shape ``offspring_size``.

    Raises:
        ValueError: when an unsupported ``method`` is given and at least one
            offspring is requested.
    """
    children = np.empty(offspring_size)

    for child in range(offspring_size[0]):
        if method not in ('two_point', 'uniform'):
            raise ValueError("Invalid crossover method. Supported methods: 'two_point', 'uniform'")

        primary = parents[child % parents.shape[0]]
        secondary = parents[(child + 1) % parents.shape[0]]

        if method == 'uniform':
            take_primary = np.random.randint(0, 2, size=parents.shape[1], dtype=bool)
            children[child, take_primary] = primary[take_primary]
            children[child, ~take_primary] = secondary[~take_primary]
            continue

        # Two distinct cut points, ordered so that start <= stop.
        cuts = np.sort(np.random.choice(parents.shape[1], 2, replace=False))
        start, stop = cuts[0], cuts[1]
        children[child, :start] = primary[:start]
        children[child, start:stop] = secondary[start:stop]
        children[child, stop:] = primary[stop:]

    return children

def mutation(offspring_crossover, gene_idx=4):
    """Nudge one gene of every offspring by a random step of -1, 0 or +1.

    The array is modified in place and also returned.

    Args:
        offspring_crossover: 2-D array of shape (n_offspring, n_genes).
        gene_idx: column of the gene to mutate. Defaults to 4, the position
            that was previously hard-coded, so existing calls are unchanged.

    Returns:
        np.ndarray: the same ``offspring_crossover`` object after mutation.
    """
    for idx in range(offspring_crossover.shape[0]):
        # Draw a scalar step (no ``size`` argument): writing a shape-(1,) array
        # into a single element relies on an array-to-scalar conversion that
        # NumPy has deprecated.
        random_value = np.random.choice([-1, 0, 1])
        offspring_crossover[idx, gene_idx] += random_value
    return offspring_crossover

def enhanced_mutation(offspring_crossover, mutation_rate=0.1, min_value=None, max_value=None):
    """Mutate a fraction of the genes of every offspring by -1, 0 or +1.

    For each row, ``int(n_genes * mutation_rate)`` distinct gene positions are
    chosen at random and a random step is added to each. The array is modified
    in place and also returned.

    Args:
        offspring_crossover: 2-D array of shape (n_offspring, n_genes); may
            have zero rows.
        mutation_rate: fraction of genes to mutate per offspring.
        min_value: optional lower bound; mutated genes are clamped to it.
        max_value: optional upper bound; mutated genes are clamped to it.
            Leaving both bounds as ``None`` keeps the unclamped behaviour.

    Returns:
        np.ndarray: the same ``offspring_crossover`` object after mutation.
    """
    # Read the gene count from the shape rather than from row 0, so an empty
    # offspring array is handled instead of raising IndexError.
    num_offspring = offspring_crossover.shape[0]
    num_genes = offspring_crossover.shape[1]
    num_genes_to_mutate = int(num_genes * mutation_rate)

    for idx in range(num_offspring):
        # Distinct positions, so every selected gene receives exactly one step.
        mutation_indices = np.random.choice(num_genes, num_genes_to_mutate, replace=False)

        # One step per selected gene.
        random_values = np.random.choice([-1, 0, 1], num_genes_to_mutate)

        offspring_crossover[idx, mutation_indices] += random_values

    # A +/-1 step can push a gene past the valid range; clamp when bounds are given.
    if min_value is not None or max_value is not None:
        np.clip(offspring_crossover, min_value, max_value, out=offspring_crossover)

    return offspring_crossover

In [None]:
# Genetic-algorithm configuration and initial population.
col_len = 28  # number of genes (column indices) carried by each individual

sol_per_pop = 32  # individuals per generation
num_parents_mating = 8  # individuals kept as parents each generation

pop_size = (sol_per_pop, col_len)

max_value = 783  # largest gene value; genes are used as column indices (presumably 784 MNIST pixels -- confirm)
std_dev = 150
mean = max_value / 2

# Seed NumPy's global generator so the initial population, and the crossover
# and mutation draws that follow, are the same on every fresh run.
random_seed = 42
np.random.seed(random_seed)

# Draw genes around the middle of the index range, truncate to integers and
# clamp anything that fell outside [0, max_value].
new_population = np.clip(
    np.random.normal(mean, std_dev, pop_size).astype(int),
    0,
    max_value,
)

print(new_population)

In [72]:
# Convert the labels to 64-bit integers.
y = y.astype(dtype='int64')

In [73]:
# 70/30 split with a fixed seed, then keep only the first 10,000 training rows
# and the first 3,000 test rows of that split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
X_train, y_train = X_train[:10000], y_train[:10000]
X_test, y_test = X_test[:3000], y_test[:3000]

In [None]:
num_generations = 20
# Genes are used as positional column indices, so they must stay inside
# [0, number of feature columns - 1].
max_gene = X_train.shape[1] - 1
for generation in range(num_generations):
    # Measuring the fitness of each chromosome in the population.
    fitness = cal_pop_fitness(new_population, X_train, y_train, X_test, y_test)

    print(fitness)

    # Record the best score now; selection below works on a copy so that
    # `fitness` keeps this generation's real scores.
    best_fitness = max(fitness)

    # Selecting the best parents in the population for mating.
    parents = select_mating_pool(new_population, list(fitness),
                                 num_parents_mating)

    # Generating the next generation using crossover.
    offspring_crossover = crossover_2(parents,
                                      offspring_size=(pop_size[0] - parents.shape[0], col_len))

    # Adding some variation to the offspring using mutation.
    offspring_mutation = enhanced_mutation(offspring_crossover)

    # Mutation moves genes by +/-1, which can produce -1 (silently read as the
    # last column) or an index past the last column (an out-of-bounds error in
    # the next fitness evaluation). Clamp back into the valid range.
    offspring_mutation = np.clip(offspring_mutation, 0, max_gene)

    # Creating the new population based on the parents and offspring.
    new_population[0:parents.shape[0], :] = parents
    new_population[parents.shape[0]:, :] = offspring_mutation

    # The best result in the current iteration.
    print("Generation : ", generation)
    print("Best fitness : ", best_fitness)
    print()

In [77]:
# Evaluate the final generation once more and locate its best individual(s).
fitness = cal_pop_fitness(new_population, X_train, y_train, X_test, y_test)

# np.where returns a tuple of index arrays; later cells index with this tuple,
# so it is kept in that form. Its first entry lists every individual that ties
# for the top score.
top_score = np.max(fitness)
best_match_idx = np.where(fitness == top_score)

first_best = best_match_idx[0][0]
print("Best solution fitness : ", fitness[first_best])

Best solution fitness :  0.8013333333333333


In [78]:
# best_match_idx is the tuple returned by np.where, so combining it with ":"
# adds extra leading axes; the recorded output is nested three brackets deep.
print("Best solution : ", new_population[best_match_idx, :])

Best solution :  [[[644 484 244 211 131 296 453 391 518 350 213 334 362 401 354 432 514
   460 155 539 427 424 317   1  46 342 347 517]]]


In [60]:
# Baseline: a 100-tree random forest trained on every feature column of a
# fresh 70/30 split (this replaces the previous X_train/X_test/y_train/y_test).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
y_pred = rf_classifier.fit(X_train, y_train).predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.9667619047619047


In [79]:
# Keep the best chromosome(s). Indexing with the np.where tuple plus ":" gives a
# result with extra leading axes, which is why later cells unwrap it as feat[0][0].
feat = new_population[best_match_idx, :]

In [80]:
# Strip the two leading axes to display the flat array of selected column indices.
feat[0][0]

array([644, 484, 244, 211, 131, 296, 453, 391, 518, 350, 213, 334, 362,
       401, 354, 432, 514, 460, 155, 539, 427, 424, 317,   1,  46, 342,
       347, 517])

In [81]:
# Train the 100-tree random forest on a fresh 70/30 split, restricted to the
# columns chosen by the genetic algorithm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

selected_columns = feat[0][0]
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train.iloc[:, selected_columns], y_train)

y_pred = rf_classifier.predict(X_test.iloc[:, selected_columns])
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8984285714285715


In [10]:
from sklearn.metrics import classification_report

In [7]:
# Recreate the seeded 70/30 split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [8]:
# Column indices copied from an earlier run. They were pasted as a printed
# array (space separated), which is not valid Python; written here as a list.
new_population = [
    239, 459, 392, 521, 604, 550, 241, 371, 490, 298, 488, 350, 576, 348,
    149, 346, 427, 465, 408, 475, 547, 48, 184, 365, 714, 526, 297, 264,
]

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train and evaluate on the selected columns only.
rf_classifier.fit(X_train.iloc[:, new_population], y_train)

y_pred = rf_classifier.predict(X_test.iloc[:, new_population])
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.9228571428571428


In [11]:
# Per-class precision, recall and F1 for the current y_pred against y_test.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.96      0.97      0.96      2058
           1       0.97      0.98      0.97      2364
           2       0.91      0.92      0.92      2133
           3       0.91      0.90      0.91      2176
           4       0.93      0.92      0.92      1936
           5       0.90      0.90      0.90      1915
           6       0.94      0.96      0.95      2088
           7       0.93      0.93      0.93      2248
           8       0.89      0.86      0.87      1992
           9       0.88      0.90      0.89      2090

    accuracy                           0.92     21000
   macro avg       0.92      0.92      0.92     21000
weighted avg       0.92      0.92      0.92     21000

