## Importing libraries and getting data ready

In [None]:
import torch
from torchvision import datasets
from torchvision.transforms import v2
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F

# Preprocessing applied to every sample: convert to an image tensor, cast to
# float32 (rescaling the values, scale=True), then normalise with the
# single-channel mean/std precomputed for the MNIST dataset.
transform = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST train / test splits, stored under ./data (downloaded when missing).
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Batches of 64; only the training data is shuffled.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=1,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=1,
)



In [19]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


Using device: cpu


## CNN Model

In [20]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier that outputs 10 logits.

    Each stage is Conv(3x3, padding=1) -> BatchNorm -> ReLU -> 2x2 max-pool,
    followed by two fully connected layers. The first linear layer expects
    64 * 7 * 7 flattened features, i.e. a 7x7 map with 64 channels after the
    two poolings (which is what a 1x28x28 input produces: 28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()

        # Convolutional layers; padding=1 keeps the spatial size for 3x3 kernels.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)

        # Batch normalisation, one per convolution (sized to its output channels).
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)

        # Max pooling shared by both stages; halves height and width.
        self.pool = nn.MaxPool2d(2, stride=2)

        # Fully connected classifier head.
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Map a batch of images to unnormalised class scores of shape (batch, 10)."""
        # Conv -> BN -> ReLU -> Pool, applied once per stage.
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            x = self.pool(F.relu(norm(conv(x))))

        # Collapse channels and spatial dimensions into one feature vector per sample.
        features = x.view(x.size(0), -1)
        hidden = F.relu(self.fc1(features))
        return self.fc2(hidden)


## Evaluate




In [22]:
def evaluate(model, data_loader, device=None):
    """Return the classification accuracy of ``model`` on ``data_loader`` in percent.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to per-class scores of
            shape (batch, num_classes).
        data_loader: Iterable yielding ``(images, labels)`` batches of tensors.
        device: Device the batches are moved to before the forward pass.
            When omitted, the device of the model's first parameter is used
            (CPU for a model without parameters), so the function does not
            rely on a notebook-global ``device`` variable.

    Returns:
        Accuracy as a float in [0, 100]. Returns ``0.0`` when the loader
        yields no samples instead of dividing by zero.

    Note:
        The model is switched to eval mode and is left in that mode.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    # No gradients are needed for scoring; this saves memory and time.
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            # Predicted class = index of the highest score for each sample.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        return 0.0
    return 100 * correct / total


## Functions for GA

In [None]:
import copy

def get_flat_params(model):
    """Return all parameters of ``model`` as one freshly allocated 1-D tensor.

    The parameters are flattened and concatenated in ``model.parameters()``
    order. The result does not share storage with the model, so modifying it
    leaves the model untouched.
    """
    pieces = [p.data.view(-1) for p in model.parameters()]
    return torch.cat(pieces).clone()

def set_flat_params(model, flat_params):
    """Overwrite the parameters of ``model`` in place from a flat vector.

    ``flat_params`` is consumed in ``model.parameters()`` order: each
    parameter takes the next ``numel()`` values, reshaped to its own shape.

    Args:
        model: ``nn.Module`` whose parameters are overwritten.
        flat_params: 1-D tensor with exactly one value per model parameter
            element.

    Raises:
        ValueError: If the number of elements in ``flat_params`` differs from
            the model's total parameter count. The check runs before any
            write, so the model is never left partially updated.
    """
    params = list(model.parameters())
    expected = sum(param.numel() for param in params)
    if flat_params.numel() != expected:
        raise ValueError(
            f"flat_params has {flat_params.numel()} elements, "
            f"but the model has {expected} parameter values"
        )

    pointer = 0
    for param in params:
        num_params = param.numel()
        param.data.copy_(flat_params[pointer:pointer + num_params].view_as(param))
        pointer += num_params

def create_random_model(base_model_class, device):
    """Build a model on ``device`` whose parameters are standard-normal noise.

    ``base_model_class`` is called with no arguments to construct the model;
    every tensor returned by ``model.parameters()`` is then replaced by a
    sample from N(0, 1) of the same shape.
    """
    individual = base_model_class()
    individual = individual.to(device)

    for weights in individual.parameters():
        weights.data = torch.randn_like(weights)

    return individual


## Training

In [24]:
# Create a population of K CNN models (model = CNN().to(device)) with random parameters, evaluate each of them, and sort them from best to worst by fitness.
# Keep the best top_n unchanged (elitism) and fill the rest of the new population with children produced by crossover and mutation; then evaluate the new population and repeat for N generations.
# Finally, the best model from the last generation is evaluated on test_loader (the test data).

In [27]:
import random

def fitness_function(model, data_loader):
    """Fitness of an individual: the value ``evaluate`` returns for it on ``data_loader``."""
    score = evaluate(model, data_loader)
    return score

def selection(population, fitnesses, top_n=None):
    """Return the ``top_n`` fittest individuals, best first.

    ``fitnesses[i]`` is the score of ``population[i]``; higher is better.
    Individuals with equal fitness keep their original relative order.
    When ``top_n`` is ``None`` the whole population is returned, sorted.
    """
    keep = len(population) if top_n is None else top_n

    # Rank indices rather than the individuals themselves so that only the
    # fitness values are ever compared.
    ranking = list(range(len(fitnesses)))
    ranking.sort(key=lambda idx: fitnesses[idx], reverse=True)

    survivors = []
    for idx in ranking[:keep]:
        survivors.append(population[idx])
    return survivors

def crossover(parent1, parent2):
    """Single-point crossover on the flattened parameter vectors of two models.

    A cut index is drawn uniformly from ``0 .. n`` (both ends included, where
    ``n`` is the number of parameter values). The child takes the values
    before the cut from ``parent1`` and the rest from ``parent2``. The child
    starts as a deep copy of ``parent1``; neither parent is modified.
    """
    genes_a = get_flat_params(parent1)
    genes_b = get_flat_params(parent2)

    cut = random.randint(0, len(genes_a))
    head, tail = genes_a[:cut], genes_b[cut:]

    offspring = copy.deepcopy(parent1)
    set_flat_params(offspring, torch.cat((head, tail)))
    return offspring

def mutate(model, mutation_rate, mutation_strength=1.0):
    """Return a mutated deep copy of ``model``; the input model is not modified.

    Every parameter value is independently selected for mutation with
    probability ``mutation_rate``; selected values receive additive Gaussian
    noise drawn from N(0, mutation_strength**2).

    The work is done with whole-tensor operations instead of a Python loop
    over individual scalars, which is prohibitively slow for models with
    hundreds of thousands of parameters.

    Args:
        model: ``nn.Module`` to copy and perturb.
        mutation_rate: Per-value mutation probability. Values <= 0 mutate
            nothing, values >= 1 mutate every parameter value.
        mutation_strength: Standard deviation of the added noise
            (default 1.0, i.e. standard-normal noise).

    Returns:
        The mutated copy.
    """
    mutated_model = copy.deepcopy(model)
    with torch.no_grad():
        for param in mutated_model.parameters():
            # rand_like is uniform on [0, 1), so `< mutation_rate` selects each
            # entry with probability mutation_rate.
            mask = torch.rand_like(param) < mutation_rate
            noise = torch.randn_like(param) * mutation_strength
            # Multiplying by the boolean mask zeroes the noise of unselected entries.
            param.add_(noise * mask)
    return mutated_model

# --- Genetic-algorithm hyperparameters ---
population_size = 4
top_n = 2  # number of elites kept per generation; must be >= 2 (two parents are sampled)
# NOTE(review): the mutation rate is itself drawn at random from [0.01, 1], so
# it differs from run to run unless `random` is seeded; consider a fixed value.
mutation_rate = round(random.uniform(0.01, 1), 2)
number_of_generations = 2

# Initial population of randomly initialised models.
population = [create_random_model(CNN, device) for _ in range(population_size)]
best_model = None
best_fitness = -float("inf")

for generation in range(number_of_generations):
    print(f"Generation {generation + 1}")
    fitnesses = [fitness_function(model, train_loader) for model in population]
    elite = selection(population, fitnesses, top_n)
    # Copy the list: `elite` must stay the fixed parent pool. Aliasing it
    # (new_population = elite) would make every appended child a candidate
    # parent for the following children.
    new_population = list(elite)

    # Fill the remaining slots with mutated children of two distinct elites.
    while len(new_population) < population_size:
        parent1, parent2 = random.sample(elite, 2)
        child = crossover(parent1, parent2)
        child = mutate(child, mutation_rate)
        new_population.append(child)

    population = new_population
    print("New population created")

    # `elite` is sorted best-first, so elite[0] is the model that achieved
    # max(fitnesses) in this generation.
    generation_best_fitness = max(fitnesses)
    if generation_best_fitness > best_fitness:
        best_fitness = generation_best_fitness
        best_model = elite[0]

    print(f"Best Fitness in Generation {generation + 1}: {generation_best_fitness}%")

# Report the held-out accuracy of the best model found during the search.
final_test_accuracy = fitness_function(best_model, test_loader)
print(f"Final Test Accuracy: {final_test_accuracy}%")

Generation 1
Best Fitness: 16.293333333333333
New population created
Best Fitness in Generation 1: 16.293333333333333%
Generation 2
Best Fitness: 16.293333333333333
New population created
Best Fitness in Generation 2: 16.293333333333333%
Final Test Accuracy: 16.54%
