In [None]:
import csv
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms
import numpy as np
from math import floor, log
from torch.distributions.multivariate_normal import MultivariateNormal
    


# Reproducibility: the CMA-ES particle sampling and the DataLoader shuffling
# further down both draw from PyTorch's global RNG, so seed it once up front.
# NumPy is seeded too so any NumPy-based randomness added later stays repeatable.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

#Device set up
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# Load CIFAR-10 dataset
# ToTensor scales pixels to [0, 1]; Normalize with mean=std=0.5 per channel
# then maps them to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

batch_size = 128

# Training split: shuffled every epoch.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# Test split: fixed order so evaluation is deterministic.
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Human-readable names for the 10 CIFAR-10 class indices.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Define Basic Block
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers and a skip path.

    The skip path is the identity unless the block changes the spatial
    resolution (``stride != 1``) or the channel count
    (``in_planes != planes``); in that case a 1x1 conv + batch-norm projection
    brings the input to the shape of the main path before the two are summed.

    Args:
        in_planes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution (and of the projection).
    """

    # Output channels = planes * expansion; read by the network builder.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()

        # Main path. Only the first convolution may downsample.
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Skip path: an empty Sequential acts as the identity.
        projection = []
        shape_changes = stride != 1 or in_planes != planes
        if shape_changes:
            projection = [
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes),
            ]
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))
        hidden = self.bn2(self.conv2(hidden))
        # In-place residual sum, followed by the final activation.
        hidden.add_(self.shortcut(x))
        return F.relu(hidden)


# Define ResNet
class ResNet(nn.Module):
    """ResNet for small images: 3x3 stem, four residual stages, linear classifier.

    The stages use 64/128/256/512 planes with strides 1/2/2/2. ``forward``
    average-pools with a fixed 4x4 window and flattens, so the classifier only
    lines up when the last stage yields a 4x4 map (e.g. 32x32 inputs with
    blocks that honor ``stride``).

    Args:
        block: residual block class. It is called as
            ``block(in_planes, planes, stride)`` and must expose an integer
            class attribute ``expansion`` (output channels = planes * expansion).
        num_blocks: sequence of four ints, the number of blocks per stage.
        num_classes: size of the classifier output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        # Running channel count fed to the next block; updated by _make_layer.
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        # The last stage emits 512 * expansion channels (see _make_layer), so
        # the classifier must be sized the same way; a bare 512 only works for
        # blocks with expansion == 1.
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest stride 1."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        # Flatten everything but the batch dimension for the classifier.
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [None]:
# Initialize ResNet-18
ResNet18 = ResNet(BasicBlock, [2, 2, 2, 2], num_classes=10)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU still loads when this notebook falls back to CPU.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
ResNet18.load_state_dict(torch.load('GD.pkl', map_location=device))
# Discard the loaded classifier head: it is re-initialised here and then
# overwritten by the CMA-ES search below.
ResNet18.linear.reset_parameters()
ResNet18.to(device)

# Loss minimised by the CMA-ES search.
loss_func = nn.CrossEntropyLoss()

# Define CMA-ES Class
class CMA:
    """Simplified CMA-ES style optimiser that *minimises* a fitness (loss) value.

    Each step samples ``samples`` particles from a multivariate normal, keeps
    the lowest-fitness fraction, moves the mean to the average of the kept
    particles and blends their covariance into the running covariance.

    Args:
        N: dimensionality of the search space.
        total_generations: number of generations over which the kept fraction
            is annealed from 60% down to 20% of the population. Generations
            outside ``[0, total_generations]`` are clamped to the ends.

    Attributes:
        best_params: lowest-fitness particle seen so far (``None`` before the
            first step).
        best_fitness: fitness of ``best_params`` (``inf`` before the first step).
        gen_count: number of completed ``step`` calls.
    """

    def __init__(self, N, total_generations=50):
        self.N = N
        self.mean = torch.zeros(N)
        self.cov = 0.1*torch.eye(N)
        self.samples = 30
        self.learning_rate = 0.00001
        self.total_generations = total_generations
        self.gen_count = 0
        self.best_params = None
        # Lower is better, so start from +inf and track the minimum.
        self.best_fitness = float('inf')

    def step(self, fitness_function, generation):
        """Run one generation.

        Args:
            fitness_function: callable mapping a 1-D tensor of length ``N`` to
                a Python number; lower means better.
            generation: zero-based generation index, used only for the
                selection schedule.

        Returns:
            ``(best, mean)``: the lowest and the average fitness of the
            particles sampled in this generation.
        """
        # Initialize the MultivariateNormal distribution
        distribution = MultivariateNormal(self.mean, covariance_matrix=self.cov)

        # Sample particles (they stay on the device of self.mean / self.cov)
        particles = distribution.sample((self.samples,))
        fitness = torch.tensor([fitness_function(p) for p in particles])

        # Rank particles ascending: index 0 is the lowest (best) fitness.
        ranked_indices = fitness.argsort(descending=False)

        # Anneal the kept fraction from 0.6 to 0.2. Clamping keeps the count
        # valid when step() is called past the scheduled horizon, where the
        # unclamped formula would reach zero or go negative.
        horizon = max(self.total_generations, 1)
        progress = min(max(generation / horizon, 0.0), 1.0)
        number_selected = int(self.samples*(0.6-(progress*(0.6-0.2))))
        number_selected = max(1, number_selected)
        selected_particles = particles[ranked_indices[:number_selected]]

        # Update best parameters: keep the particle with the lowest fitness
        # seen in any generation.
        min_fitness = fitness.min().item()
        if min_fitness < self.best_fitness:
            self.best_fitness = min_fitness
            self.best_params = particles[ranked_indices[0]].clone()

        # Update mean and covariance
        self.mean = selected_particles.mean(dim=0)
        centered = selected_particles - self.mean
        new_cov = torch.matmul(centered.T, centered) / number_selected
        regularization_term = 1e-5 * torch.eye(new_cov.size(0)) # to stop singularity
        self.cov = (1 - self.learning_rate) * self.cov + self.learning_rate * (new_cov+regularization_term)

        self.gen_count += 1
        return min_fitness, fitness.mean().item()

# Fitness Function
def fitness_function(params):
    """Mean training cross-entropy of ``ResNet18`` with its head set from ``params``.

    Uses the notebook globals ``ResNet18``, ``trainloader``, ``loss_func`` and
    ``device``.

    Args:
        params: 1-D tensor holding the flattened ``ResNet18.linear`` weight
            followed by its bias (5120 + 10 values for the 512 -> 10 head).

    Returns:
        float: loss averaged over the batches of ``trainloader`` (lower is
        better).

    Side effects:
        Overwrites ``ResNet18.linear`` weight and bias in place and leaves the
        model in eval mode.
    """
    head = ResNet18.linear
    n_weights = head.weight.numel()

    # Pure evaluation: in eval mode BatchNorm uses its stored running
    # statistics instead of per-batch ones and does not update them, so
    # scoring a candidate never alters the backbone.
    ResNet18.eval()

    # no_grad: nothing is back-propagated, so skip building autograd graphs.
    with torch.no_grad():
        # copy_ writes into the existing parameters (and handles the
        # CPU -> device transfer) instead of allocating new Parameters per call.
        head.weight.copy_(params[:n_weights].reshape(head.weight.shape))
        head.bias.copy_(params[n_weights:])

        total_loss = 0.0
        for x, y in trainloader:
            x = x.to(device)
            y = y.to(device)
            outputs = ResNet18(x)
            total_loss += loss_func(outputs, y).item()

    return total_loss / len(trainloader)

# CMA-ES Optimization
def optimize_with_cma_es(N, generations):
    """Search for the ``ResNet18`` linear head with ``CMA`` and save the results.

    Uses the notebook globals ``CMA``, ``fitness_function`` and ``ResNet18``.

    Args:
        N: dimensionality of the search space (flattened head weight + bias).
        generations: number of ``CMA.step`` calls to run.

    Side effects:
        * prints the first value returned by ``CMA.step`` for every generation;
        * if a best particle was recorded, writes it into ``ResNet18.linear``
          and saves the model's ``state_dict`` to ``CMA.pkl``;
        * saves the per-generation curve to ``CMA_loss.pdf`` and displays it;
        * writes the same values as a single row to ``CMA_fitness.csv``.
    """
    cma = CMA(N)
    best_fitness_values = []

    for generation in range(generations):
        best_fitness, _ = cma.step(fitness_function, generation)
        best_fitness_values.append(best_fitness)
        print(f"Generation {generation + 1}: Best Fitness = {best_fitness:.4f}")

    if cma.best_params is not None:
        head = ResNet18.linear
        n_weights = head.weight.numel()
        # Flattened layout: all weights first, then the biases.
        with torch.no_grad():
            head.weight.copy_(cma.best_params[:n_weights].reshape(head.weight.shape))
            head.bias.copy_(cma.best_params[n_weights:])
        torch.save(ResNet18.state_dict(), 'CMA.pkl')

    fig, ax = plt.subplots()
    ax.plot(range(generations), best_fitness_values, label='Best Fitness')
    ax.set_xlabel('Generations')
    ax.set_ylabel('Loss')
    ax.set_title('CMA-ES Optimization')
    ax.grid()
    ax.legend()
    # Save before showing: with the notebook's inline backend show() closes
    # the figure, so a later savefig would write an empty page.
    fig.savefig('CMA_loss.pdf')
    plt.show()

    # Save to a CSV file
    with open('CMA_fitness.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(best_fitness_values)

# Run Optimization
# Search-space size: the 10 x 512 weights of the linear head plus its 10 biases.
N = 10 * 512 + 10
optimize_with_cma_es(N=N, generations=50)



In [None]:
# Rebuild the architecture and load the head found by CMA-ES.
testNet = ResNet(BasicBlock, [2, 2, 2, 2])
# map_location: CMA.pkl was saved from `device`; remap so it also loads on a
# CPU-only machine.
testNet.load_state_dict(torch.load('CMA.pkl', map_location=device))
testNet.to(device)
# Eval mode makes BatchNorm use its stored running statistics; in train mode
# the predictions would depend on the composition of each test batch.
testNet.eval()

total = 0
correct = 0

with torch.no_grad():
    for x, y in testloader:
        x = x.to(device)
        y = y.to(device)
        # calculate outputs by running images through the network
        outputs = testNet(x)
        # the class with the highest energy is what we choose as prediction
        predicted = outputs.argmax(dim=1)
        total += y.size(0)
        correct += (predicted == y).sum().item()


# Fraction of correctly classified test images.
accuracy = correct/total
print('Accurarcy:', accuracy)
