In [None]:
from src.models import *
from src.gfo import GFOProblem, SOCallback, blocker, build_rand_blocks, get_model_params, set_model_state

# Train on MNIST
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from tqdm import tqdm

import numpy as np
import pandas as pd
from sklearn.metrics import (
            f1_score,
        )
import matplotlib.pyplot as plt

In [None]:
# Load MNIST.
# Preprocessing: convert each image to a tensor, then normalise it with
# mean 0.1307 and std 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Build the train split first, then the test split. Both are downloaded into
# ./data when missing and share the same preprocessing pipeline.
trainset, testset = (
    torchvision.datasets.MNIST(
        root="./data", train=is_train_split, download=True, transform=transform
    )
    for is_train_split in (True, False)
)

# Training batches are shuffled; evaluation keeps the dataset order.
# NOTE(review): batch_size=10000 presumably serves the whole test split as a
# single batch -- confirm against the dataset size.
train_loader = DataLoader(dataset=trainset, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=testset, batch_size=10000, shuffle=False)

In [None]:
def test(model, test_loader, criterion, device):
    """Evaluate ``model`` on every batch of ``test_loader``.

    Args:
        model: Module called as ``model(data)``; it is switched to eval mode.
        test_loader: Iterable of ``(data, target)`` batches that also supports
            ``len()`` and exposes a ``.dataset`` attribute supporting ``len()``.
        criterion: Callable invoked as ``criterion(output, target)``; its
            result must support ``.item()``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A ``(loss, accuracy, f1)`` tuple where
        * ``loss`` is the mean of the per-batch criterion values,
        * ``accuracy`` is the number of correct predictions divided by
          ``len(test_loader.dataset)``,
        * ``f1`` is the macro-averaged F1 score over all evaluated samples.
    """
    model.eval()
    total_loss = 0
    correct = 0
    # Predictions and labels are gathered across batches so that macro-F1 is
    # computed once on the full set. Averaging per-batch macro-F1 values is
    # not equivalent: a class that is absent from one batch skews that batch's
    # score, and a smaller final batch would get the same weight as a full one.
    all_targets = []
    all_preds = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += criterion(output, target).item()
            # Predicted class = index of the largest output along dim 1.
            pred = output.argmax(dim=1)
            target_like_pred = target.view_as(pred)
            correct += pred.eq(target_like_pred).sum().item()
            all_targets.append(target_like_pred.cpu())
            all_preds.append(pred.cpu())

    f1 = f1_score(
        torch.cat(all_targets).numpy(),
        torch.cat(all_preds).numpy(),
        average='macro',
    )

    return total_loss / len(test_loader), correct / len(test_loader.dataset), f1

In [None]:
from cmaes import CMA

import os
import pickle

# ---------------------------------------------------------------------------
# Parameter setting
# ---------------------------------------------------------------------------
NP = 100           # not used by the live code in this cell (only by the commented-out init_pop line)
block_size = 1000  # forwarded to build_rand_blocks and embedded in output file names
max_evals = 10000  # fitness-evaluation budget of each CMA-ES run
n_restarts = 5     # number of consecutive CMA-ES runs
adp_sigma = 0.1    # step size of the first run; multiplied by 0.1 after every run

# ---------------------------------------------------------------------------
# Define model
# ---------------------------------------------------------------------------
model = MLP()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

init_params = get_model_params(model)
D = len(init_params)
print(f"Original dims: {D} D")

# ---------------------------------------------------------------------------
# Codebook: load the cached one if present, otherwise build and cache it
# ---------------------------------------------------------------------------
# The cache and the history CSV are written under out/, so make sure the
# directory exists before the first write.
os.makedirs("out", exist_ok=True)
codebook_path = f'out/codebook_D{D}_blocksize{block_size}.pkl'
if os.path.exists(codebook_path):
    # NOTE(security): pickle.load can execute arbitrary code; only load cache
    # files that were produced by this notebook.
    with open(codebook_path, 'rb') as f:
        codebook = pickle.load(f)
else:
    codebook = build_rand_blocks(D, block_size=block_size)

    with open(codebook_path, 'wb') as f:
        pickle.dump(codebook, f)

bD = len(codebook)
print(f"Blocked dims: {bD} D")
# Random starting point in [-1, 1) for the blocked search space.
x0 = np.random.uniform(low=-1, high=1, size=(bD))
init_params = blocker(init_params, codebook)
# x0 = init_params.copy()
# init_pop = np.random.normal(loc=init_params, scale=0.1, size=(NP, bD))

# random_indices = np.random.choice(
#             np.arange(len(trainset)), size=1024, replace=False
#         )
# random_dataset = Subset(trainset, random_indices)
# data_loader = DataLoader(random_dataset, batch_size=128, shuffle=True)
data_loader = DataLoader(trainset, batch_size=1024, shuffle=True)
problem = GFOProblem(n_var=bD, model=model, dataset=trainset, test_loader=test_loader, train_loader=data_loader,
                      set_model_state=set_model_state,
                      batch_size=1024, device=device, criterion="f1",
                      block=True, codebook=codebook, orig_dims=D)

# Evaluate the random start and the blocked initial parameters once up front.
out = {"F": []}
problem._evaluate(np.array([x0, init_params]), out=out)
print(out)

# ---------------------------------------------------------------------------
# History file: seed it with the row for the starting point x0
# ---------------------------------------------------------------------------
csv_path = f"out/MLP_block_bs{block_size}_gfo_f1_1024data_cmaesv2_5restart_hist.csv"
plt_path = f"out/MLP_block_bs{block_size}_gfo_f1_1024data_cmaesv2_5restart_plt.pdf"
# NOTE(review): two candidates were evaluated above but n_eval is recorded as
# 1 -- confirm this is intended.
df = pd.DataFrame({
            'n_step': [0],
            'n_eval': [1],
            'f_best': [out["F"][0]],
            'f_avg': [out["F"][0]],
            'f_std': [0],
            'test_f1_best': problem.test_func(x0),
        })
df.to_csv(csv_path, index=False)

# NOTE(review): the callback is constructed but never called below in this
# cell -- confirm whether it should be invoked once per step.
callback = SOCallback(k_steps=100, csv_path=csv_path, plt_path=plt_path)

# ---------------------------------------------------------------------------
# CMA-ES with restarts
# ---------------------------------------------------------------------------
# Best candidate seen so far across all generations and runs. A generation's
# minimum can be worse than an earlier one, so it must not simply overwrite
# the incumbent.
# NOTE(review): if the fitness is evaluated on random batches, the incumbent's
# value is a noisy estimate -- confirm.
best_X, best_F = None, None
for i_res in range(n_restarts):
    optimizer = CMA(mean=x0, sigma=adp_sigma)
    evals = 0
    steps = 0
    while evals < max_evals:
        steps += 1
        solutions = []
        for _ in range(optimizer.population_size):
            x = optimizer.ask()
            fitness = problem.scipy_fitness_func(x)
            evals += 1
            solutions.append((x, fitness))

        # Ascending by fitness: the first entry is this generation's minimum.
        solutions.sort(key=lambda s: s[1])
        pop_F = [s[1] for s in solutions]
        gen_X, gen_F = solutions[0]
        if best_F is None or gen_F < best_F:
            best_X, best_F = gen_X.copy(), gen_F
        print(f"step: {steps}, FE: {evals}, f_min: {gen_F:.6f}, f_avg: {np.mean(pop_F):.6f}")

        optimizer.tell(solutions)

    # The next run starts from the best point found so far with a 10x smaller
    # step size.
    x0 = best_X.copy()
    adp_sigma *= 0.1

    # Only announce a restart when another run actually follows.
    if i_res < n_restarts - 1:
        print("Restart optimizer...")

print("Best solution found: \nX = %s\nF = %s" % (best_X, best_F))