In [1]:
from src.models import *
from src.gfo import GFOProblem, SOCallback, blocker, build_rand_blocks, get_model_params, set_model_state

# Train on MNIST
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from tqdm import tqdm

import numpy as np
import pandas as pd
from sklearn.metrics import (
            f1_score,
        )
import matplotlib.pyplot as plt

In [2]:
# Load MNIST
# Preprocessing: convert each image to a tensor, then normalise it with a
# single-channel mean/std pair (the values commonly quoted for MNIST).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Both splits live under ./data (downloaded on first use) and share the same
# preprocessing; only the `train` flag differs.
trainset, testset = (
    torchvision.datasets.MNIST(
        root="./data", train=is_train, download=True, transform=transform
    )
    for is_train in (True, False)
)

# Training batches are shuffled; the evaluation loader keeps dataset order and
# uses a batch size of 10000 (presumably so the whole test split is one batch).
train_loader = DataLoader(dataset=trainset, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=testset, batch_size=10000, shuffle=False)

In [3]:
def test(model, test_loader, criterion, device):
    """Evaluate ``model`` on every batch yielded by ``test_loader``.

    Args:
        model: Module called as ``model(data)``; it is switched to eval mode.
        test_loader: Iterable of ``(data, target)`` batches that supports
            ``len()`` and exposes a ``.dataset`` attribute supporting ``len()``.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            tensor (``.item()`` is called on the result).
        device: Device each batch is moved to before the forward pass.

    Returns:
        A tuple ``(mean_loss, accuracy, macro_f1)`` where

        * ``mean_loss`` is the sum of per-batch losses divided by the number
          of batches,
        * ``accuracy`` is the number of correct predictions divided by
          ``len(test_loader.dataset)``,
        * ``macro_f1`` is the macro-averaged F1 score computed once over all
          predictions.

    Raises:
        ZeroDivisionError: If ``test_loader`` has no batches or its dataset
            is empty.
    """
    model.eval()
    total_loss = 0
    correct = 0
    all_targets = []
    all_preds = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += criterion(output, target).item()
            pred = output.argmax(dim=1)
            target = target.view_as(pred)
            correct += pred.eq(target).sum().item()
            # Keep per-sample results so F1 can be computed over the full set.
            all_targets.append(target.cpu())
            all_preds.append(pred.cpu())

    # Computed before the F1 step so an empty loader still fails with
    # ZeroDivisionError, as it did previously.
    mean_loss = total_loss / len(test_loader)
    accuracy = correct / len(test_loader.dataset)

    # Macro-F1 is not decomposable over batches: averaging per-batch scores
    # gives a different (and, for batches missing some classes, distorted)
    # value. Score all predictions in a single call instead.
    f1 = f1_score(
        torch.cat(all_targets).numpy(),
        torch.cat(all_preds).numpy(),
        average='macro',
    )

    return mean_loss, accuracy, f1

In [5]:
# Gradient-free training of the MLP with CMA-ES over a blocked (reduced)
# parameter vector: build/load the block codebook, evaluate a baseline, then
# run the optimiser while a callback logs history to CSV and a plot.
import os
import pickle

from pymoo.algorithms.soo.nonconvex.cmaes import CMAES
from pymoo.optimize import minimize

# Parameter Setting
NP = 100  # not referenced by the active code in this cell
block_size = 1000
OUT_DIR = "out"  # codebook cache, CSV history and plot are all written here

# Define model
model = MLP()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

init_params = get_model_params(model)
D = len(init_params)
print(f"Original dims: {D} D")

# The output directory must exist before anything is written into it;
# without this a fresh checkout fails with FileNotFoundError below.
os.makedirs(OUT_DIR, exist_ok=True)

# The codebook is cached on disk, keyed by the original dimensionality and
# the block size, so later runs reuse the same blocks.
codebook_path = f'{OUT_DIR}/codebook_D{D}_blocksize{block_size}.pkl'
if os.path.exists(codebook_path):
    # NOTE: pickle.load can execute arbitrary code; only load cache files
    # that were produced by this notebook.
    with open(codebook_path, 'rb') as f:
        codebook = pickle.load(f)
else:
    codebook = build_rand_blocks(D, block_size=block_size)

    with open(codebook_path, 'wb') as f:
        pickle.dump(codebook, f)

bD = len(codebook)
print(f"Blocked dims: {bD} D")

# Starting point for CMA-ES: uniform random in [-2, 2) per blocked dimension.
# Alternative tried earlier: start from the blocked initial model parameters
# (x0 = init_params.copy()), or sample an initial population of NP vectors
# around them with np.random.normal(scale=0.1).
x0 = np.random.uniform(low=-2, high=2, size=(bD))
# From here on `init_params` holds the blocked parameter vector.
init_params = blocker(init_params, codebook)

# The whole training set is served in shuffled batches of 2048. An earlier
# variant used a fixed random 1024-sample Subset of `trainset` instead.
data_loader = DataLoader(trainset, batch_size=2048, shuffle=True)
problem = GFOProblem(n_var=bD, model=model, dataset=trainset, test_loader=test_loader, train_loader=data_loader,
                     set_model_state=set_model_state,
                     batch_size=1024, device=device, criterion="f1",
                     block=True, codebook=codebook, orig_dims=D)

# Baseline: objective values of the random start and of the blocked initial
# parameters, in that order.
out = {"F": []}
problem._evaluate(np.array([x0, init_params]), out=out)
print(out)

# Every tolerance-based stopping criterion is relaxed (0 or inf), presumably
# so that only the evaluation budget passed to `minimize` ends the run.
algorithm = CMAES(
    x0=x0,
    sigma=0.01,
    tolconditioncov=0,
    tolfacupx=np.inf,
    tolupsigma=np.inf,
    tolfun=0,
    tolfunhist=0,
    tolstagnation=np.inf,
    tolx=0,
    # restarts=4,
    # bipop=True,
)

csv_path = f"{OUT_DIR}/MLP_block_bs{block_size}_gfo_f1_2048data_cmaes_hist.csv"
plt_path = f"{OUT_DIR}/MLP_block_bs{block_size}_gfo_f1_2048data_cmaes_plt.pdf"

# Seed the history file with a step-0 row describing the random start x0.
df = pd.DataFrame({
    'n_step': [0],
    'n_eval': [1],
    'f_best': [out["F"][0]],
    'f_avg': [out["F"][0]],
    'f_std': [0],
    'test_f1_best': problem.test_func(x0),
})
df.to_csv(csv_path, index=False)

# NOTE: seed=None together with the unseeded np.random call above means the
# run is not reproducible between kernel restarts.
res = minimize(problem,
               algorithm,
               ('n_eval', 100000),
               callback=SOCallback(k_steps=100, csv_path=csv_path, plt_path=plt_path),
               seed=None,
               verbose=True)

print("Best solution found: \nX = %s\nF = %s" % (res.X, res.F))

Original dims: 50890 D
Blocked dims: 51 D
{'F': array([0.90241998, 0.98156028])}
n_gen  |  n_eval  |     f_avg     |     f_min     |     f_gap     |     sigma     | min_std  | max_std  |   axis  
     1 |        1 |  0.8991821259 |  0.8991821259 |  0.8991821259 |  0.0100000000 |  0.01000 |  0.01000 |  1.00005
     2 |       16 |  0.8986200203 |  0.8809312806 |  0.8809312806 |  0.0094863246 |  0.00946 |  0.00951 |  1.04032
     3 |       31 |  0.8975952079 |  0.8809312806 |  0.8809312806 |  0.0092352643 |  0.00921 |  0.00927 |  1.05215
     4 |       46 |  0.8979218324 |  0.8809312806 |  0.8809312806 |  0.0090705637 |  0.00903 |  0.00911 |  1.06247
     5 |       61 |  0.8948123241 |  0.8787479624 |  0.8787479624 |  0.0089578641 |  0.00891 |  0.00901 |  1.07347
     6 |       76 |  0.8915386856 |  0.8744770736 |  0.8744770736 |  0.0089690627 |  0.00890 |  0.00903 |  1.08208
     7 |       91 |  0.8833707869 |  0.8599639088 |  0.8599639088 |  0.0089822004 |  0.00891 |  0.00904 |  1.09496

KeyboardInterrupt: 