In [1]:
# Mount Google Drive inside the Colab VM at /content/drive; the project
# directory entered in the next cell lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%cd drive/MyDrive/Projects/GradFree-NeuralNet/

/content/drive/MyDrive/Projects/GradFree-NeuralNet


In [None]:
!pip install pymoo torcheval

In [7]:
from src.models import *
from src.gfo import GFOProblem, SOCallback, blocker, build_rand_blocks, get_model_params, set_model_state

# Train on MNIST
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from tqdm import tqdm

import numpy as np
import pandas as pd
from sklearn.metrics import (
            f1_score,
        )
import matplotlib.pyplot as plt

In [8]:
# Load MNIST
# Every image is converted to a tensor and then normalised with
# mean 0.1307 and std 0.3081.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Build the train split first and the test split second; both are stored
# under ./data (downloaded when missing) and share the same transform.
trainset, testset = (
    torchvision.datasets.MNIST(
        root="./data", train=is_train, download=True, transform=transform
    )
    for is_train in (True, False)
)

# Shuffled mini-batches of 128 for training; unshuffled batches of 10000
# for evaluation.
train_loader = DataLoader(dataset=trainset, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=testset, batch_size=10000, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 16025544.55it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 508614.48it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 4393248.54it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3184642.05it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [9]:
def test(model, test_loader, criterion, device):
    """Evaluate ``model`` on every batch of ``test_loader`` with gradients disabled.

    Args:
        model: Network to evaluate; it is switched to eval mode and left there.
        test_loader: Iterable of ``(data, target)`` batches that also supports
            ``len()`` and exposes a ``dataset`` attribute supporting ``len()``.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            tensor (``.item()`` is called on the result).
        device: Device the batches are moved to before the forward pass.

    Returns:
        A tuple ``(loss, accuracy, f1)`` where

        * ``loss`` is the mean of the per-batch criterion values,
        * ``accuracy`` is the number of correct argmax predictions divided by
          ``len(test_loader.dataset)``,
        * ``f1`` is the macro-averaged F1 score computed once over all
          predictions gathered from the loader.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no batches.
    """
    model.eval()
    total_loss = 0
    correct = 0
    all_targets = []
    all_preds = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += criterion(output, target).item()
            pred = output.argmax(dim=1)
            target = target.view_as(pred)
            correct += pred.eq(target).sum().item()
            # Keep labels and predictions on the CPU so F1 can be computed
            # once over the complete evaluation set after the loop.
            all_targets.append(target.cpu())
            all_preds.append(pred.cpu())

    # Divide first so an empty loader still fails with ZeroDivisionError.
    mean_loss = total_loss / len(test_loader)
    accuracy = correct / len(test_loader.dataset)

    # Macro-F1 is not additive over batches: averaging per-batch scores gives
    # a different number than the score over all samples (and a batch may not
    # even contain every class), so score the concatenated predictions once.
    f1 = f1_score(
        torch.cat(all_targets).numpy(),
        torch.cat(all_preds).numpy(),
        average='macro',
    )

    return mean_loss, accuracy, f1

In [None]:
from pymoo.algorithms.soo.nonconvex.cmaes import CMAES
from pymoo.optimize import minimize
import os
import pickle

# Seed both RNGs before the model, codebook and start point are created.
torch.manual_seed(1)
np.random.seed(seed=1)
# Parameter Setting
NP = 100  # only referenced by the commented-out population initialisation below
block_size = 100
# Define model
model = LeNet(in_channels=1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

init_params = get_model_params(model)
D = len(init_params)
print(f"Original dims: {D} D")

# All artefacts of this cell (codebook cache, CSV history, plot) are written
# under out/, so make sure the directory exists before the first write.
os.makedirs("out", exist_ok=True)

# The codebook is cached on disk, keyed by the parameter count and block size.
codebook_path = f'out/codebook_D{D}_blocksize{block_size}.pkl'
if os.path.exists(codebook_path):
    # NOTE: unpickling executes arbitrary code; only load cache files that
    # were written by this notebook.
    with open(codebook_path, 'rb') as f:
        codebook = pickle.load(f)
else:
    codebook = build_rand_blocks(D, block_size=block_size)

    with open(codebook_path, 'wb') as f:
        pickle.dump(codebook, f)

bD = len(codebook)
print(f"Blocked dims: {bD} D")
# Re-seed so the start point is the same whether the codebook was loaded from
# the cache or freshly built (building it presumably draws NumPy random
# numbers -- TODO confirm against build_rand_blocks).
np.random.seed(seed=1)
# Random start point in the blocked search space, uniform in [-5, 5).
x0 = np.random.uniform(low=-5, high=5, size=(bD))
# Blocked version of the model's initial parameters; evaluated next to x0 below.
init_params = blocker(init_params, codebook)
# x0 = init_params.copy()
# init_pop = np.random.normal(loc=init_params, scale=0.1, size=(NP, bD))

# random_indices = np.random.choice(
#             np.arange(len(trainset)), size=1024, replace=False
#         )
# random_dataset = Subset(trainset, random_indices)
# data_loader = DataLoader(random_dataset, batch_size=128, shuffle=True)
data_loader = DataLoader(trainset, batch_size=128, shuffle=True)
problem = GFOProblem(n_var=bD, model=model, dataset=trainset, test_loader=test_loader, train_loader=data_loader,
                      set_model_state=set_model_state,
                      batch_size=256, device=device, criterion="f1",
                      block=True, codebook=codebook, orig_dims=D)
# Evaluate the random start point and the blocked initial parameters once,
# as a baseline before any optimisation.
out={"F": []}
problem._evaluate(np.array([x0, init_params]), out=out)
print(out)

# NOTE(review): the file names say "restart5" while `restarts` below is 10 --
# confirm which one is intended.
csv_path = f"out/LeNet_block_bs{block_size}_gfo_f1_cmaes_restart5_hist.csv"
plt_path = f"out/LeNet_block_bs{block_size}_gfo_f1_cmaes_restart5_plt.pdf"
# First row of the history CSV: the objective value of x0 before optimisation.
df = pd.DataFrame({
            'n_step': [0],
            'n_eval': [1],
            'f_best': [out["F"][0]],
            'f_avg': [out["F"][0]],
            'f_std': [0],
            'test_f1_best': problem.test_func(x0),
        })
df.to_csv(csv_path, index=False)

batch_size = 128
restarts = 10
best_x0 = x0
sigma = 0.1
# Restart schedule: every run starts CMA-ES from the previous run's best
# solution, with half the step size and twice the batch size.
# NOTE(review): the batch size reaches 128 * 2**9 = 65536 in the last restart;
# confirm GFOProblem copes with a batch size that large.
for i in range(restarts):
    # data_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
    problem = GFOProblem(n_var=bD, model=model, dataset=trainset, test_loader=test_loader, train_loader=None,
                          set_model_state=set_model_state, batch_size=batch_size, device=device, criterion="f1",
                          block=True, codebook=codebook, orig_dims=D)
    algorithm = CMAES(
        x0=best_x0,
        sigma=sigma,
    )

    # Each run is capped at 10000 evaluations and uses the same seed.
    res = minimize(problem,
                  algorithm,
                  ('n_eval', 10000),
                  callback=SOCallback(k_steps=100, csv_path=csv_path, plt_path=plt_path),
                  seed=1,
                  verbose=True)

    print("Restart optimizer #:", i+1)

    best_x0 = res.X.copy()
    sigma *= 0.5
    batch_size *= 2


# `res` is the result of the last restart.
print("Best solution found: \nX = %s\nF = %s" % (res.X, res.F))

Original dims: 61706 D
Blocked dims: 618 D
{'F': array([0.98253968, 0.98823529])}
n_gen  |  n_eval  |     f_avg     |     f_min     |     f_gap     | run  |   fpop   | n_pop |     sigma     | min_std  | max_std  |   axis  
     1 |        1 |  0.9770992380 |  0.9770992380 |  0.9770992380 |    1 |  0.97710 |    23 |  0.1000000000 |  0.10000 |  0.10000 |  1.00005
     2 |       24 |  0.9657621496 |  0.9093007594 |  0.9093007594 |    1 |  0.90930 |    23 |  0.0988621985 |  0.09886 |  0.09887 |  1.00005
     3 |       47 |  0.9621811511 |  0.9093007594 |  0.9093007594 |    1 |  0.91134 |    23 |  0.0978278656 |  0.09782 |  0.09784 |  1.00005
     4 |       70 |  0.9509802921 |  0.9069771990 |  0.9069771990 |    1 |  0.90698 |    23 |  0.0968710128 |  0.09686 |  0.09688 |  1.00005
     5 |       93 |  0.9492612824 |  0.8987414911 |  0.8987414911 |    1 |  0.89874 |    23 |  0.0959964482 |  0.09599 |  0.09601 |  1.00005
     6 |      116 |  0.9480148951 |  0.8574773967 |  0.8574773967 |    1

In [33]:
# Save the best solution model parameters state
best_X = res.X.copy()
if len(res.X) != D:
    best_X = problem.unblocker(best_X)
set_model_state(model, best_X)
torch.save(model.state_dict(), f"out/LeNet_MNIST_block_bs{block_size}_gfo_f1_cmaes_restart5_model.pth")

In [34]:
# Smallest component of the solution vector returned by the last optimizer run.
res.X.min()

-4.999990342805474

In [35]:
# Largest component of the solution vector returned by the last optimizer run.
res.X.max()

4.99994931336378

In [36]:
# Cross-entropy loss used for the evaluation below (the optimisation itself
# was configured with criterion="f1").
criterion = torch.nn.CrossEntropyLoss()

In [37]:
# Evaluate the model on the test loader; test() returns (loss, accuracy, F1).
test(model, test_loader=test_loader, criterion=criterion, device=device)

(3497541.75, 0.5265, 0.5261534598171034)