In [1]:
import numpy as np
import math
import multiprocessing as mp

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from collections import namedtuple

from PIL import Image
import os
import os.path
import errno
import codecs
import copy

torch.manual_seed(0)
np.random.seed(0)
print(torch.cuda.device_count())
print(torch.cuda.is_available())

1
True


# 超參數

In [2]:
Args = namedtuple('Args', ['batch_size', 'test_batch_size', 'epochs', 'lr', 'cuda', 'seed', 'log_interval'])
args = Args(batch_size=1000, test_batch_size=1000, epochs=30, lr=0.001, cuda=True, seed=0, log_interval=10)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# 計算排名

In [3]:
def compute_ranks(x):
    assert x.ndim == 1
    ranks = np.empty(len(x),dtype=int)
    ranks[x.argsort()] = np.arange(len(x))
    return ranks

In [4]:
def compute_centered_ranks(x):
    y = compute_ranks(x.ravel()).reshape(x.shape).astype(np.float32)
    y = (y / (x.size - 1)) - 0.5
    return y

# CMAES

In [6]:
class CMAES:
  '''CMA-ES wrapper.'''
  def __init__(self, num_params,      # number of model parameters
               sigma_init=0.10,       # initial standard deviation
               popsize=255):          # population size

    self.num_params = num_params
    self.sigma_init = sigma_init
    self.popsize = popsize

    self.solutions = None

    import cma
    self.es = cma.CMAEvolutionStrategy( self.num_params * [0],
                                        self.sigma_init,
                                        {'popsize': self.popsize})

  def rms_stdev(self):
    sigma = self.es.result[6]
    return np.mean(np.sqrt(sigma*sigma))

  def ask(self):
    '''returns a list of parameters'''
    self.solutions = np.array(self.es.ask())
    return self.solutions

  def tell(self, reward_table_result):
    reward_table = reward_table_result
    self.es.tell(self.solutions, (-reward_table).tolist()) # convert minimizer to maximizer.

  def done(self):
    return self.es.stop()

  def current_param(self):
    return self.es.result[5] # mean solution, presumably better with noise
  
  def best_param(self):
    return self.es.result[0] # best evaluated solution

  def result(self): # return best params so far, along with historically best reward, curr reward, sigma
    r = self.es.result
    return (r[0], -r[1], -r[1], r[6])

# data loader

In [7]:
kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}

train_loader = torch.utils.data.DataLoader(
  datasets.MNIST('MNIST_data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])),
  batch_size=args.batch_size, shuffle=True, **kwargs)

valid_loader = train_loader

test_loader = torch.utils.data.DataLoader(
  datasets.MNIST('MNIST_data', train=False, transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,), (0.3081,))])),
  batch_size=args.batch_size, shuffle=True, **kwargs)

# Net

In [8]:
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.num_filter1 = 8
    self.num_filter2 = 16
    self.num_padding = 2
    # input is 28x28
    # padding=2 for same padding
    self.conv1 = nn.Conv2d(1, self.num_filter1, 5, padding=self.num_padding)
    # feature map size is 14*14 by pooling
    # padding=2 for same padding
    self.conv2 = nn.Conv2d(self.num_filter1, self.num_filter2, 5, padding=self.num_padding)
    # feature map size is 7*7 by pooling
    self.fc = nn.Linear(self.num_filter2*7*7, 10)

  def forward(self, x):
    x = F.max_pool2d(F.relu(self.conv1(x)), 2)
    x = F.max_pool2d(F.relu(self.conv2(x)), 2)
    x = x.view(-1, self.num_filter2*7*7)   # reshape Variable
    x = self.fc(x)
    return F.log_softmax(x)

In [9]:
model = Net()
if args.cuda:
    model.cuda()
orig_model = copy.deepcopy(model)

# get init params

In [10]:
orig_params = []
model_shapes = []
for param in orig_model.parameters():
    p = param.data.cpu().numpy()
    model_shapes.append(p.shape)
    orig_params.append(p.flatten())
orig_params_flat = np.concatenate(orig_params)
NPARAMS = len(orig_params_flat)
print(NPARAMS)

11274


# how to update model?

In [11]:
def update_model(flat_param, model, model_shapes):
    idx = 0
    i = 0
    for param in model.parameters():
        # get new weight and bias
        delta = np.product(model_shapes[i])
        block = flat_param[idx:idx+delta]
        block = np.reshape(block, model_shapes[i])
        i += 1
        idx += delta
        # to numpy
        block_data = torch.from_numpy(block).float()
        # if use gpu
        if args.cuda:
            block_data = block_data.cuda()
        # update net param weight and bias
        param.data = block_data

# evaluate

In [12]:
def evaluate(model, test_loader, print_mode=True, return_loss=False):
  model.eval()
  test_loss = 0
  correct = 0
  for data, target in test_loader:
    if args.cuda:
      data, target = data.cuda(), target.cuda()
    data, target = Variable(data, volatile=True), Variable(target)
    output = model(data)
    test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
    pred = output.data.max(1, keepdim=True)[1] # get the index of the max log-probability
    correct += pred.eq(target.data.view_as(pred)).cpu().sum()

  test_loss /= len(test_loader.dataset)
  acc = correct / len(test_loader.dataset)
  
  if print_mode:
    print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
      test_loss, correct, len(test_loader.dataset),
      100. * acc))
  
  if return_loss:
    return test_loss
  return acc

# 實例化es

In [13]:
es = CMAES(NPARAMS, sigma_init=0.01, popsize=100)

(50_w,100)-aCMA-ES (mu_w=27.0,w_1=8%) in dimension 11274 (seed=601538, Tue Apr 21 14:35:22 2020)


# train loop

In [14]:
best_valid_acc = 0
training_log = []
#訓練幾個紀元
for epoch in range(1, 10*args.epochs + 1):
    #不需要梯度
    model.eval()
    for batch_idx, (data, target) in enumerate(train_loader):
        # 是否用gpu
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        # 取得一批種群
        solutions = es.ask()
        # 對種群中每一個"個體"計算"適應度"
        reward = np.zeros(es.popsize)
        for i in range(es.popsize):
            update_model(solutions[i], model, model_shapes)
            output = model(data)
            loss = F.nll_loss(output, target)
            reward[i] = - loss.data.item()
        # 紀錄最好的適應度
        best_raw_reward = reward.max()
        # 把適應度回饋給es
        es.tell(reward)
        # 取得結果
        result = es.result()
        # 打印訊息
        if (batch_idx % 5 == 0):
            print(epoch, batch_idx, best_raw_reward, result[0].mean(), result[3].mean())
    # 這個紀元目前的解
    curr_solution = es.current_param()
    # 更新模型
    update_model(curr_solution, model, model_shapes)
    # 計算驗證acc,紀錄acc以及打印acc訊息
    valid_acc = evaluate(model, valid_loader, print_mode=False)
    training_log.append([epoch, valid_acc])
    print('valid_acc', valid_acc * 100.)
    #保存最佳模型
    if valid_acc >= best_valid_acc:
        best_valid_acc = valid_acc
        best_model = copy.deepcopy(model)
        print('best valid_acc', best_valid_acc * 100.)



1 0 -2.301568031311035 -4.754631806246488e-05 0.009976557735581865
1 5 -2.301074504852295 2.4398802958459033e-05 0.009867787074910378
1 10 -2.301438331604004 -4.8103104200144176e-05 0.009769219508705865
1 15 -2.2995400428771973 2.439823549686826e-05 0.009677859968893235
1 20 -2.295708179473877 0.00035632789988553494 0.009592588631868983
1 25 -2.288591146469116 0.0003725913163342493 0.009512587287574484
1 30 -2.2796945571899414 0.00035234794825480766 0.009437569407531265
1 35 -2.260024309158325 0.0006354077905544203 0.009367069180659061
1 40 -2.241558074951172 0.0006041833226479468 0.009300776780669087
1 45 -2.2058212757110596 0.0007244268465375859 0.009238260413126302
1 50 -2.159390449523926 0.0007802636155208575 0.009179490160622129
1 55 -2.1354832649230957 0.0008253560444887162 0.009123973416203381


  


IndexError: invalid index of a 0-dim tensor. Use tensor.item() to convert a 0-dim tensor to a Python number