In [1]:
import os
import sys
import time
import glob
import json
import pytz
import numpy as np
import logging
import seaborn as sns
import argparse
import datetime
import networkx as nx
import matplotlib.pyplot as plt

import torch #基本モジュール
from torch.autograd import Variable #自動微分用
import torch.nn as nn #ネットワーク構築用
import torch.optim as optim #最適化関数
import torch.nn.functional as F #ネットワーク用の様々な関数
import torch.utils.data #データセット読み込み関連
import torchvision #画像関連
from torchvision import datasets, models, transforms #画像用データセット諸々
import torch.backends.cudnn as cudnn

In [2]:
SERVER = False

In [3]:
if SERVER:
  from tqdm import tqdm
  import shutil
  import graphviz

  import deap
  from deap import base, creator, tools
  import random

else:
  from google.colab import output
  from tqdm.notebook import tqdm
  import graphviz
  
  !pip install deap
  output.clear()
  
  import deap
  from deap import base, creator, tools
  import random

  
  from google.colab import drive
  drive.mount('/content/drive')

Mounted at /content/drive


# utils

## other

In [4]:
def argspace(*funcs, **kwds):
  """Decorator factory: call the wrapped function with one `argparse.Namespace`.

  The namespace is assembled, in increasing priority, from
    1. the keyword defaults given to `argspace(**kwds)`,
    2. the dict returned by each provider in `funcs` (called on every invocation),
    3. the keyword arguments of the actual call.

  The decorated function only accepts keyword arguments.
  """
  def deco(f):
    def inner(**ikwds):
      # Build a fresh dict per call. Updating the closure's `kwds` in place made
      # per-call overrides leak into every later call of the decorated function.
      merged = dict(kwds)
      for g in funcs:
        merged.update(g())
      merged.update(ikwds)
      return f(argparse.Namespace(**merged))
    return inner
  return deco

In [5]:
def set_seed(seed):
  """Seed every random number generator this notebook draws from.

  Covers the stdlib `random` module (the GA code calls `random.random()`),
  NumPy, and PyTorch on CPU and on all CUDA devices.
  """
  import random  # local import keeps this helper self-contained

  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)

In [6]:
class Singleton(object):
  """Base class whose constructor always hands back one cached instance.

  The instance is stored on the class as `_instance`; constructor arguments
  are accepted but not used when creating the object.
  """
  def __new__(cls, *args, **kargs):
    try:
      return cls._instance
    except AttributeError:
      cls._instance = super(Singleton, cls).__new__(cls)
      return cls._instance

In [7]:
class Experiment(Singleton):
  """Singleton registry of named event hooks.

  Functions are registered under one or more event keys with
  `@Experiment.event('key', ...)`. `Experiment()('key')(*args)` then calls
  every function registered under that key, in ascending `order`, and returns
  the list of their results (or None when nothing is registered).
  """
  def __init__(self):
    # __init__ runs on every Experiment() call; keep an existing registry.
    if not hasattr(self, 'func'):
      self.func = {}

  def _store(self, key, f, order):
    """Register `f` under `key`; a function with the same name replaces the old one."""
    self.func.setdefault(key, {})[f.__name__] = (order, f)

  def __call__(self, key):
    """Return a dispatcher that fires every hook registered under `key`."""
    def f(*args, **kwds):
      if key not in self.func: return None

      funcs = sorted(self.func[key].values(), key=lambda x: x[0])
      return [g(*args, **kwds) for _, g in funcs]
    return f

  @staticmethod
  def event(*key, order=0):
    """Decorator registering the function under every key in `key`.

    Declared static so it works both as `Experiment.event(...)` and on an
    instance. The returned wrapper keeps the original function's name, so a
    second `event` decorator stacked on top registers it under that name
    rather than under the generic name "inner".
    """
    import functools  # local import keeps this class self-contained

    def d(f):
      for k in key:
        Experiment()._store(k, f, order)

      @functools.wraps(f)
      def inner(*args, **kwds):
        return f(*args, **kwds)
      return inner
    return d

  @staticmethod
  def reset():
    """Drop every registered hook."""
    Experiment().func = {}

## metrics

In [8]:
def accuracy(output, target, topk=(1,)):
  """Top-k accuracy in percent.

  Args:
    output: score tensor; the k highest scores along dim 1 are the predictions.
    target: tensor of class indices, one per row of `output`.
    topk: iterable of k values to evaluate.

  Returns:
    A list with one 0-dim tensor per entry of `topk`.
  """
  batch_size = target.size(0)
  largest_k = max(topk)

  # (largest_k, batch) matrix of predicted class indices, best guess first
  top_pred = output.topk(largest_k, 1, True, True)[1].t()
  hits = top_pred.eq(target.view(1, -1).expand_as(top_pred))

  scale = 100.0 / batch_size
  return [hits[:k].reshape(-1).float().sum(0).mul_(scale) for k in topk]

In [9]:
class AvgrageMeter(object):
  """Running weighted average: tracks `sum`, `cnt` and their ratio `avg`."""

  def __init__(self):
    self.reset()

  def reset(self):
    """Forget everything accumulated so far."""
    self.avg = self.sum = self.cnt = 0

  def update(self, val, n=1):
    """Add `val` with weight `n` and refresh the average."""
    self.cnt += n
    self.sum += val * n
    self.avg = self.sum / self.cnt

In [10]:
class Metrics():
  """Confusion-matrix based classification metrics of `model` on `dataloader`.

  The confusion matrix is built once in the constructor; `matrix[t, p]` counts
  samples with target class `t` predicted as class `p`. Every metric accepts
  `micro=True` (pool the counts over classes) or `micro=False` (average the
  per-class ratios).
  """
  def __init__(self, model, dataloader):
    self.epsilon = 1e-7  # guards the ratios against empty denominators
    self._run(model, dataloader)

  @torch.no_grad()
  def _run(self, model, dataloader):
    """Run the model over the loader and accumulate the confusion matrix.

    Raises:
      ValueError: if the loader yields no batch.
    """
    device = next(model.parameters()).device
    cm, num = None, 0

    for data, target in dataloader:
      data = data.to(device)
      target = target.to(device)
      outputs = model(data)
      _, preds = torch.max(outputs, 1)

      if cm is None:
        # The class count comes from the model output width. Deriving it from
        # the first batch's targets breaks when that batch lacks the top class.
        num = outputs.size(1)
        cm = torch.zeros(num, num, device=device)

      # One bincount over flattened (target, prediction) pairs replaces the
      # per-sample Python loop.
      flat = target.view(-1).long() * num + preds.view(-1).long()
      cm += torch.bincount(flat, minlength=num * num).view(num, num).to(cm.dtype)

    if cm is None:
      raise ValueError("dataloader yielded no batches")

    self.matrix = cm
    self.dim = num
    self.sum0 = self.matrix.sum(0)  # per predicted class
    self.sum1 = self.matrix.sum(1)  # per target class
    self.sum = self.matrix.sum()

  def confusion_matrix(self):
    return self.matrix

  def TP(self, index):
    return self.matrix[index][index]

  def FN(self, index):
    return self.sum1[index] - self.TP(index)

  def FP(self, index):
    return self.sum0[index] - self.TP(index)

  def TN(self, index):
    return self.sum - self.TP(index) - self.FN(index) - self.FP(index)

  def _sum(self, F):
    """Sum the per-class quantity `F(i)` over all classes."""
    return sum(F(i) for i in range(self.dim))

  def _micro(self, F, G):
    return self._sum(F) / (self._sum(F) + self._sum(G) + self.epsilon)

  def _macro(self, F, G):
    return sum(F(i) / (F(i) + G(i) + self.epsilon) for i in range(self.dim)) / self.dim

  def _switch(self, F, G, micro):
    return (self._micro(F, G) if micro else self._macro(F, G))

  def accuracy(self, micro=True):
    """Overall accuracy (micro) or the mean of the per-class accuracies (macro)."""
    if micro:
      return self._sum(self.TP) / self.sum
    # Each class is normalised by ITS OWN sample count; the previous code
    # divided by the whole `sum1` vector and averaged the result.
    return sum(self.TP(i) / (self.sum1[i] + self.epsilon) for i in range(self.dim)) / self.dim

  def precision(self, micro=True):
    return self._switch(self.TP, self.FP, micro)

  def recall(self, micro=True):
    return self._switch(self.TP, self.FN, micro)

  def specificity(self, micro=True):
    return self._switch(self.TN, self.FP, micro)

  def f_measure(self, micro=True):
    p, r = self.precision(micro), self.recall(micro)
    return 2 * p * r / (p + r + self.epsilon)

  def print(self):
    """Print the confusion matrix and each metric as (micro, macro)."""
    print(self.confusion_matrix())
    print("accuracy ", self.accuracy(), self.accuracy(micro=False))
    print("precision ", self.precision(), self.precision(micro=False))
    print("recall ", self.recall(), self.recall(micro=False))
    print("specificity ", self.specificity(), self.specificity(micro=False))
    print("f_measure ", self.f_measure(), self.f_measure(micro=False))

In [11]:
def count_param(model : nn.Module):
  """Count the parameter elements of `model`, skipping any parameter whose
  qualified name contains "auxiliary"."""
  sizes = []
  for name, tensor in model.named_parameters():
    if "auxiliary" in name:
      continue
    sizes.append(np.prod(tensor.size()))
  return np.sum(sizes)

In [12]:
from typing import Union
from collections import OrderedDict
@torch.no_grad()
def square_error(m : Union[nn.Module, OrderedDict], n : Union[nn.Module, OrderedDict]):
  """Sum of squared element-wise differences between two sets of tensors.

  Each argument is either a module (its parameters are used) or a mapping
  (its values are used); the tensors are paired positionally.
  """
  def tensors(obj):
    return obj.parameters() if isinstance(obj, nn.Module) else obj.values()

  total = 0
  for x, y in zip(tensors(m), tensors(n)):
    diff = x - y
    total = total + torch.sum(diff * diff)
  return total

In [13]:
def init_module(target, source):
  """Overwrite the parameters of `target` with those of `source`, in place.

  Parameters are paired positionally, in `parameters()` order. Only parameters
  are copied, not buffers.
  """
  with torch.no_grad():
    for bp, ap in zip(target.parameters(), source.parameters()):
      # copy_ is exact; `bp.add_(ap - bp)` loses precision whenever the two
      # values differ greatly in magnitude.
      bp.copy_(ap)

## save

In [14]:
def path_with_time(path : str) -> str:
  """Return `path` suffixed with '-' and the current Asia/Tokyo time
  formatted as YYYY-MM-DD_HH-MM-SS."""
  tokyo = pytz.timezone('Asia/Tokyo')
  stamp = datetime.datetime.now(tokyo).strftime('%Y-%m-%d_%H-%M-%S')
  return '{}-{}'.format(path, stamp)

In [15]:
def create_exp_dir(path, scripts_to_save=None):
  """Create the experiment directory and optionally snapshot scripts into it.

  Args:
    path: directory to create; missing parents are created and an existing
      directory is reused.
    scripts_to_save: optional iterable of file paths copied to `<path>/scripts`.
  """
  import shutil  # local import keeps this helper self-contained

  os.makedirs(path, exist_ok=True)
  print('Experiment dir : {}'.format(path))

  if scripts_to_save is not None:
    script_dir = os.path.join(path, 'scripts')
    # exist_ok: calling this twice for the same experiment must not fail
    os.makedirs(script_dir, exist_ok=True)
    for script in scripts_to_save:
      dst_file = os.path.join(script_dir, os.path.basename(script))
      shutil.copyfile(script, dst_file)

In [16]:
def init_logging(save_path):
  """Log INFO and above to stdout and to `<save_path>/log.txt`.

  The file handler stamps records with Asia/Tokyo wall-clock time.
  """
  tokyo = pytz.timezone('Asia/Tokyo')

  class Formatter(logging.Formatter):
    """override logging.Formatter to use an aware datetime object"""
    def converter(self, timestamp):
      # Convert the epoch timestamp INTO Tokyo time. The previous code built a
      # naive datetime in the machine's local zone and merely labelled it as
      # Tokyo, which is wrong on any host not running in JST.
      return datetime.datetime.fromtimestamp(timestamp, tokyo)

    def formatTime(self, record, datefmt=None):
      dt = self.converter(record.created)
      if datefmt:
        s = dt.strftime(datefmt)
      else:
        try:
          s = dt.isoformat(timespec='milliseconds')
        except TypeError:
          # isoformat() without `timespec` support
          s = dt.isoformat()
      return s

  log_format = '%(asctime)s %(message)s'
  logging.basicConfig(stream=sys.stdout, level=logging.INFO,
      format=log_format, datefmt='%m/%d %I:%M:%S %p')
  fh = logging.FileHandler(os.path.join(save_path, 'log.txt'))
  fh.setFormatter(Formatter(log_format))
  logging.getLogger().addHandler(fh)

In [17]:
def save_dir(dir : str, drivepath = './drive/My Drive/ml'):
  """Copy the local directory `./<dir>` into `drivepath` using `cp -r`.

  Does nothing when running on the server (`SERVER` is set) or when `dir`
  is empty.
  """
  if SERVER: return
  if not dir: return

  import subprocess
  # universal_newlines=True makes stdout a str; the bytes object returned by
  # default cannot be passed to sys.stdout.write().
  res = subprocess.run(["cp", "-r", "./" + dir, drivepath],
                       stdout=subprocess.PIPE, universal_newlines=True)
  sys.stdout.write(res.stdout)

In [18]:
class Store():
  """Collects named series of values and saves them as a text log and figures.

  Args:
    dir: output directory for the log and figures.
    name: base name of the log file (`<name>.txt`).
    fig: list of `(metric_or_metrics, xlabel, ylabel)` tuples plotted by `save()`.
  """
  def __init__(self, dir="result", name="log", fig=None):
    self.dict = {}
    self.dir = dir
    self.name = name
    # A `fig=[]` default would be one list shared by every Store instance.
    self.fig = fig if fig is not None else []

  def add(self, name, value):
    """Append `value` to the series `name`, creating the series if needed."""
    self.dict.setdefault(name, []).append(value)

  def apply(self, name, func):
    """Return `func(series)`; a missing series is created empty first."""
    return func(self.dict.setdefault(name, []))

  def update(self, store : 'Store'):
    """Take over every series of `store`, replacing same-named ones."""
    self.dict.update(store.dict)

  def save(self):
    """Write the log and every figure configured in `self.fig`."""
    self.save_log()
    for metrix, x, y in self.fig:
      self.save_fig(metrix, x, y)

  def save_log(self, name=None):
    """Dump the whole store as text to `<dir>/<name>.txt`."""
    name = name if name else self.name
    if self.dir:
      os.makedirs(self.dir, exist_ok=True)
    path = os.path.join(self.dir, name + ".txt")
    with open(path, mode='w') as f:
      f.write("%s" % self.dict)

  def save_fig(self, metrix, xlabel, ylabel, show=True):
    """Plot one metric (str) or several (sequence of str) against their index.

    The figure is saved as `<dir>/<metric names joined by '_'>_<length>.png`,
    where length is the number of points of the first metric.
    """
    fig = plt.figure()

    single = type(metrix) is str
    names = [metrix] if single else list(metrix)
    for m in names:
      values = self.dict[m]
      # each series gets its own x-axis, so series of different length still plot
      plt.plot(np.arange(len(values)), values, label=None if single else m)
    times = len(self.dict[names[0]])
    stem = "_".join(names)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    if not single:
      plt.legend()  # only meaningful when the lines carry labels
    # Save before showing: some backends release the figure on show().
    fig.savefig(os.path.join(self.dir, "%s_%d.png" % (stem, times)))
    if show and not SERVER: plt.show()
    plt.close(fig)  # figures stay alive, and accumulate, until closed

  def __repr__(self):
    return "store in %s" % self.dict

In [19]:
def render_graph(graph, path, image_size=32):
  """Render an architecture graph to a PNG with graphviz and return the Digraph.

  Args:
    graph: graph whose nodes carry 'channel' and 'stride' (optionally 'name')
      and whose edges carry 'module'.
    path: output path handed to `Digraph.render`.
    image_size: input resolution used for the per-node feature-map size label
      (`image_size / stride`); previously hard-coded to 32.
  """
  style = dict(
      format='png',
      edge_attr=dict(fontsize='20', fontname="times"),
      node_attr=dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname="times"),
      engine='dot' # circo, dot, fdp, neato, osage, sfdp, twopi
  )

  dg = graphviz.Digraph(**style)

  dg.attr('node', fillcolor='dodgerblue4', fontcolor='white', fontsize='15') # coral,
  for node in graph.nodes():
    attr = graph.nodes[node]
    label = attr['name'] if 'name' in attr else str(node)
    side = image_size / attr['stride']
    label += '\n(%s, %d, %d)' % (attr['channel'], side, side)
    dg.node(str(node), label=label)

  for (i, j) in graph.edges():
    attr = graph.edges[i, j]
    # edges are drawn unlabelled; the line style alone tells the two kinds apart
    line = 'bold' if attr['module'] == 'forward' else 'dashed'
    dg.edge(str(i), str(j), label="", style=line)

  dg.render(path)
  return dg

In [20]:
def save_heatmap(data : torch.tensor, path, format='1.2f'):
  """Draw `data` as an annotated seaborn heatmap and save it to `path`.

  `format` is the number format of the cell annotations. Every open
  matplotlib figure is closed afterwards.
  """
  plt.figure()
  values = data.detach().cpu().clone().numpy()
  sns.heatmap(values, annot=True, fmt=format)
  plt.savefig(path)
  plt.close('all')

# dataset

In [21]:
class Cutout(object):
  """Transform that zeroes a square patch, at most `length` wide, centred on
  a random pixel.

  The patch is clipped at the image border. The input tensor is modified in
  place and returned.
  """
  def __init__(self, length):
    self.length = length

  def __call__(self, img):
    height, width = img.size(1), img.size(2)
    # draw the centre: row first, then column
    cy = np.random.randint(height)
    cx = np.random.randint(width)

    half = self.length // 2
    top, bottom = np.clip([cy - half, cy + half], 0, height)
    left, right = np.clip([cx - half, cx + half], 0, width)

    mask = np.ones((height, width), np.float32)
    mask[top: bottom, left: right] = 0.
    img *= torch.from_numpy(mask).expand_as(img)
    return img

In [22]:
def load_dataset(train=2000, test=500, valid=0, cutout=False, cutout_length=16):
  """Download CIFAR-10 and draw random train/valid/test subsets.

  Args:
    train, valid: number of samples drawn (disjointly) from the training split.
    test: number of samples drawn from the test split.
    cutout: append `Cutout(cutout_length)` to the training transform.

  Returns:
    `argparse.Namespace(train=..., test=..., valid=...)` of dataset subsets.
    The validation subset goes through the training transform.

  Raises:
    ValueError: if more samples are requested than a split contains.
  """
  mean, std = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)

  # augmentation and normalisation for training images
  transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
  ])

  if cutout:
    transform.transforms.append(Cutout(cutout_length))

  transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
  ])

  dataset = torchvision.datasets.CIFAR10
  kwargs = {"root" : "./data", "download" : True}

  # load the CIFAR-10 train and test splits
  trainset = dataset(train=True, transform=transform, **kwargs)
  testset = dataset(train=False, transform=transform_test, **kwargs)

  # A negative remainder would still add up to the dataset length and slip
  # past random_split's own check, so validate explicitly.
  rest_train = len(trainset) - train - valid
  rest_test = len(testset) - test
  if rest_train < 0 or rest_test < 0:
    raise ValueError("requested more samples than the dataset provides")

  trainset, validset, _ = torch.utils.data.random_split(trainset, [train, valid, rest_train])
  testset, _ = torch.utils.data.random_split(testset, [test, rest_test])
  return argparse.Namespace(train=trainset, test=testset, valid=validset)

In [23]:
def load_dataloader(args):
  """Build the train/valid/test DataLoaders described by `args`.

  Reads `use_cuda`, `train_size`, `test_size`, `valid_size`, `cutout`,
  `cutout_length` and `batch_size` from `args`; a falsy `valid_size` is
  normalised to 0 on `args`. The valid loader is None when no validation
  samples are requested.
  """
  loader_opts = {'num_workers': 2, 'pin_memory': True} if args.use_cuda else {}
  if not args.valid_size:
    args.valid_size = 0

  sets = load_dataset(
      train=args.train_size,
      test=args.test_size,
      valid=args.valid_size,
      cutout=args.cutout,
      cutout_length=args.cutout_length,
  )

  def make(dataset, shuffle):
    return torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size, shuffle=shuffle, **loader_opts)

  trainloader = make(sets.train, True)
  validloader = make(sets.valid, True) if args.valid_size else None
  testloader = make(sets.test, False)
  return argparse.Namespace(train=trainloader, test=testloader, valid=validloader)

# model

## sampler

In [24]:
class ArchitectureSampler():
  """Callable base for samplers: calling the object delegates to the
  `graph(graph, alpha)` method, which this base class does not define."""
  def __call__(self, graph : nx.DiGraph, alpha : torch.Tensor) -> nx.DiGraph:
    sampled = self.graph(graph, alpha)
    return sampled

In [25]:
class MaxSampler(ArchitectureSampler):
  """Keeps, for every node j >= 1, its `round(sum of incoming alphas)`
  strongest incoming edges and removes the rest.

  Works on a copy of the graph; `alpha[i, j]` is the weight of edge i -> j.
  """
  def graph(self, graph, alpha):
    G = nx.DiGraph(graph)
    n = G.number_of_nodes()

    for j in range(1, n):
      edges = [(i, j) for i in G.predecessors(j)]
      alphas = [alpha[i, j].item() for i, j in edges]
      # Clamp the keep count into [0, len(edges)]. The previous `[:-edge_num]`
      # slice was empty for edge_num == 0 (kept every edge) and sliced from
      # the wrong end for negative sums.
      keep = min(max(round(sum(alphas)), 0), len(edges))
      ranked = sorted(zip(edges, alphas), key=lambda x: x[-1])
      G.remove_edges_from([e for e, _ in ranked[:len(ranked) - keep]])

    return G

In [26]:
class EdgewiseSampler(ArchitectureSampler):
  """Removes every edge i -> j (j >= 1) whose `alpha[i, j]` rounds to less
  than 1. Works on a copy of the graph."""
  def graph(self, graph, alpha):
    G = nx.DiGraph(graph)

    for j in range(1, G.number_of_nodes()):
      incoming = [(i, j) for i in G.predecessors(j)]
      weights = [alpha[src, dst].item() for src, dst in incoming]
      weak = [edge for edge, w in zip(incoming, weights) if round(w) < 1]
      G.remove_edges_from(weak)

    return G

In [27]:
class ForwardSampler(ArchitectureSampler):
  """Keeps only the chain edges (i -> i + 1) of a copy of the graph;
  `alpha` is ignored."""
  def graph(self, graph, alpha):
    G = nx.DiGraph(graph)

    for j in range(1, G.number_of_nodes()):
      skips = [(i, j) for i in G.predecessors(j) if i + 1 != j]
      G.remove_edges_from(skips)

    return G

In [28]:
class RandomSampler(ArchitectureSampler):
  """Keeps `shortcut_num` randomly chosen non-chain edges and removes the
  other non-chain edges; chain edges (i -> i + 1) are never touched.

  Works on a copy of the graph; `alpha` is ignored.
  """
  def __init__(self, shortcut_num=0):
    self.num = shortcut_num

  def graph(self, graph, alpha):
    G = nx.DiGraph(graph)

    edges = [(e, f) for e, f in G.edges() if not e + 1 == f]
    np.random.shuffle(edges)

    # Never remove a negative number of edges: asking for more shortcuts than
    # exist used to turn the slice bound negative and delete edges anyway.
    n_remove = max(len(edges) - self.num, 0)
    G.remove_edges_from(edges[:n_remove])

    return G

In [29]:
class StrideCutSampler(ArchitectureSampler):
  """Removes every edge whose 'stride' attribute exceeds `stride_max`.
  Works on a copy of the graph; `alpha` is ignored."""
  def __init__(self, stride_max=8):
    assert stride_max >= 1
    self.stride = stride_max

  def graph(self, graph, alpha):
    G = nx.DiGraph(graph)

    for j in range(1, G.number_of_nodes()):
      too_long = [
          (i, j) for i in G.predecessors(j)
          if G.edges[i, j]['stride'] > self.stride
      ]
      G.remove_edges_from(too_long)

    return G

In [30]:
# args = {'gene':'VGG19', 'stride_max':2}
# model = load_model(dir="", **args)
# g = model.graph
# sampler = RandomSampler(10)
# h = model.sampled_graph(sampler)
# render_graph(h, 'graph')
# # nx.graph_edit_distance(g, h)

## component

In [31]:
# Layer plans per VGG variant: an int is the output width of a
# conv-BN-ReLU stage, 'M' is a 2x2 max-pool.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

class VGG(nn.Module):
    """VGG feature stack built from `cfg[vgg_name]` followed by a
    512 -> 10 linear classifier."""
    def __init__(self, vgg_name):
        super().__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)

    def _make_layers(self, cfg):
        """Translate a layer plan into an nn.Sequential (3 input channels)."""
        layers = []
        in_channels = 3
        for x in cfg:
            if x != 'M':
                layers.append(nn.Conv2d(in_channels, x, kernel_size=3, padding=1))
                layers.append(nn.BatchNorm2d(x))
                layers.append(nn.ReLU(inplace=True))
                in_channels = x
            else:
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*layers)

In [32]:
class FactorizedReduce(nn.Module):
  """Strided downsampling built from `stride` parallel 1x1 convolutions.

  Each convolution emits `channel_out // stride` channels; the outputs are
  concatenated along the channel axis and batch-normalised.
  """
  def __init__(self, channel_in, channel_out, stride, affine=True):
    super().__init__()
    assert channel_out % stride == 0

    branch_width = channel_out // stride
    self.convs = nn.ModuleList(
      nn.Conv2d(channel_in, branch_width, 1, stride=stride, padding=0, bias=False)
      for _ in range(stride)
    )
    self.bn = nn.BatchNorm2d(channel_out, affine=affine)

  def forward(self, x):
    # Branch i starts at spatial offset i, so the pixels skipped by one
    # strided branch are seen by another (avoids losing information).
    branches = []
    for offset, conv in enumerate(self.convs):
      branches.append(conv(x[:, :, offset:, offset:]))
    return self.bn(torch.cat(branches, dim=1))

In [33]:
class Shortcut(nn.Module):
  """Skip connection that adapts resolution and/or channel count when needed.

  - stride > 1: `FactorizedReduce` (changes resolution and channels);
  - same resolution, different channels: 1x1 convolution;
  - otherwise: identity.
  """
  def __init__(self, in_channel, out_channel, stride):
    super(Shortcut, self).__init__()
    self.f = self._shortcut(in_channel, out_channel, stride)

  def forward(self, x):
    return self.f(x)

  def _shortcut(self, channel_in, channel_out, stride):
    if stride > 1:
      return FactorizedReduce(channel_in, channel_out, stride)
    elif channel_in != channel_out:
      return nn.Conv2d(channel_in, channel_out,
                       kernel_size=1, stride=stride, padding=0)
    else:
      # nn.Identity instead of a bare lambda: it is a registered sub-module
      # and, unlike a lambda, can be pickled. It has no parameters, so the
      # state_dict is unchanged.
      return nn.Identity()

In [34]:
def drop_path(x, drop_prob):
  """Zero whole samples of `x` with probability `drop_prob`.

  Surviving samples are scaled by `1 / (1 - drop_prob)`. One keep/drop
  decision is drawn per sample (first dimension) and broadcast over the
  remaining three dimensions. `x` is returned untouched when `drop_prob` is
  not positive.
  """
  if drop_prob > 0.:
    keep_prob = 1. - drop_prob
    if keep_prob <= 0.:
      # Everything is dropped; dividing by keep_prob would produce NaN.
      return torch.zeros_like(x)
    # The deprecated autograd.Variable wrapper is not needed for the mask.
    mask = torch.bernoulli(torch.full((x.size(0), 1, 1, 1), keep_prob, device=x.device))
    x = x / keep_prob * mask
  return x

In [35]:
# x = torch.randn(4, 3, 8, 8).to(torch.device('cuda'))
# keep_prob = 0.5
# torch.bernoulli(torch.ones(x.size(0), 1, 1, 1, device=x.device) * keep_prob)

In [36]:
class Block(nn.Module):
  """One node of the architecture graph.

  Holds one module per incoming edge of node `index`. The output is the
  alpha-weighted sum of those modules applied to the predecessor states,
  followed by ReLU and, when the node's 'pool' attribute is set, a 2x2 max-pool.
  """
  def __init__(self, graph, index):
    super().__init__()
    node = graph.nodes[index]
    incoming = [(src, graph.edges[src, index]) for src in graph.predecessors(index)]

    self.index = index
    self.indices = [src for src, _ in incoming]
    self.edges = nn.ModuleList([self._build_module(cfg_) for _, cfg_ in incoming])

    tail = [nn.ReLU(inplace=True)]
    if node['pool']:
      tail.append(nn.MaxPool2d(kernel_size=2, stride=2))
    self.post_process = nn.Sequential(*tail)

  def _build_module(self, setting):
    """Create the module for one edge from its attribute dict."""
    kind = setting['module']
    in_channel, out_channel = setting['channel']
    stride = setting['stride']

    if kind == 'shortcut':
      return Shortcut(in_channel, out_channel, stride)
    if kind != 'forward':
      raise ValueError("module name")
    return nn.Sequential(
        nn.Conv2d(in_channel, out_channel, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channel)
        )

  def forward(self, inputs, alpha, drop_prob=0.):
    """Combine the predecessor states.

    Args:
      inputs: sequence of node outputs, indexed by node id.
      alpha: per-predecessor weights, indexed by node id.
      drop_prob: drop-path probability, applied only in training mode.
    """
    use_drop = self.training and drop_prob > 0.
    output = 0
    for src, op in zip(self.indices, self.edges):
      branch = op(inputs[src])
      if use_drop:
        branch = drop_path(branch, drop_prob)
      output = output + alpha[src] * branch
    return self.post_process(output)

## network

In [37]:
# Preconditions : make graph, modules (Block, pool, ...)
# Constraint : alpha softmax
class Network(nn.Module):
  """Graph-structured CNN whose edges are weighted by architecture parameters.

  The graph is either passed in (`graph`, which switches the network to
  "evaluate" mode where every existing edge has weight 1) or generated from
  `gene`, a VGG-style layer plan of channel widths and 'M' pooling markers.
  `init_alpha` must be called before `forward`, because `forward` reads
  `self.alphas` and the edge masks created there.
  """
  def __init__(self, gene, graph=None, preprocess : ArchitectureSampler=None):
    super(Network, self).__init__()
    self.gene = gene
    # A supplied graph means a fixed architecture is being evaluated.
    self.evaluate = bool(graph)
    self.graph = graph if graph else self._make_graph(gene)
    # Optional pruning step; the sampler is called with alpha=None.
    self.graph = preprocess(self.graph, None) if preprocess else self.graph

    self.blocks = nn.ModuleList(self._make_blocks(self.graph))
    self.pool = nn.AvgPool2d(kernel_size=1, stride=1)
    self.classifier = nn.Linear(512, 10)
    self.drop_path_prob = 0.

  def _make_graph(self, gene, color_channel=3):
    """Build the full search graph for `gene`.

    Node 0 is the input with `color_channel` channels; every following node is
    one entry of the decoded gene. Nodes store 'channel', the cumulative
    'stride' and a 'pool' flag. Edges i -> j exist for all i < j, except that
    node 0 only connects to node 1. Adjacent nodes are joined by a 'forward'
    edge, all other pairs by a 'shortcut' edge.
    """
    
    def _decode_gene(gene):
      # Split the gene into channel widths and per-node strides; an 'M'
      # doubles the stride of the node that precedes it.
      ch, st = [], []
      for g in gene:
        if g == 'M':
          st[-1] *= 2
        else :
          ch += [g]
          st += [1]
      return ch, st

    def __pi(array):
      # Running product of `array`.
      r = []
      s = 1
      for q in array:
        s *= q
        r += [s]
      return r

    channel, stride = _decode_gene(gene)
    channel = [color_channel] + channel
    stride = [1] + stride
    s_stride = __pi(stride)

    n = len(channel)
    nodes = [(i, {'channel':channel[i], 'stride':s_stride[i], 'pool':stride[i]>1}) for i in range(n)]
    nodes[0][-1].update({'name':'input'})
    edges = [(i, j, {}) for i in range(n) for j in range(n) if i < j and not (i == 0 and j > 1)]
    for (i, j, d) in edges:
      d.update({
        'module' : 'forward' if i + 1 == j else 'shortcut', 
        'channel' : (nodes[i][-1]['channel'], nodes[j][-1]['channel']),
        # ratio of the cumulative stride of node j-1 to that of node i
        'stride' : int(nodes[j-1][-1]['stride'] / nodes[i][-1]['stride'])
      })

    G = nx.DiGraph()
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)
    return G

  def _make_blocks(self, graph):
    """One Block per node except node 0 (the input)."""
    return [Block(graph, i) for i in graph.nodes() if i > 0]


  def init_alpha(self, device):
    """Create the architecture parameters and the edge masks on `device`.

    Sets `self.alphas` (a list holding one (n, n) tensor of small Gaussian
    noise that requires grad) and the boolean masks `self.mask_s` /
    `self.mask_f`, which mark shortcut / forward edges at position [j, i]
    for an edge i -> j. Returns `self`.
    """
    def _init_alpha(node_num, device, delta=1e-3):
      noise = delta * torch.randn(node_num, node_num, device=device)
      alpha = noise.clone().detach().requires_grad_(True)
      return [alpha]

    def _mask(node_num, device, name):
      mask = torch.zeros(node_num, node_num, device=device)

      for i, j in self.graph.edges():
        op = self.graph.edges[i, j]['module']
        if not op == name: continue

        mask[i, j] = 1

      # transposed so that a row holds the incoming edges of one node
      return mask.t() > 0

    n = self.graph.number_of_nodes()
    self.alphas = _init_alpha(n, device)
    self.mask_s = _mask(n, device, 'shortcut')
    self.mask_f = _mask(n, device, 'forward')

    # self.alphas += [torch.ones(n, device=device, requires_grad=True)]
    # self.alphas += [torch.zeros(n, device=device, requires_grad=True)]

    return self

  @torch.no_grad()
  def export_alpha(self):
    """Return the raw alpha matrix as nested Python lists."""
    alpha = self.alphas[0].tolist()
    return alpha
  
  @torch.no_grad()
  def import_alpha(self, alpha):
    """Replace the raw alpha matrix with the values of `alpha`.

    NOTE(review): this rebinds `self.alphas[0]` to a new tensor, so an
    optimizer built on the previous tensor would no longer see it — confirm
    this is intended.
    """
    self.alphas[0] = self.alphas[0].new_tensor(alpha).view_as(self.alphas[0])


  def normalized_alpha(self):
    """Return the edge weights used by `forward`, indexed [node j, predecessor i].

    In evaluate mode every edge of the graph gets weight 1. Otherwise shortcut
    edges get the hard-sigmoid of their raw alpha and forward edges get 1;
    all other positions stay 0.
    """
    alpha = torch.zeros_like(self.alphas[0])
    if self.evaluate:
      for i, j in self.graph.edges():
        alpha[j, i] = 1.0
    else:
      # i = 0
      # for raw, mask, b in zip(self.alphas[0], self.mask_s, self.normalized_beta()):
      #   buf = torch.zeros_like(alpha[i])
      #   buf[mask] = b * F.softmax(raw[mask], dim=0)
      #   alpha[i] = buf
      #   i += 1
      i = 0
      # row-wise: fill only the shortcut positions of each node
      for raw, mask in zip(self.alphas[0], self.mask_s):
        buf = torch.zeros_like(alpha[i])
        buf[mask] = torch.nn.Hardsigmoid()(raw[mask])
        alpha[i] = buf
        i += 1
        
      alpha[self.mask_f] = 1.0
    return alpha

  # def normalized_beta(self):
  #   # f = lambda x : math.exp(x - 1) if x < 1 else math.log(x) + 1
  #   # return self.beta().apply_(f)
  #   x = self.beta()
  #   m = x>1
  #   beta = torch.zeros_like(x)
  #   beta[m] = torch.log(x[m]) + 1
  #   beta[~m] = torch.exp(x[~m] - 1)
  #   return beta

  # def beta(self):
  #   return self.alphas[1]

  @torch.no_grad()
  def matrix_alpha(self, normalize=True):
    """Return the (normalised or raw) alpha matrix transposed, i.e. indexed [i, j]."""
    return (self.normalized_alpha() if normalize else self.alphas[0]).t()


  def sampled_graph(self, sampler : ArchitectureSampler):
    """Apply `sampler` to the graph using the normalised alpha matrix."""
    return sampler(self.graph, self.matrix_alpha())


  def forward(self, x):
    # drop-path is only allowed when a fixed architecture is evaluated
    assert self.evaluate or self.drop_path_prob <= 0.
    state = [x]
    alpha = self.normalized_alpha()
    # alpha = self.alphas[0]

    # every block reads the outputs of the earlier nodes collected in `state`
    for block in self.blocks:
      x = block(state, alpha[block.index], self.drop_path_prob)
      state += [x]

    out = self.pool(x)
    out = out.view(out.size(0), -1)
    out = self.classifier(out)
    return out

## GA

### util

In [38]:
class FitnessMin(base.Fitness):
  """Single-objective fitness with weight -1.0."""
  weights = (-1.0,)

In [39]:
class Individual():
  """GA individual: a fitness, a free-form `result` dict and a lazily
  generated `alpha`.

  The generator is stored on the class, so it is shared by all individuals
  and the most recently constructed one determines it.
  """
  def __init__(self, generator):
    Individual.generator = generator
    self._alpha : list = None
    self.result = {}
    self.fitness = FitnessMin()

  @property
  def alpha(self):
    """The alpha value; produced by the shared generator on first access."""
    if self._alpha is not None:
      return self._alpha
    self._alpha = Individual.generator()
    return self._alpha

  @alpha.setter
  def alpha(self, alpha):
    self._alpha = alpha
    return self

In [40]:
class GA():
  """Interface of a genetic algorithm; every operation is a no-op here."""

  def init(self):
    """Create the initial population."""

  def mate(self):
    """Crossover step."""

  def mutate(self):
    """Mutation step."""

  def select(self):
    """Selection step."""

  def evaluate(self):
    """Fitness evaluation step."""

In [41]:
class Executor():
  """Interface of an algorithm driver; `run` is a no-op here."""

  def run(self):
    """Execute the algorithm."""

### SGA

In [42]:
class MyGA(GA):
  """Simple GA with tournament selection, uniform crossover and Gaussian mutation.

  This class references `self._init`, `self._evolve` and `self._evaluate`
  without defining them; they must be provided elsewhere (e.g. by a subclass).

  Args:
    args: namespace providing `tournament`, `population`, `crossover`,
      `mutation` and `nth_gen`.
  """

  def __init__(self, args):
    self.exp, self.args = Experiment(), args
    self.toolbox = base.Toolbox()
    self.init_func()
    self.init_stats()


  def init_func(self):
    """Register the population factory and the operators on the toolbox."""
    toolbox = self.toolbox
    def make_individual():
      return Individual(generator=self._init)
    toolbox.register("population", tools.initRepeat, list, make_individual)

    # ops
    toolbox.register("init", self.init)
    toolbox.register("mate", self._mate)
    toolbox.register("mutate", self._mutate)
    toolbox.register("select", tools.selTournament, tournsize=self.args.tournament)
    toolbox.register("evolve", self._evolve)
    toolbox.register("evaluate", self._evaluate)

    self.hall = tools.HallOfFame(1)


  def init_stats(self):
    """Set up fitness statistics (avg/std/min/max) and a fresh logbook."""
    stats = tools.Statistics(key=lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)
    self.stats = stats

    self.logbook = tools.Logbook()
    self.logbook.header = "gen", "avg", "std", "min", "max"


  # ---- individual operations ----

  # def _init(self):
  #   return super()._init()


  def _mate(self, ind1, ind2):
    """Uniform crossover, applied row by row to the two alphas in place."""
    for a1, a2 in zip(ind1.alpha, ind2.alpha):
      # tools.cxBlend(a1, a2, alpha = 0.5)
      tools.cxUniform(a1, a2, indpb = 0.5)
    return ind1, ind2


  def _mutate(self, ind):
    """Gaussian mutation, applied row by row to the alpha."""
    alpha = ind.alpha
    for i, a in enumerate(alpha):
      alpha[i], = tools.mutGaussian(a, mu=0, sigma=0.2, indpb=0.2)
    ind.alpha = alpha
    return ind,


  # def _evaluate(self, ind):
  #   return super()._evaluate(ind)


  # def _evolve(self, ind):
  #   super()._evolve(ind)


  # ---- population operations ----

  def init(self):
    """Create `args.population` individuals."""
    return self.toolbox.population(self.args.population)


  def mate(self, offspring):
    """Cross neighbouring pairs with probability `args.crossover`."""
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
      if random.random() < self.args.crossover:
        self.toolbox.mate(child1, child2)


  def mutate(self, offspring):
    """Mutate each individual with probability `args.mutation`."""
    for mutant in offspring:
      if random.random() < self.args.mutation:
        self.toolbox.mutate(mutant)


  def select_elite(self, pop):
    """Return a one-element list holding a clone of the best individual."""
    elite = tools.selBest(pop, 1)
    elite = list(map(self.toolbox.clone, elite))
    return elite


  def select(self, pop):
    """Tournament-select `len(pop)` individuals and return clones of them."""
    offspring = self.toolbox.select(pop, len(pop))
    offspring = list(map(self.toolbox.clone, offspring))
    return offspring


  def evolve(self, pop):
    """Run `_evolve` on every individual, firing the step_evolution hooks."""
    # self.args, not the global `args`: the hooks must receive this GA's own
    # configuration, and a global of that name may not exist.
    self.exp('step_evolution_start')(self.args)
    for ind in pop:
      self._evolve(ind)
    self.exp('step_evolution_end')(self.args)


  def evaluate(self, pop):
    """Re-evaluate the fitness of every individual, then fire the hook."""
    fitnesses = map(self.toolbox.evaluate, pop)
    for ind, fit in zip(pop, fitnesses):
      del ind.fitness.values
      ind.fitness.values = fit
    self.exp('step_evaluate_end')(self.args)


  def record(self, pop):
    """Print the fitness statistics and add them to the logbook."""
    record = self.stats.compile(pop)
    print(record)
    self.logbook.record(gen=self.args.nth_gen, **record)

In [43]:
# Expresses the algorithm
class ExecuteGA(Executor):
  """Generation loop of the simple GA with one-elite preservation."""

  def __init__(self, ga, args):
    self.args = args
    self.ga = ga

  def run(self):
    """Run `args.generation` generations and return the final population.

    Fires the 'evolution_start/end' and 'generation_start/end' hooks and
    publishes the population as `args.pop`.
    """
    ga, args = self.ga, self.args
    exp = Experiment()
    ga.exp, ga.args = exp, args

    pop = ga.init()
    args.pop = pop

    exp('evolution_start')(args)
    for g in range(args.generation):
      logging.info('generation %d', g)
      args.nth_gen = g
      exp('generation_start')(args)

      ga.evolve(pop)
      ga.evaluate(pop)
      ga.record(pop)

      # the elite bypasses crossover and mutation; it takes the last slot(s)
      elite = ga.select_elite(pop)
      offspring = ga.select(pop)
      offspring = offspring[:-len(elite)]
      ga.mate(offspring)
      ga.mutate(offspring)
      offspring += elite
      pop[:] = offspring

      # self.hall.update(pop)
      exp('generation_end')(args)
    exp('evolution_end')(args)

    return pop

### TDGA

In [44]:
class TDGA(GA):
  """GA whose selection minimises `mean energy - temperature * diversity`.

  This class references `self._init`, `self._evolve` and `self._evaluate`
  without defining them; they must be provided elsewhere (e.g. by a subclass).

  Args:
    args: namespace providing `population`, `crossover`, `mutation`,
      `generation`, `nth_gen`, `t_init`, `t_fin`, `resume`, `is_compress`
      and, when resuming, `logbook`, `dir` and `last_gen`.
  """

  def __init__(self, args):
    self.exp, self.args = Experiment(), args
    self.toolbox = base.Toolbox()
    self.init_func()
    self.init_stats()
    self.update()


  def init_func(self):
    """Register the population factory and the operators on the toolbox."""
    toolbox = self.toolbox
    def make_individual():
      return Individual(generator=self._init)
    toolbox.register("population", tools.initRepeat, list, make_individual)

    # ops
    toolbox.register("mate", self._mate)
    toolbox.register("mutate", self._mutate)


  def init_stats(self):
    """Set up fitness statistics; the logbook is reused when resuming."""
    stats = tools.Statistics(key=lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)
    self.stats = stats

    self.args.logbook = self.args.logbook if self.args.resume else tools.Logbook()
    self.logbook = self.args.logbook
    self.logbook.header = "gen", "avg", "std", "min", "max"


  # ---- individual operations ----

  # def _init(self):
  #   return super()._init()


  def _mate(self, ind1, ind2):
    """Uniform crossover, applied row by row to the two alphas in place."""
    for a1, a2 in zip(ind1.alpha, ind2.alpha):
      # tools.cxBlend(a1, a2, alpha = 0.5)
      tools.cxUniform(a1, a2, indpb = 0.5)
    return ind1, ind2


  def _mutate(self, ind):
    """Gaussian mutation, applied row by row to the alpha."""
    alpha = ind.alpha
    for i, a in enumerate(alpha):
      alpha[i], = tools.mutGaussian(a, mu=0, sigma=0.2, indpb=0.2)
    ind.alpha = alpha
    return ind,


  # def _evaluate(self, ind):
  #   return super()._evaluate(ind)


  # def _evolve(self, ind):
  #   super()._evolve(ind)


  # ---- population operations ----

  def init(self):
    """Create a fresh population, or load the saved one when resuming."""
    args = self.args

    def load_population():
      dir = os.path.join(self.args.dir, 'gen', "%d" % args.last_gen)
      paths = [os.path.join(dir, 'ind_%d.pth' % i) for i in range(args.population)]
      # NOTE(review): torch.load unpickles arbitrary objects, so only load
      # checkpoints you created yourself. Recent PyTorch releases default to
      # weights_only=True, which may refuse these pickled individuals —
      # confirm against the installed version.
      pop = [torch.load(path) for path in paths]
      return pop

    if self.args.resume:
      return load_population()

    return self.toolbox.population(self.args.population)


  def mate(self, offspring):
    """Cross neighbouring pairs with probability `args.crossover`."""
    for child1, child2 in zip(offspring[::2], offspring[1::2]):
      if random.random() < self.args.crossover:
        self.toolbox.mate(child1, child2)


  def mutate(self, offspring):
    """Mutate each individual with probability `args.mutation`."""
    for mutant in offspring:
      if random.random() < self.args.mutation:
        self.toolbox.mutate(mutant)


  def select_elite(self, pop):
    """Return a one-element list holding a clone of the best individual."""
    elite = tools.selBest(pop, 1)
    elite = list(map(self.toolbox.clone, elite))
    return elite


  def select(self, pop):
    """Greedily pick `args.population` individuals from `pop`.

    Each round scores every candidate as if it were added to the individuals
    already picked: F = E_bar - H_i * temperature, where E_bar is the mean
    fitness and H_i the RMS distance between the candidate's alpha and the
    mean alpha. The candidate with the smallest F is picked; the same
    individual can be picked more than once. Afterwards the temperature is
    refreshed and clones of the picks are returned.
    """
    pop = self.compress(pop)

    # Loop-invariant per-candidate data, computed once instead of once per
    # (round, candidate) pair.
    energies = [ind.fitness.values[0] for ind in pop]
    alphas = [torch.tensor(ind.alpha) for ind in pop]

    selected = []
    # running sums over the picks so far (the picked set only ever grows)
    E_sum, alpha_sum = 0, 0
    for _ in range(self.args.population):
      n = len(selected)
      Fs = []
      for energy, alpha in zip(energies, alphas):
        # TODO : ??? wvalues, values
        # ignore H_all
        alpha_bar = (alpha_sum + alpha) / (n + 1)
        H_i = torch.pow(torch.mean(torch.pow(alpha - alpha_bar, 2)), 0.5)
        E_bar = (E_sum + energy) / (n + 1)
        Fs.append(E_bar - H_i * self.temperature)

      idx = Fs.index(min(Fs))
      selected.append(pop[idx])
      E_sum = E_sum + energies[idx]
      alpha_sum = alpha_sum + alphas[idx]

    self.update()

    return list(map(self.toolbox.clone, selected))

  def compress(self, pop):
    """Placeholder: currently returns `pop` unchanged."""
    if self.args.is_compress:
      # TODO
      pass

    return pop

  def update(self):
    """Set the temperature by geometric interpolation between `t_init` and
    `t_fin` according to the progress `nth_gen / generation`."""
    # TODO
    args = self.args
    t = args.nth_gen / args.generation
    self.temperature = pow(args.t_init, 1-t) * pow(args.t_fin, t)


  def evolve(self, pop):
    """Run `_evolve` on every individual, firing the step_evolution hooks."""
    self.exp('step_evolution_start')(self.args)
    for ind in pop:
      self._evolve(ind)
    self.exp('step_evolution_end')(self.args)


  def evaluate(self, pop):
    """Re-evaluate the fitness of every individual, then fire the hook."""
    fitnesses = map(self._evaluate, pop)
    for ind, fit in zip(pop, fitnesses):
      del ind.fitness.values
      ind.fitness.values = fit
    self.exp('step_evaluate_end')(self.args)


  def record(self, pop):
    """Print the fitness statistics and add them to the logbook."""
    record = self.stats.compile(pop)
    print(record)
    self.logbook.record(gen=self.args.nth_gen, **record)

In [45]:
# Expresses the algorithm
class ExecuteTDGA(Executor):
  
  def __init__(self, ga, args):
    self.args = args
    self.ga = ga

  def run(self):
    ga = self.ga
    exp, args = ga.exp, ga.args = Experiment(), self.args

    args.pop = pop = ga.init()
    ga.evaluate(pop)

    exp('evolution_start')(args)
    for g in range(args.nth_gen, args.generation):
      logging.info('generation %d', g)
      args.nth_gen = g
      exp('generation_start')(args)

      elite = ga.select_elite(pop)
      
      ga.evolve(pop)
      ga.evaluate(pop)

      offspring = list(map(ga.toolbox.clone, pop)) # TODO

      ga.mate(offspring)
      gen = pop + offspring
      ga.mutate(gen)
      gen += elite

      # ga.evolve(gen)
      ga.evaluate(gen)
      pop[:] = ga.select(gen) # [:] inplace
      ga.record(pop)

      exp('generation_end')(args)
    exp('evolution_end')(args)

    return pop

## architecture

In [46]:
class Architect():
  """Updates a model's architecture parameters (`model.alphas`) on validation batches.

  An Adam optimizer is built over `model.alphas`; each `step()` takes one
  batch from `valid_loader`, computes `criterion(model(data), target)` and
  applies one optimizer step.

  Attributes:
    train: when False, `step()` is a no-op. Callers toggle this flag.
  """

  def __init__(self, valid_loader, model, criterion, lr, device):
    self.valid_loader = valid_loader
    self.model = model
    self.criterion = criterion
    self.optimizer = optim.Adam(model.alphas, lr=lr, betas=(0.5, 0.999), weight_decay=1e-3)
    self.device = device
    self.train = True
    # Iterator reused across step() calls, plus the loader it was built from.
    self._valid_iter = None
    self._valid_iter_source = None

  def _next_batch(self):
    """Return the next validation batch, restarting the iterator when it is exhausted."""
    loader = self.valid_loader
    if self._valid_iter is None or self._valid_iter_source is not loader:
      self._valid_iter, self._valid_iter_source = iter(loader), loader
    try:
      return next(self._valid_iter)
    except StopIteration:
      # One full pass is done: start the next pass over the loader.
      self._valid_iter = iter(loader)
      return next(self._valid_iter)

  def step(self):
    """Take one optimizer step on the architecture parameters.

    Does nothing when there is no validation loader or when `self.train`
    is False.
    """
    if not self.valid_loader: return
    if not self.train: return

    # Previously `next(iter(self.valid_loader))` built a new iterator per call,
    # so an unshuffled loader always returned its first batch.
    # NOTE(review): for a multi-worker DataLoader that presumably also restarted
    # the workers on every step — confirm against the loader configuration.
    data_v, target_v = self._next_batch()
    data_v, target_v = data_v.to(self.device), target_v.to(self.device)

    self.optimizer.zero_grad()
    output = self.model(data_v)
    loss = self.criterion(output, target_v)
    loss.backward()
    self.optimizer.step()

# learning

## events

In [47]:
@Experiment.event('setup')
def setup(args):
  """Prepare the output directory, logging, metric store and random seed.

  When `args.resume` is set the existing `args.dir` is reused as
  `args.save`; otherwise a time-stamped directory is derived from
  `args.save` and created. Also resets `args.start_epoch` to 0 and
  attaches a `Store` as `args.store`.
  """
  if not args.resume:
    args.save = path_with_time(args.save)
    create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
  else:
    args.save = args.dir

  init_logging(args.save)
  logging.info('kwargs %s' % args)

  args.start_epoch = 0

  # log init
  figures = [
    (["train_acc", "test_acc"], "epochs", "accuracy[%]"),
    (["train_loss", "test_loss"], "epochs", "loss"),
  ]
  args.store = Store(dir=args.save, name="store", fig=figures)

  set_seed(args.seed)

In [48]:
@Experiment.event('setup')
def set_device(args):
  """Record CUDA availability in `args.use_cuda` and the matching device in `args.device`."""
  # cuda init
  has_cuda = torch.cuda.is_available()
  args.use_cuda = has_cuda
  if has_cuda:
    args.device = torch.device("cuda")
  else:
    args.device = torch.device("cpu")

In [49]:
@Experiment.event('start')
def set_tdqm(args):
  """Create a progress bar in `args.bar` sized to the epochs still to run."""
  remaining_epochs = args.epochs - args.start_epoch
  args.bar = tqdm(total=remaining_epochs)

In [50]:
@Experiment.event('epoch_end')
def update_tdqm(args):
  """Advance the progress bar stored in `args.bar` by one tick."""
  progress_bar = args.bar
  progress_bar.update()

In [51]:
@Experiment.event('epoch_start')
def logging_alpha(args):
  """Log the raw alpha matrix, the normalised one and beta, unless `model.evaluate` is set."""
  model = args.model
  if model.evaluate:
    return

  logging.info('raw %s', model.matrix_alpha(normalize=False))
  logging.info('alpha %s', model.matrix_alpha())
  logging.info('beta %s', model.beta())

In [52]:
@Experiment.event('train_end')
def train_end(args, data):
  """Log the training accuracy and add loss and accuracy to `args.store`.

  `data` is an `(accuracy, loss)` pair.
  """
  accuracy_value, loss_value = data
  logging.info('train_acc %f', accuracy_value)
  store = args.store
  store.add("train_loss", loss_value)
  store.add("train_acc", accuracy_value)

In [53]:
@Experiment.event('test_end')
def test_end(args, data):
  """Log the evaluation accuracy and add loss and accuracy to `args.store`.

  `data` is an `(accuracy, loss)` pair; it is logged as `valid_acc` but
  stored under the `test_*` keys.
  """
  accuracy_value, loss_value = data
  logging.info('valid_acc %f', accuracy_value)
  store = args.store
  store.add("test_loss", loss_value)
  store.add("test_acc", accuracy_value)

In [54]:
@Experiment.event('checkpoint', 'end', order=1)
def save_checkpoint(args):
  """Save the metric store, then pass the experiment directory to `save_dir`."""
  store, experiment_dir = args.store, args.save
  store.save()
  save_dir(experiment_dir)

In [55]:
@Experiment.event('checkpoint', 'end')
def save_model(args):
  """Write model weights, graph, alphas, store and epoch to `checkpoint.pth` in `args.save`."""
  model = args.model
  checkpoint_path = os.path.join(args.save, 'checkpoint.pth')
  state = dict(
    model=model.state_dict(),
    graph=model.graph,
    alpha=model.alphas,
    store=args.store,
    epoch=args.epoch,
  )
  torch.save(state, checkpoint_path)

In [56]:
@Experiment.event('epoch_end')
def save_graph(args):
  """Sample the current architecture with `MaxSampler` and save it for this epoch.

  Writes `graph_<epoch>.pth` and renders `graph_<epoch>` into the `graph`
  sub-directory of `args.save`.
  """
  path = os.path.join(args.save, 'graph')
  # Creates the directory without a separate existence check,
  # as the generation_end handlers in this file already do.
  os.makedirs(path, exist_ok=True)
  sampler = MaxSampler()
  graph = args.model.sampled_graph(sampler)
  torch.save(graph, os.path.join(path, 'graph_%d.pth' % args.epoch))
  render_graph(graph, os.path.join(path, 'graph_%d' % args.epoch))

In [57]:
@Experiment.event('epoch_end')
def save_alpha(args):
  """Save a heatmap of the model's alpha matrix as `alpha/alpha_<epoch>.png` under `args.save`."""
  path = os.path.join(args.save, 'alpha')
  # Creates the directory without a separate existence check,
  # as the generation_end handlers in this file already do.
  os.makedirs(path, exist_ok=True)
  save_heatmap(args.model.matrix_alpha(), os.path.join(path, 'alpha_%d.png' % args.epoch))

In [58]:
@Experiment.event('epoch_end')
def save_weight(args):
  """Save the model's state dict as `model/model_<epoch>.pth` when `args.save_weight` is set."""
  if not args.save_weight: return

  path = os.path.join(args.save, 'model')
  # Creates the directory without a separate existence check,
  # as the generation_end handlers in this file already do.
  os.makedirs(path, exist_ok=True)
  torch.save(args.model.state_dict(), os.path.join(path, 'model_%d.pth' % args.epoch))

In [59]:
@Experiment.event('end')
def aggregate_data(args):
  """Apply `max` to the stored `test_acc` series, store it as `test_acc_best` and log it."""
  best_acc = args.store.apply('test_acc', max)
  args.store.add('test_acc' + '_best', best_acc)
  logging.info('best acc %s' % best_acc)

## ga event

In [60]:
@Experiment.event('setup_ga')
def init_resume(args):
  """Set the first generation index to run.

  A fresh run starts at generation 0. A resumed run remembers the stored
  generation in `args.last_gen` and continues from the one after it.
  """
  if not args.resume:
    args.nth_gen = 0
    return

  args.last_gen = args.nth_gen
  args.nth_gen += 1

In [61]:
@Experiment.event('generation_end', 'evolution_end', order=1)
def save_dir2drive(args):
  """Pass the experiment directory to `save_dir`, except when running with `SERVER` set."""
  if not SERVER:
    save_dir(args.save)

In [62]:
@Experiment.event('evolve_end')
def save_individual_train(args, result):
  """Store `result` under the `'train'` key of the current individual's result mapping."""
  outcome = args.individual.result
  outcome['train'] = result

@Experiment.event('evaluate_end')
def save_individual_test(args, result):
  """Store `result` under the `'test'` key of the current individual's result mapping."""
  outcome = args.individual.result
  outcome['test'] = result

In [63]:
@Experiment.event('generation_end')
def save_generation_log(args):
  """Write one `"<index> : <result>"` line per individual to `gen/<nth_gen>/gen.txt`."""
  gen_dir = os.path.join(args.save, 'gen', '%d' % args.nth_gen)
  os.makedirs(gen_dir, exist_ok=True)

  lines = []
  for index, individual in enumerate(args.pop):
    lines.append("%d : %s" % (index, individual.result))

  log_path = os.path.join(gen_dir, 'gen.txt')
  with open(log_path, mode='w') as f:
    f.write("\n".join(lines))

In [64]:
@Experiment.event('generation_end')
def save_individual(args):
  """Dump every individual of the current generation to `gen/<nth_gen>/`.

  For each individual the shared model is loaded with its alpha, then an
  alpha heatmap, a rendered `EdgewiseSampler` graph and the pickled
  individual are written. The count of edges `(e, f)` with `e + 1 != f`
  is stored in `ind.result['edge']`.
  """
  gen_dir = os.path.join(args.save, 'gen', '%d' % args.nth_gen)
  os.makedirs(gen_dir, exist_ok=True)

  model = args.model
  for index, individual in enumerate(args.pop):
    model.import_alpha(individual.alpha)
    save_heatmap(model.matrix_alpha(), os.path.join(gen_dir, 'alpha_%d.png' % index))

    graph = model.sampled_graph(EdgewiseSampler())
    render_graph(graph, os.path.join(gen_dir, 'graph_%d' % index))

    # Edges that do not simply connect a node to its immediate successor.
    individual.result['edge'] = sum(1 for e, f in graph.edges() if not e + 1 == f)

    torch.save(individual, os.path.join(gen_dir, 'ind_%d.pth' % index))
    logging.info(individual.result)

In [65]:
@Experiment.event('generation_end')
def save_checkpoint_ga(args):
  """Write model weights, the logbook and the generation index to `checkpoint.pth` in `args.save`."""
  checkpoint_path = os.path.join(args.save, 'checkpoint.pth')
  state = dict(
    model=args.model.state_dict(),
    logbook=args.logbook,
    nth_gen=args.nth_gen,
  )
  torch.save(state, checkpoint_path)

In [66]:
@Experiment.event('evolution_end')
def report_population(args):
  """Collect the saved `result` of every individual of every generation into `result.pth`.

  Reads `gen/<g>/ind_<i>.pth` for `g < args.generation` and
  `i < args.population` below `args.save`, prints the nested list and
  saves it as `result.pth` in `args.save`.
  """
  def export_result(dir, n_gen, n_pop):
    results = []
    for gen in range(n_gen):
      gen_dir = os.path.join(dir, 'gen', '%d' % gen)
      results.append([
        torch.load(os.path.join(gen_dir, "ind_%d.pth" % i)).result
        for i in range(n_pop)
      ])
    print(results)
    torch.save(results, os.path.join(dir, 'result.pth'))

  export_result(args.save, args.generation, args.population)

## learning

In [67]:
@argspace(retain_graph=True)
def train(args):
  """Train the model weights for one pass over `args.dataset`.

  Each batch first triggers `args.architect.step()`, then a regular
  zero_grad / forward / backward / optimizer step. Running averages of
  loss, top-1 and top-5 accuracy are logged every `args.report_freq` steps.

  Returns:
    `(top-1 average, loss average)` over the pass.
  """
  loss_meter = AvgrageMeter()
  top1_meter = AvgrageMeter()
  top5_meter = AvgrageMeter()
  args.model.train()

  for step, batch in enumerate(args.dataset):
    inputs, targets = batch
    batch_size = inputs.size(0)

    inputs = Variable(inputs, requires_grad=False).to(args.device)
    targets = Variable(targets, requires_grad=False).to(args.device)

    args.architect.step()

    args.optimizer.zero_grad()
    logits = args.model(inputs)
    loss = args.criterion(logits, targets)

    loss.backward(retain_graph=args.retain_graph)
    # nn.utils.clip_grad_norm(args.model.parameters(), args.grad_clip)
    args.optimizer.step()

    prec1, prec5 = accuracy(logits, targets, topk=(1, 5))
    for meter, value in ((loss_meter, loss), (top1_meter, prec1), (top5_meter, prec5)):
      meter.update(value.item(), batch_size)

    if step % args.report_freq == 0:
      logging.info('train %03d %e %f %f', step, loss_meter.avg, top1_meter.avg, top5_meter.avg)

  return top1_meter.avg, loss_meter.avg

In [68]:
@argspace(retain_graph=True)
def train_architect(args):
  """Run one pass over `args.dataset` that only steps the architect.

  The weight optimizer is never stepped here; the forward pass after
  `args.architect.step()` is used only to measure loss and accuracy.
  Running averages are logged every `args.report_freq` steps.

  Returns:
    `(top-1 average, loss average)` over the pass.
  """
  loss_meter = AvgrageMeter()
  top1_meter = AvgrageMeter()
  top5_meter = AvgrageMeter()
  args.model.train()

  for step, batch in enumerate(args.dataset):
    inputs, targets = batch
    batch_size = inputs.size(0)

    inputs = Variable(inputs, requires_grad=False).to(args.device)
    targets = Variable(targets, requires_grad=False).to(args.device)

    args.architect.step()

    logits = args.model(inputs)
    loss = args.criterion(logits, targets)
    prec1, prec5 = accuracy(logits, targets, topk=(1, 5))
    for meter, value in ((loss_meter, loss), (top1_meter, prec1), (top5_meter, prec5)):
      meter.update(value.item(), batch_size)

    if step % args.report_freq == 0:
      logging.info('train %03d %e %f %f', step, loss_meter.avg, top1_meter.avg, top5_meter.avg)

  return top1_meter.avg, loss_meter.avg

In [69]:
@argspace()
def infer(args):
  """Evaluate the model on `args.dataset` without updating anything.

  The model is put in eval mode and every batch is scored with
  `args.criterion` and top-1/top-5 accuracy. Running averages are logged
  every `args.report_freq` steps.

  Returns:
    `(top-1 average, loss average)` over the pass.
  """
  objs = AvgrageMeter()
  top1 = AvgrageMeter()
  top5 = AvgrageMeter()
  args.model.eval()

  # Nothing is back-propagated here, so skip building the autograd graph.
  with torch.no_grad():
    for step, (input, target) in enumerate(args.dataset):
      input = Variable(input, requires_grad=False).to(args.device)
      target = Variable(target, requires_grad=False).to(args.device)

      logits = args.model(input)
      loss = args.criterion(logits, target)

      prec1, prec5 = accuracy(logits, target, topk=(1, 5))
      n = input.size(0)
      objs.update(loss.item(), n)
      top1.update(prec1.item(), n)
      top5.update(prec5.item(), n)

      if step % args.report_freq == 0:
        logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

  return top1.avg, objs.avg

## setting

In [70]:
def default_setting():
  """Return a fresh dict of default experiment arguments.

  Besides the model/training defaults, this supplies "off" values for the
  optional flags that `setup` and `main_ga` read unconditionally
  (`resume`, `trained_weight`, `trained_alpha`, `fix_weight`,
  `no_evolve`), so a caller that omits them no longer hits an
  AttributeError on the resulting Namespace. Explicit keyword arguments
  still override every entry.
  """
  default_args = argparse.Namespace(
      gene='VGG19', momentum=0.9, report=100, checkpoint=10,
      stride_max=2, batch_size=64, dir="", graph=None, lr_alpha=0.0,
      save_weight=False, seed=41, scheduler=None, pretrain=False,
      cutout=False, cutout_length=0, drop_path_prob=0.0,
      # Optional flags: everything disabled unless the caller opts in.
      resume=False, trained_weight=False, trained_alpha=None,
      fix_weight=False, no_evolve=False)
  return vars(default_args)

## experiment

In [71]:
@argspace(default_setting)
def load_model(args) -> Network:
  """Build a `Network` for `args.gene` and optionally restore it from a checkpoint.

  When `args.dir` is non-empty, `checkpoint.pth` in that directory is
  loaded and its `'model'` state dict and `'alpha'` entry are applied.

  Returns:
    The constructed (and possibly restored) model on `args.device`.
  """
  # init
  set_device(args)
  device = args.device

  # model setup
  gene = cfg[args.gene]
  sampler = StrideCutSampler(args.stride_max) if args.stride_max > 0 else None
  model = Network(gene, graph=args.graph, preprocess=sampler).to(device).init_alpha(device)

  # resume
  if args.dir:
    # map_location lets a checkpoint written on a GPU load on a CPU-only host.
    state = torch.load(os.path.join(args.dir, 'checkpoint.pth'), map_location=device)
    model.load_state_dict(state['model'])
    model.alphas = state['alpha']

  return model

In [72]:
@argspace(default_setting)
def main(args):
  """Train (and optionally search) a single network for `args.epochs` epochs.

  Steps: fire the `setup` events, build the `Network`, optionally copy
  pretrained VGG19 modules into it, optionally resume from
  `args.dir/checkpoint.pth`, then loop over epochs running `train` and
  `infer` and firing the `epoch_*`, `train_end`, `test_end`, `checkpoint`
  and `end` events.

  Returns:
    The trained model.
  """

  # init
  exp = Experiment()
  exp('setup')(args)
  store = args.store
  device = args.device


  # model setup
  gene = cfg[args.gene]
  logging.info('gene %s', gene)
  sampler = StrideCutSampler(args.stride_max) if args.stride_max > 0 else None
  graph = args.graph if args.graph else None
  model = Network(gene, graph=graph, preprocess=sampler).to(device).init_alpha(device)
  logging.info('model param %s', count_param(model))

  # # load cuda
  # if device == 'cuda':
  #   model = torch.nn.DataParallel(model)
  #   cudnn.benchmark = True

  if args.pretrain:
    assert args.gene == 'VGG19'
    vgg19 = models.vgg19(pretrained=True).to(device)
    # Conv layers first, then BatchNorm layers, paired below with the
    # first and second sub-module of every non-shortcut edge.
    pretrain = [f for f in vgg19.features if type(f) == nn.Conv2d]
    pretrain += [f for f in vgg19.features if type(f) == nn.BatchNorm2d]
    target = [e for b in model.blocks for e in b.edges if not type(e) == Shortcut]
    target = [t[0] for t in target] + [t[1] for t in target]
    for t, s in zip(target, pretrain):
      init_module(t, s)
    del vgg19
    logging.info('Transfar learning (%s)...' % (args.gene))

  # resume
  if args.dir:
    # map_location lets a checkpoint written on a GPU load on a CPU-only host.
    state = torch.load(os.path.join(args.dir, 'checkpoint.pth'), map_location=device)
    model.load_state_dict(state['model'])
    model.alphas = state['alpha']
    args.start_epoch = state['epoch']
    store.update(state['store'])
    logging.info('Resuming from epoch %d in %s' % (args.start_epoch, args.dir))

  args.model = model

  # env
  loader = load_dataloader(args)
  optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=5e-4)
  criterion = nn.CrossEntropyLoss()
  architect = Architect(loader.valid, model, criterion, args.lr_alpha, device)
  if args.scheduler:
    scheduler = args.scheduler(optimizer, **args.scheduler_args)

  exp('start')(args)

  for epoch in range(args.start_epoch + 1, args.epochs + 1):
    logging.info('epoch %d', epoch)
    args.epoch = epoch

    exp('epoch_start')(args)

    # In search mode the architect only starts stepping at args.weight_epoch.
    if not model.evaluate:
      architect.train = epoch >= args.weight_epoch

    # In evaluation mode the drop-path probability grows linearly with the epoch.
    if model.evaluate:
      model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

    # training
    train_result = train(dataset=loader.train, model=model,
                         criterion=criterion, optimizer=optimizer,
                         architect=architect,
                         device=device, report_freq=args.report)
    exp('train_end')(args, train_result)

    # validation
    valid_result = infer(dataset=loader.test, model=model,
                                 criterion=criterion,
                                 device=device, report_freq=args.report)
    exp('test_end')(args, valid_result)

    if args.scheduler:
      scheduler.step()

    exp('epoch_end')(args)

    if epoch % args.checkpoint == 0:
      exp('checkpoint')(args)

  exp('end')(args)

  return model

In [73]:
@argspace(**default_setting())
def evaluate(args):
  """Sample an architecture from a saved model and train it via `main`.

  The model stored in `args.dir` is loaded, `args.sampler` extracts a
  graph from it, and `main` is run with that graph. `args.dir` is cleared
  for the run and kept as `args.origin_dir`.
  """
  searched = load_model(dir=args.dir, gene=args.gene, stride_max=args.stride_max)

  sampler : ArchitectureSampler = args.sampler
  sampled = searched.sampled_graph(sampler)

  args.origin_dir, args.dir = args.dir, ""
  args.graph = sampled
  main(**vars(args))

In [74]:
@argspace(**default_setting())
def evaluate_random(args):
  """Train a randomly sampled architecture via `main`, sized like the searched one.

  The graph sampled with `args.sampler` is used only to count edges
  `(e, f)` with `e + 1 != f`; that count is passed to `RandomSampler`,
  whose sampled graph is then handed to `main`. `args.dir` is cleared for
  the run and kept as `args.origin_dir`.
  """
  searched = load_model(dir=args.dir, gene=args.gene, stride_max=args.stride_max)

  reference_sampler : ArchitectureSampler = args.sampler
  reference = searched.sampled_graph(reference_sampler)

  extra_edges = sum(1 for e, f in reference.edges() if not e + 1 == f)
  random_sampler : ArchitectureSampler = RandomSampler(extra_edges)
  random_graph = searched.sampled_graph(random_sampler)

  args.origin_dir, args.dir = args.dir, ""
  args.graph = random_graph
  main(**vars(args))

## experiment ga

In [75]:
@argspace(default_setting)
def main_ga(args):
  """Run the TDGA architecture search and return the shared model.

  Steps: fire the `setup` events, build the `Network`, optionally copy
  pretrained VGG19 modules (fresh runs only), optionally restore state
  from `args.dir/checkpoint.pth` (`args.resume` and/or
  `args.trained_weight`), define the per-individual init/evolve/evaluate
  callbacks, attach them to `GA`, and run `ExecuteTDGA`.

  Expects `args` to carry `resume`, `trained_weight`, `trained_alpha`,
  `fix_weight` and `no_evolve` in addition to the training options.
  """

  # init
  exp = Experiment()
  exp('setup')(args)
  store = args.store
  device = args.device


  # model setup
  gene = cfg[args.gene]
  logging.info('gene %s', gene)
  sampler = StrideCutSampler(args.stride_max) if args.stride_max > 0 else None
  graph = args.graph if args.graph else None
  model = Network(gene, graph=graph, preprocess=sampler).to(device).init_alpha(device)
  logging.info('model param %s', count_param(model))

  if args.pretrain and not args.resume:
    assert args.gene == 'VGG19'
    vgg19 = models.vgg19(pretrained=True).to(device)
    # Conv layers first, then BatchNorm layers, paired below with the
    # first and second sub-module of every non-shortcut edge.
    pretrain = [f for f in vgg19.features if type(f) == nn.Conv2d]
    pretrain += [f for f in vgg19.features if type(f) == nn.BatchNorm2d]
    target = [e for b in model.blocks for e in b.edges if not type(e) == Shortcut]
    target = [t[0] for t in target] + [t[1] for t in target]
    for t, s in zip(target, pretrain):
      init_module(t, s)
    del vgg19
    logging.info('Transfar learning (%s)...' % (args.gene))

  # resume
  if args.resume:
    # map_location lets a checkpoint written on a GPU load on a CPU-only host.
    state = torch.load(os.path.join(args.dir, 'checkpoint.pth'), map_location=device)
    model.load_state_dict(state['model'])
    args.nth_gen = state['nth_gen']
    args.logbook = state['logbook']
    logging.info('Resuming from gen %d in %s' % (args.nth_gen, args.dir))

  if args.trained_weight:
    state = torch.load(os.path.join(args.dir, 'checkpoint.pth'), map_location=device)
    model.load_state_dict(state['model'])
    args.trained_alpha = model.normalized_alpha()
    logging.info('Using pretrained data in %s' % args.dir)

  args.model = model

  # env
  loader = load_dataloader(args)
  optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=5e-4)
  criterion = nn.CrossEntropyLoss()
  architect = Architect(loader.valid, model, criterion, args.lr_alpha, device)
  if args.scheduler:
    scheduler = args.scheduler(optimizer, **args.scheduler_args)


  def init_individual(self):
    """Create the initial alpha of one individual as a nested list."""
    size = len(model.alphas[0])

    if args.trained_alpha is not None:
      # Bernoulli draw per entry, mapped from {0, 1} to {-3, +3}.
      alpha = ((torch.bernoulli(args.trained_alpha) * 2. - 1.) * 3.).tolist()
    else:
      alpha = torch.randn(size, size).tolist()

    logging.info("init ind %s", alpha)
    return alpha

  @Experiment.event('step_evolution_start')
  def train_weight(args):
    """Train the shared weights for one pass using the population's mean alpha."""
    if args.fix_weight:
      return

    def mean_alpha(pop):
      alphas = torch.tensor([ind.alpha for ind in pop])
      return torch.mean(alphas, 0).tolist()

    alpha_m = mean_alpha(args.pop)
    model.import_alpha(alpha_m)

    # training
    architect.train = False
    train_result = train(dataset=loader.train, model=model,
                         criterion=criterion, optimizer=optimizer,
                         architect=architect,
                         device=device, report_freq=args.report)
    logging.info('train weight %s', train_result)


  def evolve(self, individual):
    """Step the architect on one individual's alpha and write the result back."""
    if args.no_evolve:
      return

    args.individual = individual
    model.import_alpha(individual.alpha)

    # training
    architect.train = True
    train_result = train_architect(dataset=loader.valid, model=model,
                         criterion=criterion, optimizer=optimizer,
                         architect=architect,
                         device=device, report_freq=args.report)

    exp('evolve_end')(args, train_result)
    logging.info('train %s', train_result)

    individual.alpha = model.export_alpha()


  def evaluate(self, individual):
    """Score one individual on the test loader; the fitness is the 1-tuple `(loss,)`."""
    args.individual = individual
    model.import_alpha(individual.alpha)

    # validation
    valid_result = infer(dataset=loader.test, model=model,
                                 criterion=criterion,
                                 device=device, report_freq=args.report)
    exp('evaluate_end')(args, valid_result)
    logging.info('valid %s', valid_result)

    acc, loss = valid_result
    return loss,


  exp('setup_ga')(args)

  # The callbacks are attached to the GA class itself, not to one instance.
  GA._init = init_individual
  GA._evaluate = evaluate
  GA._evolve = evolve

  ga = TDGA(args)
  # Named `executor` rather than `exec` so the builtin is not shadowed.
  executor = ExecuteTDGA(ga, args)
  _ = executor.run()
  logging.info('book\n%s', ga.logbook)

  return model

In [76]:
@argspace(default_setting)
def evaluate_ga(args):
  """Train the architecture encoded by one saved GA individual via `main`.

  `args.dir` is the path of a saved individual; its alpha is imported into
  a freshly built model, `args.sampler` extracts a graph, and `main` is
  run with that graph. `args.dir` is cleared for the run and kept as
  `args.origin_dir`.
  """
  base_model = load_model(gene=args.gene, stride_max=args.stride_max)

  individual = torch.load(args.dir)
  base_model.import_alpha(individual.alpha)
  sampler : ArchitectureSampler = args.sampler
  sampled = base_model.sampled_graph(sampler)

  args.origin_dir = args.dir
  args.dir = ""
  args.graph = sampled
  main(**vars(args))

# main

In [111]:
# ind = torch.load("ind_0.pth")
# alpha = torch.tensor(ind.alpha)
# alpha = nn.Hardsigmoid()(alpha).t()
def mean_alpha(dir, n_pop=10):
  """Return the hard-sigmoid of the mean alpha over the individuals saved in `dir`.

  Args:
    dir: directory holding `ind_<i>.pth` files for `i` in `range(n_pop)`.
    n_pop: number of individuals to load (defaults to the previous
      hard-coded 10).

  Returns:
    The element-wise mean of the individuals' alphas, passed through
    `nn.Hardsigmoid` and transposed. Previously the value was computed
    but never returned, so callers received `None`.
  """
  paths = [os.path.join(dir, "ind_%d.pth" % i) for i in range(n_pop)]
  pop = [torch.load(path) for path in paths]
  alpha = torch.tensor([ind.alpha for ind in pop]).mean(dim=0)
  # alpha = torch.tensor(ind.alpha)
  alpha = nn.Hardsigmoid()(alpha).t()
  return alpha

In [109]:
def render_weight_graph(graph, path):
  """Render `graph` with graphviz, shading each edge by its `'weight'` attribute.

  Nodes are labelled with their `'name'` (or the node id) plus channel and
  spatial size (`32 / stride`). Edges carry no text label; `'forward'`
  edges are drawn bold, all others dashed, and the colour is the HSV
  string `"0.000 0.000 <weight>"`.

  Returns:
    The `graphviz.Digraph` after it has been rendered to `path`.
  """
  font = "times"
  options = dict(
      format='png',
      edge_attr=dict(fontsize='20', fontname=font),
      node_attr=dict(style='filled', shape='rect', align='center', fontsize='20', height='0.5', width='0.5', penwidth='2', fontname=font),
      engine='dot' # circo, dot, fdp, neato, osage, sfdp, twopi
  )

  dg = graphviz.Digraph(**options)

  dg.attr('node', fillcolor='dodgerblue4', fontcolor='white', fontsize='15') # coral,
  for node in graph.nodes():
    node_attr = graph.nodes[node]
    title = node_attr['name'] if 'name' in node_attr else str(node)
    side = 32 / node_attr['stride']
    title += '\n(%s, %d, %d)' % (node_attr['channel'], side, side)
    dg.node(str(node), label=title)

  for (src, dst) in graph.edges():
    edge_attr = graph.edges[src, dst]
    if edge_attr['module'] == 'forward':
      line_style = 'bold'
    else:
      line_style = 'dashed'
    shade = "0.000 0.000 %f" % edge_attr['weight']
    dg.edge(str(src), str(dst), label="", style=line_style, color=shade)

  dg.render(path)
  return dg

In [None]:
# For every generation directory, colour the model graph's edges by the
# population's mean alpha and render it as `weight_alpha`.
graph = load_model().graph
dirs = ['gen/%d' % i for i in range(150)]
for dir in dirs:
  alpha = mean_alpha(dir)
  for (i, j) in graph.edges():
    graph.edges[i, j]['weight'] = alpha[i, j]
  # Render once per generation, after every edge weight has been assigned
  # (the call used to sit inside the edge loop and re-rendered per edge).
  render_weight_graph(graph, os.path.join(dir, 'weight_alpha'))

In [None]:
# Colour the edges of a freshly loaded model's graph by `alpha` and render it to "test".
# NOTE(review): `alpha` is not defined in this cell; it relies on a value left
# in the kernel by an earlier cell — confirm before a Restart & Run All.
graph = load_model().graph
# graph = sampler(graph, alpha)
for (i, j) in graph.edges():
  graph.edges[i, j]['weight'] = alpha[i, j]

render_weight_graph(graph, "test")


In [None]:
# Sample a sub-graph with EdgewiseSampler using `alpha` and render it to "test".
# NOTE(review): `alpha` is not defined in this cell; it relies on a value left
# in the kernel by an earlier cell — confirm before a Restart & Run All.
# NOTE(review): render_weight_graph reads a 'weight' attribute on every edge;
# verify that the sampled graph provides it.
sampler : ArchitectureSampler = EdgewiseSampler()
graph = load_model().graph
graph = sampler(graph, alpha)
render_weight_graph(graph, "test")


In [None]:
# if __name__ == '__main__':
#   path = 'exp_tdga-2021-01-07_10-39-12/gen/59/'
#   for i in [0]:
#     evaluate_ga(save="exp_tdga_eval_%d" % i, lr=0.0090131, epochs=150,
#         scheduler=optim.lr_scheduler.StepLR,
#         scheduler_args={'gamma':0.23440, 'step_size':100},
#         train_size=50000, valid_size=0, test_size=10000, sampler=EdgewiseSampler(),
#         dir=os.path.join("drive/My Drive/ml/", path, 'ind_%d.pth' % i))

In [None]:

#
# export_result('exp_tdga_full-2021-01-13_16-37-53', 150, 10)

In [None]:
# if __name__ == '__main__':
#   main_ga(save="exp_tdga", lr=0.001, lr_alpha=0.001,
#       population=10, generation=20, crossover=0.5, mutation=0.2,
#       t_init=10, t_fin=2, is_compress=False, resume=False,
#       train_size=12500, valid_size=5000, test_size=1000, seed=41, pretrain=True)
# Entry point: resume the full TDGA search (150 generations, population 10)
# from the checkpoint stored in the Drive directory given by `dir`.
# The temperature schedule goes from t_init=1 to t_fin=.001.
if __name__ == '__main__':
  main_ga(save="exp_tdga_full", lr=0.001, lr_alpha=0.001,
      population=10, generation=150, crossover=0.5, mutation=0.2,
      t_init=1, t_fin=.001, is_compress=False, resume=True,
      dir='drive/My Drive/ml/exp_tdga_full-2021-01-12_14-12-27',
      train_size=25000, valid_size=25000, test_size=2000, seed=41, pretrain=True)

01/13 02:48:49 AM kwargs Namespace(batch_size=64, checkpoint=10, crossover=0.5, cutout=False, cutout_length=0, dir='drive/My Drive/ml/exp_tdga_full-2021-01-12_14-12-27', drop_path_prob=0.0, gene='VGG19', generation=150, graph=None, is_compress=False, lr=0.001, lr_alpha=0.001, momentum=0.9, mutation=0.2, population=10, pretrain=True, report=100, resume=True, save='drive/My Drive/ml/exp_tdga_full-2021-01-12_14-12-27', save_weight=False, scheduler=None, seed=41, stride_max=2, t_fin=0.001, t_init=1, test_size=2000, train_size=25000, valid_size=25000)
01/13 02:48:49 AM gene [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
01/13 02:49:00 AM model param 26298058
01/13 02:49:01 AM Resuming from gen 20 in drive/My Drive/ml/exp_tdga_full-2021-01-12_14-12-27
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
01/13 02:49:17 AM valid 000 5.498128e-01 81.250000 100.000000
01/13 02:49:18 AM valid (79.55, 0.588439420223236)
01/13 02:49:18 AM valid 000 4.593287e-01 85.937500 100.000000
01/13 02:49:19 AM valid (78.15, 0.643814992904663)
01/13 02:49:19 AM valid 000 6.018072e-01 82.812500 100.000000
01/13 02:49:21 AM valid (79.55, 0.6378285965919495)
01/13 02:49:21 AM valid 000 4.636073e-01 85.937500 100.000000
01/13 02:49:22 AM valid (79.25, 0.7085670685768127)
01/13 02:49:22 AM valid 000 6.939766e-01 78.125000 98.437500
01/13 02:49:23 AM valid (70.15, 0.8316194438934326)
01/13 02:49:23 AM valid 000 6.018072e-01 82.812500 100.000000
01/13 02:49:25 AM valid (79.55, 0.6378285965919495)
01/13 02:49:25 AM valid 000 9.443201e-01 67.187500 96.875000
01/13 02:49:26 AM valid (63.8, 1.0121531419754028)
01/13 02:49:26 AM valid 000 6.434132e-01 78.125000 100.000000
01/13 02:49:27 AM valid (71.8, 0.8647606544494629)
01/13

Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)


01/13 03:57:13 AM train 100 5.232006e-01 82.116337 99.226485
01/13 03:57:49 AM train 200 5.177517e-01 82.050684 99.230410
01/13 03:58:24 AM train 300 5.218837e-01 81.955980 99.257683
01/13 03:58:57 AM train (82.096, 0.519761318435669)
01/13 03:58:57 AM train 000 6.965043e-01 71.875000 100.000000
01/13 03:59:33 AM train 100 5.151303e-01 82.812500 99.350248
01/13 04:00:09 AM train 200 5.177707e-01 82.454913 99.323694
01/13 04:00:45 AM train 300 5.217404e-01 82.225914 99.299211
01/13 04:01:17 AM train (82.124, 0.5231976710319519)
01/13 04:01:17 AM train 000 6.173596e-01 82.812500 100.000000
01/13 04:01:53 AM train 100 5.428325e-01 82.348391 99.211015
01/13 04:02:29 AM train 200 5.579205e-01 82.027363 99.082711
01/13 04:03:05 AM train 300 5.620766e-01 81.779485 99.138289
01/13 04:03:37 AM train (81.752, 0.5674258223152161)
01/13 04:03:37 AM valid 000 4.925942e-01 81.250000 100.000000
01/13 04:03:38 AM valid (80.2, 0.5812793273925781)
01/13 04:03:39 AM valid 000 5.042697e-01 84.375000 100.0

Traceback (most recent call last):


01/13 04:53:03 AM train 200 5.543991e-01 80.736940 98.981654
01/13 04:53:39 AM train 300 5.536442e-01 80.678987 99.029277
01/13 04:54:11 AM train (80.652, 0.5522137214660644)
01/13 04:54:12 AM valid 000 9.886706e-01 76.562500 98.437500
01/13 04:54:13 AM valid (69.25, 1.009804090499878)
01/13 04:54:13 AM valid 000 9.348612e-01 75.000000 96.875000
01/13 04:54:14 AM valid (72.95, 0.8670927181243896)
01/13 04:54:14 AM valid 000 7.257244e-01 81.250000 96.875000
01/13 04:54:16 AM valid (80.9, 0.5815771417617798)
01/13 04:54:16 AM valid 000 8.103407e-01 82.812500 96.875000
01/13 04:54:17 AM valid (77.0, 0.7974324946403504)
01/13 04:54:17 AM valid 000 6.615923e-01 76.562500 96.875000
01/13 04:54:18 AM valid (74.4, 0.7247013378143311)
01/13 04:54:19 AM valid 000 7.604197e-01 82.812500 98.437500
01/13 04:54:20 AM valid (79.9, 0.6760255768299103)
01/13 04:54:20 AM valid 000 9.348612e-01 75.000000 96.875000
01/13 04:54:21 AM valid (72.95, 0.8670927181243896)
01/13 04:54:21 AM valid 000 6.615923e-0

Traceback (most recent call last):


01/13 05:47:37 AM train 100 4.679781e-01 84.235767 99.597772
01/13 05:48:14 AM train 200 4.732456e-01 84.017413 99.471393
01/13 05:48:51 AM train 300 4.739039e-01 84.146595 99.449751
01/13 05:49:25 AM train (84.1, 0.4774639751338959)
01/13 05:49:25 AM train 000 2.556946e-01 89.062500 100.000000
01/13 05:50:02 AM train 100 4.384076e-01 84.622525 99.489480
01/13 05:50:39 AM train 200 4.536101e-01 84.188433 99.385883
01/13 05:51:16 AM train 300 4.646488e-01 83.938953 99.366694
01/13 05:51:50 AM train (83.808, 0.46874182121276853)
01/13 05:51:50 AM train 000 5.320794e-01 84.375000 98.437500
01/13 05:52:28 AM train 100 5.010089e-01 84.452351 99.195545
01/13 05:53:05 AM train 200 5.033841e-01 84.203980 99.222637
01/13 05:53:41 AM train 300 4.975890e-01 84.375000 99.273256
01/13 05:54:15 AM train (84.416, 0.49600757637023923)
01/13 05:54:15 AM train 000 5.922793e-01 78.125000 100.000000
01/13 05:54:52 AM train 100 4.539143e-01 84.746287 99.257426
01/13 05:55:28 AM train 200 4.578329e-01 84.52

Traceback (most recent call last):


01/13 06:29:06 AM train 300 4.845296e-01 83.367940 99.309593
01/13 06:29:39 AM train (83.484, 0.48421576629638674)
01/13 06:29:40 AM train 000 7.243722e-01 76.562500 98.437500
01/13 06:30:16 AM train 100 4.982679e-01 84.436881 99.319307
01/13 06:30:53 AM train 200 4.895939e-01 84.958022 99.354789
01/13 06:31:30 AM train 300 5.062233e-01 84.587832 99.314784
01/13 06:32:03 AM train (84.508, 0.504502810497284)
01/13 06:32:03 AM train 000 3.839611e-01 87.500000 100.000000
01/13 06:32:40 AM train 100 4.627232e-01 84.220297 99.241955
01/13 06:33:17 AM train 200 4.544177e-01 84.569341 99.385883
01/13 06:33:54 AM train 300 4.538350e-01 84.769518 99.423796
01/13 06:34:28 AM train (84.704, 0.4542347362232208)
01/13 06:34:28 AM train 000 5.468155e-01 84.375000 96.875000
01/13 06:35:05 AM train 100 4.800764e-01 83.833540 99.365718
01/13 06:35:42 AM train 200 4.829477e-01 83.885261 99.339241
01/13 06:36:19 AM train 300 4.836914e-01 84.094684 99.288829
01/13 06:36:52 AM train (84.12, 0.4808617241764

Traceback (most recent call last):


01/13 06:41:05 AM train (82.82, 0.5218439598083496)
01/13 06:41:06 AM train 000 7.904791e-01 78.125000 100.000000
01/13 06:41:43 AM train 100 4.732709e-01 84.065594 99.381188
01/13 06:42:20 AM train 200 4.609959e-01 84.390547 99.401430
01/13 06:42:57 AM train 300 4.586578e-01 84.426910 99.449751
01/13 06:43:31 AM train (84.284, 0.4615122832298279)
01/13 06:43:31 AM train 000 7.518960e-01 85.937500 96.875000
01/13 06:44:09 AM train 100 5.514104e-01 83.555074 99.133663
01/13 06:44:45 AM train 200 5.595629e-01 83.411070 99.121580
01/13 06:45:23 AM train 300 5.516924e-01 83.279693 99.185008
01/13 06:45:56 AM train (83.264, 0.5518317601776123)
01/13 06:45:57 AM train 000 6.016299e-01 78.125000 100.000000
01/13 06:46:34 AM train 100 4.314088e-01 85.133045 99.303837
01/13 06:47:11 AM train 200 4.348365e-01 85.245647 99.261505
01/13 06:47:48 AM train 300 4.442457e-01 84.946013 99.345930
01/13 06:48:22 AM train (84.852, 0.4504073877048492)
01/13 06:48:22 AM train 000 5.091388e-01 81.250000 100.

Traceback (most recent call last):


01/13 08:12:18 AM train (84.2, 0.47756906480789185)
01/13 08:12:18 AM train 000 5.559778e-01 81.250000 98.437500
01/13 08:12:54 AM train 100 4.175809e-01 85.860149 99.458540
01/13 08:13:29 AM train 200 4.180253e-01 85.875311 99.401430
01/13 08:14:06 AM train 300 4.174178e-01 85.890781 99.434178


Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


01/13 08:14:37 AM train (85.912, 0.4179445619010925)
01/13 08:14:38 AM train 000 5.969364e-01 85.937500 100.000000
01/13 08:15:14 AM train 100 4.907691e-01 84.607054 99.381188
01/13 08:15:50 AM train 200 5.071573e-01 84.483831 99.362562
01/13 08:16:26 AM train 300 5.093921e-01 84.421719 99.377076
01/13 08:16:58 AM train (84.344, 0.5137639960670471)
01/13 08:16:58 AM train 000 4.138526e-01 85.937500 100.000000
01/13 08:17:34 AM train 100 4.679581e-01 84.699876 99.427599
01/13 08:18:10 AM train 200 4.734503e-01 84.895833 99.424751
01/13 08:18:46 AM train 300 4.741800e-01 85.018688 99.444560
01/13 08:19:18 AM train (84.86, 0.47919205300331114)
01/13 08:19:18 AM valid 000 5.913628e-01 85.937500 100.000000
01/13 08:19:20 AM valid (78.4, 0.6835518028736115)
01/13 08:19:20 AM valid 000 4.778045e-01 85.937500 100.000000
01/13 08:19:21 AM valid (82.5, 0.5129253630638122)
01/13 08:19:21 AM valid 000 8.561212e-01 79.687500 98.437500
01/13 08:19:22 AM valid (79.15, 0.7435566968917847)
01/13 08:19:

Traceback (most recent call last):


01/13 08:54:28 AM train 200 5.456812e-01 83.807525 99.362562
01/13 08:55:05 AM train 300 5.413993e-01 83.871470 99.371885
01/13 08:55:39 AM train (83.752, 0.5446479674530029)
01/13 08:55:39 AM train 000 4.017259e-01 85.937500 100.000000
01/13 08:56:16 AM train 100 4.644505e-01 84.746287 99.350248
01/13 08:56:54 AM train 200 4.691671e-01 84.678172 99.339241
01/13 08:57:31 AM train 300 4.755565e-01 84.733181 99.283638
01/13 08:58:04 AM train (84.704, 0.47585748905181885)
01/13 08:58:05 AM train 000 4.868431e-01 82.812500 100.000000
01/13 08:58:42 AM train 100 5.171994e-01 83.090965 99.164604
01/13 08:59:20 AM train 200 5.323123e-01 82.750311 99.160448
01/13 08:59:56 AM train 300 5.314144e-01 82.672342 99.200581
01/13 09:00:29 AM train (82.516, 0.5365411080551148)
01/13 09:00:30 AM train 000 7.427200e-01 81.250000 96.875000
01/13 09:01:07 AM train 100 5.308262e-01 83.818069 99.396658
01/13 09:01:44 AM train 200 5.306713e-01 83.939677 99.362562
01/13 09:02:21 AM train 300 5.210592e-01 84.1

Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


01/13 09:22:25 AM train 100 4.727298e-01 84.514233 99.272896
01/13 09:23:02 AM train 200 4.764739e-01 84.530473 99.245958
01/13 09:23:40 AM train 300 4.804514e-01 84.525540 99.252492
01/13 09:24:14 AM train (84.624, 0.4763033587646484)
01/13 09:24:14 AM train 000 2.525537e-01 90.625000 100.000000
01/13 09:24:51 AM train 100 4.626233e-01 84.637995 99.427599
01/13 09:25:29 AM train 200 4.861171e-01 83.799751 99.378109
01/13 09:26:06 AM train 300 4.926597e-01 83.710548 99.304402
01/13 09:26:40 AM train (83.852, 0.48411329521179197)
01/13 09:26:40 AM train 000 2.626526e-01 93.750000 100.000000
01/13 09:27:17 AM train 100 4.411544e-01 85.442450 99.427599
01/13 09:27:55 AM train 200 4.257981e-01 85.890858 99.502488
01/13 09:28:32 AM train 300 4.312065e-01 85.631229 99.460133
01/13 09:29:06 AM train (85.472, 0.43312832328796386)
01/13 09:29:06 AM train 000 6.517979e-01 85.937500 100.000000
01/13 09:29:44 AM train 100 4.995135e-01 84.622525 99.334777
01/13 09:30:21 AM train 200 5.002567e-01 84

Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
Traceback (most recent call last):


01/13 10:06:13 AM train 300 4.592460e-01 85.215947 99.434178


Traceback (most recent call last):


01/13 10:06:47 AM train (85.268, 0.4614064992904663)
01/13 10:06:48 AM train 000 3.720199e-01 85.937500 100.000000


Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)


01/13 10:07:25 AM train 100 4.881125e-01 83.833540 99.257426
01/13 10:08:02 AM train 200 4.911280e-01 83.791978 99.308147
01/13 10:08:39 AM train 300 4.858210e-01 83.907807 99.330357
01/13 10:09:13 AM train (83.896, 0.4816096831417084)
01/13 10:09:14 AM train 000 5.454791e-01 82.812500 98.437500
01/13 10:09:52 AM train 100 5.368467e-01 83.864480 99.381188
01/13 10:10:30 AM train 200 5.361009e-01 83.939677 99.284826
01/13 10:11:08 AM train 300 5.346739e-01 83.944145 99.309593
01/13 10:11:43 AM train (83.972, 0.5340405015182496)
01/13 10:11:44 AM train 000 3.969461e-01 82.812500 100.000000
01/13 10:12:22 AM train 100 5.195330e-01 83.632426 99.427599
01/13 10:13:00 AM train 200 4.937865e-01 84.056281 99.448072
01/13 10:13:38 AM train 300 4.837972e-01 84.426910 99.454942
01/13 10:14:12 AM train (84.528, 0.48562553943634035)
01/13 10:14:12 AM train 000 3.724873e-01 90.625000 96.875000
01/13 10:14:51 AM train 100 4.345993e-01 85.272277 99.458540
01/13 10:15:28 AM train 200 4.323849e-01 85.26

Traceback (most recent call last):
Traceback (most recent call last):


01/13 10:24:45 AM train 100 4.150015e-01 85.829208 99.334777
01/13 10:25:24 AM train 200 4.162567e-01 85.618781 99.409204
01/13 10:26:03 AM train 300 4.245168e-01 85.584510 99.403032
01/13 10:26:37 AM train (85.664, 0.4220455585861206)
01/13 10:26:37 AM train 000 7.618191e-01 79.687500 98.437500
01/13 10:27:15 AM train 100 4.603805e-01 85.256807 99.474010
01/13 10:27:53 AM train 200 4.797983e-01 84.818097 99.463619
01/13 10:28:31 AM train 300 4.809690e-01 85.075789 99.460133
01/13 10:29:05 AM train (85.076, 0.4802834702110291)
01/13 10:29:05 AM valid 000 4.119166e-01 84.375000 100.000000
01/13 10:29:06 AM valid (82.95, 0.5472345314025879)
01/13 10:29:06 AM valid 000 7.823705e-01 71.875000 100.000000
01/13 10:29:07 AM valid (74.05, 0.7819113397598266)
01/13 10:29:08 AM valid 000 5.209454e-01 82.812500 96.875000
01/13 10:29:09 AM valid (77.2, 0.7060945534706116)
01/13 10:29:09 AM valid 000 2.936144e-01 90.625000 100.000000
01/13 10:29:10 AM valid (84.35, 0.5244557065963745)
01/13 10:29:1

Traceback (most recent call last):


01/13 10:32:08 AM train 200 4.828582e-01 85.346704 99.378109
01/13 10:32:46 AM train 300 4.772523e-01 85.309385 99.403032
01/13 10:33:21 AM train (85.244, 0.4783338069343567)
01/13 10:33:21 AM train 000 7.328798e-01 81.250000 100.000000
01/13 10:33:59 AM train 100 4.729290e-01 85.349629 99.334777
01/13 10:34:37 AM train 200 4.773669e-01 85.214552 99.393657
01/13 10:35:15 AM train 300 4.634790e-01 85.413206 99.428987
01/13 10:35:49 AM train (85.544, 0.46233543190956117)
01/13 10:35:50 AM train 000 4.836130e-01 81.250000 100.000000
01/13 10:36:28 AM train 100 5.080214e-01 85.163985 99.195545
01/13 10:37:06 AM train 200 5.177927e-01 84.864739 99.253731
01/13 10:37:44 AM train 300 5.234228e-01 84.593023 99.262874
01/13 10:38:18 AM train (84.72, 0.5206493011856079)
01/13 10:38:19 AM train 000 5.545129e-01 81.250000 100.000000
01/13 10:38:57 AM train 100 4.792515e-01 84.715347 99.350248
01/13 10:39:34 AM train 200 4.610395e-01 85.315609 99.385883
01/13 10:40:13 AM train 300 4.565779e-01 85.3

Traceback (most recent call last):


01/13 10:43:55 AM train 100 4.912177e-01 84.699876 99.334777
01/13 10:44:33 AM train 200 4.927763e-01 84.802550 99.362562
01/13 10:45:11 AM train 300 4.818151e-01 84.956395 99.392650
01/13 10:45:45 AM train (84.904, 0.4806766734313965)
01/13 10:45:45 AM train 000 3.662983e-01 87.500000 100.000000
01/13 10:46:23 AM train 100 4.712470e-01 85.318688 99.303837
01/13 10:47:01 AM train 200 4.587571e-01 85.782027 99.378109
01/13 10:47:39 AM train 300 4.633052e-01 85.615656 99.371885
01/13 10:48:14 AM train (85.528, 0.4601539856910706)
01/13 10:48:15 AM train 000 4.204286e-01 85.937500 100.000000
01/13 10:48:52 AM train 100 5.053560e-01 84.947401 99.520421
01/13 10:49:31 AM train 200 5.094879e-01 84.802550 99.471393
01/13 10:50:09 AM train 300 5.196183e-01 84.567068 99.366694
01/13 10:50:43 AM train (84.488, 0.5215495408153534)
01/13 10:50:43 AM train 000 2.917706e-01 92.187500 100.000000
01/13 10:51:21 AM train 100 4.313270e-01 85.767327 99.535891
01/13 10:52:00 AM train 200 4.456745e-01 85.3

Traceback (most recent call last):


01/13 11:04:24 AM train 300 5.019086e-01 84.349045 99.439369
01/13 11:04:59 AM train (84.3, 0.502256526222229)
01/13 11:04:59 AM train 000 4.148332e-01 84.375000 100.000000
01/13 11:05:37 AM train 100 5.002597e-01 84.885520 99.319307
01/13 11:06:16 AM train 200 5.045204e-01 85.051306 99.409204
01/13 11:06:54 AM train 300 5.099983e-01 84.805855 99.387458
01/13 11:07:28 AM train (84.764, 0.5102092512893677)
01/13 11:07:28 AM train 000 5.975281e-01 79.687500 100.000000
01/13 11:08:06 AM train 100 4.467592e-01 85.643564 99.582302
01/13 11:08:45 AM train 200 4.567793e-01 85.517724 99.533582
01/13 11:09:22 AM train 300 4.562132e-01 85.465116 99.522425
01/13 11:09:56 AM train (85.6, 0.4517407689380646)
01/13 11:09:57 AM train 000 7.566766e-01 81.250000 100.000000
01/13 11:10:35 AM train 100 5.145008e-01 84.375000 99.443069
01/13 11:11:13 AM train 200 5.238017e-01 84.343905 99.300373
01/13 11:11:51 AM train 300 5.150608e-01 84.567068 99.309593
01/13 11:12:25 AM train (84.46, 0.5138866934204102

Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


01/13 11:24:47 AM train 100 5.106604e-01 84.545173 99.056312
01/13 11:25:25 AM train 200 4.992426e-01 84.693719 99.269279
01/13 11:26:04 AM train 300 4.950229e-01 84.660507 99.325166
01/13 11:26:38 AM train (84.556, 0.491498077917099)
01/13 11:26:38 AM train 000 4.984322e-01 82.812500 100.000000
01/13 11:27:17 AM train 100 5.222910e-01 84.993812 99.257426
01/13 11:27:55 AM train 200 5.015822e-01 85.237873 99.214863
01/13 11:28:34 AM train 300 5.120206e-01 85.029070 99.268065
01/13 11:29:09 AM train (85.064, 0.5085629752349854)
01/13 11:29:09 AM train 000 3.196792e-01 89.062500 98.437500
01/13 11:29:48 AM train 100 4.677161e-01 85.303218 99.535891
01/13 11:30:26 AM train 200 4.786427e-01 84.942475 99.471393
01/13 11:31:05 AM train 300 4.796527e-01 85.049834 99.387458
01/13 11:31:39 AM train (85.144, 0.4782403255081177)
01/13 11:31:40 AM train 000 2.947241e-01 85.937500 100.000000
01/13 11:32:19 AM train 100 4.803566e-01 84.792698 99.474010
01/13 11:32:57 AM train 200 4.907581e-01 84.460

Traceback (most recent call last):


01/13 11:35:28 AM train 200 5.292260e-01 83.955224 99.315920
01/13 11:36:07 AM train 300 5.218768e-01 84.016819 99.319975
01/13 11:36:41 AM train (84.068, 0.524641872959137)
01/13 11:36:42 AM train 000 3.363040e-01 89.062500 100.000000
01/13 11:37:20 AM train 100 4.718411e-01 85.086634 99.504950
01/13 11:37:59 AM train 200 4.793574e-01 84.958022 99.440299
01/13 11:38:37 AM train 300 4.806315e-01 84.811047 99.444560
01/13 11:39:11 AM train (84.724, 0.48397441090583804)
01/13 11:39:12 AM train 000 3.797644e-01 87.500000 100.000000
01/13 11:39:51 AM train 100 5.138757e-01 84.096535 99.412129
01/13 11:40:29 AM train 200 5.147437e-01 84.444963 99.300373
01/13 11:41:07 AM train 300 5.172790e-01 84.286752 99.309593
01/13 11:41:42 AM train (84.42, 0.5181674698257446)
01/13 11:41:43 AM train 000 4.695001e-01 84.375000 100.000000
01/13 11:42:21 AM train 100 4.930656e-01 84.978342 99.319307
01/13 11:43:00 AM train 200 4.965839e-01 84.662624 99.261505
01/13 11:43:38 AM train 300 4.896628e-01 84.69

Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


01/13 12:00:33 PM train 300 4.767645e-01 85.579319 99.314784
01/13 12:01:08 PM train (85.388, 0.47647758618354796)
01/13 12:01:08 PM train 000 4.803996e-01 85.937500 98.437500
01/13 12:01:47 PM train 100 5.013912e-01 84.421411 99.396658
01/13 12:02:26 PM train 200 5.034663e-01 84.654851 99.393657
01/13 12:03:04 PM train 300 4.897651e-01 84.951204 99.418605
01/13 12:03:39 PM train (85.068, 0.48776874713897705)
01/13 12:03:39 PM train 000 3.664936e-01 84.375000 100.000000
01/13 12:04:18 PM train 100 4.530038e-01 85.628094 99.582302
01/13 12:04:57 PM train 200 4.708967e-01 85.665423 99.432525


Traceback (most recent call last):


01/13 12:05:36 PM train 300 4.709629e-01 85.511836 99.392650
01/13 12:06:11 PM train (85.468, 0.4725731061172485)
01/13 12:06:11 PM train 000 4.641986e-01 87.500000 100.000000
01/13 12:06:50 PM train 100 4.653342e-01 85.720916 99.412129
01/13 12:07:28 PM train 200 4.597419e-01 85.595460 99.471393
01/13 12:08:06 PM train 300 4.664186e-01 85.454734 99.470515
01/13 12:08:41 PM train (85.58, 0.4617711210346222)
01/13 12:08:42 PM train 000 4.821633e-01 87.500000 100.000000
01/13 12:09:20 PM train 100 4.982300e-01 84.452351 99.381188
01/13 12:09:59 PM train 200 4.928854e-01 84.849192 99.440299
01/13 12:10:37 PM train 300 5.030746e-01 84.670889 99.403032
01/13 12:11:12 PM train (84.908, 0.4973258024597168)
01/13 12:11:12 PM train 000 4.013403e-01 89.062500 100.000000


Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)


01/13 12:11:51 PM train 100 4.716193e-01 84.576114 99.458540


Traceback (most recent call last):


01/13 12:12:29 PM train 200 4.759339e-01 84.740361 99.486940
01/13 12:13:07 PM train 300 4.788452e-01 84.681271 99.418605
01/13 12:13:42 PM train (84.752, 0.4775162512779236)
01/13 12:13:42 PM train 000 3.431389e-01 92.187500 100.000000
01/13 12:14:21 PM train 100 4.596359e-01 85.303218 99.551361
01/13 12:14:59 PM train 200 4.420054e-01 85.782027 99.580224
01/13 12:15:38 PM train 300 4.510221e-01 85.511836 99.522425
01/13 12:16:13 PM train (85.52, 0.4561853859901428)
01/13 12:16:13 PM valid 000 3.428015e-01 87.500000 100.000000
01/13 12:16:15 PM valid (84.25, 0.49773106718063354)
01/13 12:16:15 PM valid 000 4.695438e-01 87.500000 98.437500
01/13 12:16:16 PM valid (83.05, 0.5530815212726593)
01/13 12:16:16 PM valid 000 3.598725e-01 87.500000 100.000000
01/13 12:16:17 PM valid (84.5, 0.4915854201316833)
01/13 12:16:18 PM valid 000 3.242113e-01 90.625000 100.000000
01/13 12:16:19 PM valid (83.85, 0.5298133521080017)
01/13 12:16:19 PM valid 000 5.569693e-01 84.375000 98.437500
01/13 12:16:

Traceback (most recent call last):


01/13 12:28:07 PM train (85.752, 0.45282614067077637)
01/13 12:28:07 PM train 000 4.415176e-01 82.812500 98.437500
01/13 12:28:46 PM train 100 4.819151e-01 85.504332 99.288366
01/13 12:29:25 PM train 200 4.904514e-01 85.020211 99.315920
01/13 12:30:03 PM train 300 4.827982e-01 85.127699 99.413414
01/13 12:30:38 PM train (85.228, 0.4811410309410095)
01/13 12:30:38 PM train 000 6.532121e-01 79.687500 98.437500
01/13 12:31:17 PM train 100 4.513064e-01 86.030322 99.412129
01/13 12:31:56 PM train 200 4.596107e-01 85.727612 99.409204
01/13 12:32:34 PM train 300 4.656156e-01 85.714286 99.434178
01/13 12:33:09 PM train (85.54, 0.47045766750335694)
01/13 12:33:09 PM train 000 5.069861e-01 84.375000 100.000000
01/13 12:33:48 PM train 100 4.219160e-01 86.633663 99.381188
01/13 12:34:27 PM train 200 4.358889e-01 86.108520 99.370336
01/13 12:35:05 PM train 300 4.404379e-01 86.108804 99.387458
01/13 12:35:41 PM train (86.028, 0.44475610236167906)
01/13 12:35:41 PM train 000 2.997679e-01 92.187500 10