In [1]:
import logging
from naslib.defaults.trainer import Trainer
from naslib.optimizers import DARTSOptimizer
from naslib.search_spaces import DartsSearchSpace
from naslib.utils import utils, setup_logger, get_config_from_args, set_seed, log_args
from naslib.search_spaces.core.graph import Graph, EdgeData
from naslib.search_spaces.core import primitives as ops
from torch import nn
from fvcore.common.config import CfgNode
from copy import deepcopy
from IPython.display import clear_output
import torch
from naslib.search_spaces.core.primitives import AbstractPrimitive
from activation_sub_func.binary_func import *
from activation_sub_func.unary_func import *

device: cuda:0
device: cpu
device: cuda:0
device: cuda:0
device: cuda:0
device: cuda:0


In [3]:
config = utils.get_config_from_args(config_type='nas')
config.optimizer = 'darts'
config.search.batch_size = 8 # for TF pool, 32 for personal
config.search.learning_rate = 0.0001
config.search.epochs = 0
utils.set_seed(config.seed)
clear_output(wait=True)
utils.log_args(config)

logger = setup_logger(config.save + '/log.log')
logger.setLevel(logging.INFO)

[32m[07/08 10:22:41 nl.utils.utils]: [0mdataset....................................cifar10
[32m[07/08 10:22:41 nl.utils.utils]: [0mseed.............................................0
[32m[07/08 10:22:41 nl.utils.utils]: [0msearch_space...........................nasbench201
[32m[07/08 10:22:41 nl.utils.utils]: [0mout_dir........................................run
[32m[07/08 10:22:41 nl.utils.utils]: [0moptimizer....................................darts
[32m[07/08 10:22:41 nl.utils.utils]: [0msearchacq_fn_optimization: random_sampling
acq_fn_type: its
arch_learning_rate: 0.0003
arch_weight_decay: 0.001
batch_size: 8
checkpoint_freq: 1000
cutout: False
cutout_length: 16
cutout_prob: 1.0
data_size: 25000
debug_predictor: False
drop_path_prob: 0.0
encoding_type: adjacency_one_hot
epochs: 0
fidelity: -1
gpu: None
grad_clip: 5
k: 10
learning_rate: 0.0001
learning_rate_min: 0.001
max_mutations: 1
momentum: 0.9
num_arches_to_mutate: 2
num_candidates: 20
num_ensemble: 3
num_init: 10
o

In [9]:
class Stack(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())

    def forward(self, x, edge_data=None):
        return [x[0], x[1]]

    def get_embedded_ops(self):
        return None


class UnStack(AbstractPrimitive):
    def __init__(self, dim=1):
        super().__init__(locals())
        self.dim = dim

    def forward(self, x, edge_data=None):
        return x[self.dim]

    def get_embedded_ops(self):
        return None


class RNNResNet20SearchSpace(Graph):
    """
    https://www.researchgate.net/figure/ResNet-20-architecture_fig3_351046093
    """

    OPTIMIZER_SCOPE = [
        f"activation_{i}" for i in range(1, 20)
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        # cell definition
        activation_cell = Graph()
        activation_cell.name = 'activation_cell'
        activation_cell.add_node(1)  # input node
        activation_cell.add_node(2)  # unary node / intermediate node
        activation_cell.add_node(3)  # unary node / intermediate node
        activation_cell.add_node(4)  # binary node / output node
        activation_cell.add_edges_from([(1, 2, EdgeData())])  # mutable intermediate edge
        activation_cell.add_edges_from([(1, 3, EdgeData())])  # mutable intermediate edge

        activation_cell.add_edges_from([(2, 4, EdgeData().finalize())])  # mutable intermediate edge
        activation_cell.add_edges_from([(3, 4, EdgeData().finalize())])  # mutable intermediate edge
        activation_cell.nodes[4]['comb_op'] = Stack()

        activation_cell.add_node(5)  # binary node / output node
        activation_cell.add_edges_from([(4, 5, EdgeData())])  # mutable intermediate edge

        activation_cell.add_node(6)
        activation_cell.add_edges_from([(5, 6, EdgeData().finalize())])  # unary node / intermediate node
        activation_cell.add_node(7)
        activation_cell.add_edges_from([(6, 7, EdgeData())])  # mutable intermediate edge
        activation_cell.add_node(8)
        activation_cell.add_edges_from([(1, 8, EdgeData())])  # mutable intermediate edge

        activation_cell.add_node(9)
        activation_cell.add_edges_from([(8, 9, EdgeData().finalize())])  # mutable intermediate edge
        activation_cell.add_edges_from([(7, 9, EdgeData().finalize())])  # mutable intermediate edge
        activation_cell.nodes[9]['comb_op'] = Stack()

        activation_cell.add_node(10)
        activation_cell.add_edges_from([(9, 10, EdgeData())])  # mutable intermediate edge

        activation_cell.add_node(11)
        activation_cell.add_edges_from([(10, 11, EdgeData().finalize())])  # mutable intermediate edge

        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1)  # input
        self.add_node(2)  # intermediate
        self.add_node(3,
                      subgraph=activation_cell.copy().set_scope("activation_1").set_input([2]))  # activation cell 3
        self.nodes[3]['subgraph'].name = "activation_1"
        self.update_edges(
            update_func=lambda edge: self._set_ops(edge, 16),
            scope=f"activation_{1}",
            private_edge_data=True, )

        self.add_node(4)
        self.add_node(5,
                      subgraph=activation_cell.copy().set_scope("activation_2").set_input([4]))  # activation cell 3
        self.nodes[5]['subgraph'].name = "activation_2"
        self.update_edges(
            update_func=lambda edge: self._set_ops(edge, 16),
            scope=f"activation_{2}",
            private_edge_data=True, )

        self.add_node(6)
        # Todo add option here with a func which has a arg channels
        self.add_node(7,
                      subgraph=activation_cell.copy().set_scope("activation_3").set_input([6]))  # activation cell 3
        self.nodes[7]['subgraph'].name = "activation_3"
        self.update_edges(
            update_func=lambda edge: self._set_ops(edge, 16),
            scope=f"activation_{3}",
            private_edge_data=True, )

        self.add_edges_from([
            (1, 2, EdgeData()),
            (2, 3, EdgeData()),
            (3, 4, EdgeData()),
            (4, 5, EdgeData()),
            (5, 6, EdgeData()),
            (3, 6, EdgeData()),
            (6, 7, EdgeData())
        ])

        self.edges[1, 2].set('op',
                             ops.Sequential(nn.Conv2d(3, 16, 3, padding=1), ))  # convolutional edge
        self.edges[3, 4].set('op',
                             ops.Sequential(nn.Conv2d(16, 16, 3, padding=1), ))  # convolutional edge
        self.edges[5, 6].set('op',
                             ops.Sequential(nn.Conv2d(16, 16, 3, padding=1), ))  # convolutional edge

        conv_option = {
            "in_channels": 16,
            "out_channels": 16,
            "kernel_size": 3,
            "padding": 1
        }
        self._create_base_block(7, 4, activation_cell, conv_option)
        self._create_base_block(11, 6, activation_cell, conv_option)

        conv_option_a = {
            "in_channels": 16,
            "out_channels": 32,
            "kernel_size": 3,
            "padding": 1,
            "stride": 2
        }
        conv_option_b = {
            "in_channels": 16,
            "out_channels": 32,
            "kernel_size": 1,
            "padding": 0,
            "stride": 2
        }
        self._create_reduction_block(15, 8, activation_cell, conv_option_a, conv_option_b)

        conv_option = {
            "in_channels": 32,
            "out_channels": 32,
            "kernel_size": 3,
            "padding": 1
        }
        self._create_base_block(19, 10, activation_cell, conv_option)
        self._create_base_block(23, 12, activation_cell, conv_option)

        conv_option_a = {
            "in_channels": 32,
            "out_channels": 64,
            "kernel_size": 3,
            "padding": 1,
            "stride": 2
        }
        conv_option_b = {
            "in_channels": 32,
            "out_channels": 64,
            "kernel_size": 1,
            "padding": 0,
            "stride": 2
        }
        self._create_reduction_block(27, 14, activation_cell, conv_option_a, conv_option_b)

        conv_option = {
            "in_channels": 64,
            "out_channels": 64,
            "kernel_size": 3,
            "padding": 1
        }
        self._create_base_block(31, 16, activation_cell, conv_option)
        self._create_base_block(34, 18, activation_cell, conv_option)

        # add head
        self.add_node(39)
        self.add_edges_from([
            (38, 39, EdgeData())
        ])
        self.edges[38, 39].set('op',
                               ops.Sequential(
                                   nn.AvgPool2d(8),
                                   nn.Flatten(),
                                   nn.Linear(64, 10),
                                   nn.Softmax(dim=1)
                               ))  # convolutional edge
        self.add_node(40)
        self.add_edges_from([
            (39, 40, EdgeData().finalize())
        ])

    def _create_base_block(self, start: int, stage: int, cell, conv_option: dict):
        self.add_node(start + 1)

        self.add_node(start + 2, subgraph=cell.copy().set_scope(f"activation_{stage}").set_input(
            [start + 1]))  # activation cell 3
        self.nodes[start + 2]['subgraph'].name = f"activation_{stage}"
        self.update_edges(
            update_func=lambda edge: self._set_ops(edge, conv_option["out_channels"]),
            scope=f"activation_{stage}",
            private_edge_data=True, )

        self.add_node(start + 3)

        self.add_node(start + 4, subgraph=cell.copy().set_scope(f"activation_{stage + 1}").set_input(
            [start + 3]))  # activation cell 3
        self.nodes[start + 4]['subgraph'].name = f"activation_{stage + 1}"
        self.update_edges(
            update_func=lambda edge: self._set_ops(edge, conv_option["out_channels"]),
            scope=f"activation_{stage + 1}",
            private_edge_data=True, )

        self.add_edges_from([
            (start, start + 1, EdgeData()),
            (start, start + 3, EdgeData()),
            (start + 1, start + 2, EdgeData()),
            (start + 2, start + 3, EdgeData()),
            (start + 3, start + 4, EdgeData()),
        ])

        self.edges[start, start + 1].set('op',
                                         ops.Sequential(nn.Conv2d(**conv_option), ))  # convolutional edge
        self.edges[start + 2, start + 3].set('op',
                                             ops.Sequential(nn.Conv2d(**conv_option), ))  # convolutional edge

    def _create_reduction_block(self, start: int, stage: int, cell, conv_option_a: dict, conv_option_b: dict):
        self.add_node(start + 1)

        self.add_node(start + 2, subgraph=cell.copy().set_scope(f"activation_{stage}").set_input(
            [start + 1]))  # activation cell 3
        self.nodes[start + 2]['subgraph'].name = f"activation_{stage}"
        self.update_edges(
            update_func=lambda edge: self._set_ops(edge, conv_option_a["out_channels"]),
            scope=f"activation_{stage}",
            private_edge_data=True, )

        self.add_node(start + 3)

        self.add_node(start + 4, subgraph=cell.copy().set_scope(f"activation_{stage + 1}").set_input(
            [start + 3]))  # activation cell 3
        self.nodes[start + 4]['subgraph'].name = f"activation_{stage + 1}"
        self.update_edges(
            update_func=lambda edge: self._set_ops(edge, conv_option_b["out_channels"]),
            scope=f"activation_{stage + 1}",
            private_edge_data=True, )

        self.add_edges_from([
            (start, start + 1, EdgeData()),
            (start, start + 3, EdgeData()),  # add conv
            (start + 1, start + 2, EdgeData()),
            (start + 2, start + 3, EdgeData()),
            (start + 3, start + 4, EdgeData()),
        ])

        self.edges[start, start + 1].set('op',
                                         ops.Sequential(nn.Conv2d(**conv_option_a), ))  # convolutional edge
        conv_option_a["in_channels"] = conv_option_a["out_channels"]
        conv_option_a["stride"] = 1

        self.edges[start, start + 3].set('op',
                                         ops.Sequential(nn.Conv2d(**conv_option_b), ))  # convolutional edge
        self.edges[start + 2, start + 3].set('op',
                                             ops.Sequential(nn.Conv2d(**conv_option_a), ))  # convolutional edge

    def _set_ops(self, edge, channels=32):
        # unary (1, 2), (1, 3), (1, 8), (6, 7)
        if (edge.head, edge.tail) in {(1, 2), (1, 3), (1, 8), (6, 7)}:
            edge.data.set("op", [
                ops.Identity(),
                ops.Zero(stride=1),
                Power(2),
                Power(3),
                Power(.5),
                Sin(),
                Cos(),
                Abs_op(),
                Sign(),
                Beta_mul(channels=channels),
                Beta_add(channels=channels),
                Log(),
                Exp(),
                Sinh(),
                Cosh(),
                Tanh(),
                Asinh(),
                Acosh(),
                Atan(),
                Sinc(),
                Maximum0(),
                Minimum0(),
                Sigmoid(),
                LogExp(),
                Exp2(),
                Erf(),
                Beta(channels=channels),
            ])
        # binary (4, 5), (9, 10)
        elif (edge.head, edge.tail) in {(4, 5), (9, 10)}:
            edge.data.set("op", [
                Add(),
                Sub(),
                Mul(),
                Div(),
                Maximum(),
                Minimum(),
                SigMul(),
                ExpBetaSub2(channels=channels),
                ExpBetaSubAbs(channels=channels),
                BetaMix(channels=channels),
            ])

In [10]:
search_space = RNNResNet20SearchSpace()

In [11]:
optimizer = DARTSOptimizer(config)
optimizer.adapt_search_space(search_space)

[32m[07/08 10:23:40 nl.optimizers.oneshot.darts.optimizer]: [0mParsed graph:
Graph activation_1:
 Graph(
  (activation_1-edge(1,2)): MixedOp(
    (primitive-0): Identity()
    (primitive-1): Zero (stride=1)
    (primitive-2): Power()
    (primitive-3): Power()
    (primitive-4): Power()
    (primitive-5): Sin()
    (primitive-6): Cos()
    (primitive-7): Abs_op()
    (primitive-8): Sign()
    (primitive-9): Beta_mul()
    (primitive-10): Beta_add()
    (primitive-11): Log()
    (primitive-12): Exp()
    (primitive-13): Sinh()
    (primitive-14): Cosh()
    (primitive-15): Tanh()
    (primitive-16): Asinh()
    (primitive-17): Acosh()
    (primitive-18): Atan()
    (primitive-19): Sinc()
    (primitive-20): Maximum0()
    (primitive-21): Minimum0()
    (primitive-22): Sigmoid()
    (primitive-23): LogExp()
    (primitive-24): Exp2()
    (primitive-25): Erf()
    (primitive-26): Beta()
  )
  (activation_1-edge(1,3)): MixedOp(
    (primitive-0): Identity()
    (primitive-1): Zero (strid

In [12]:
trainer = Trainer(optimizer, config)
trainer.search()

[32m[07/08 10:23:47 nl.defaults.trainer]: [0mparam size = 0.284074MB
[32m[07/08 10:23:47 nl.defaults.trainer]: [0mStart training
Files already downloaded and verified
Files already downloaded and verified
[32m[07/08 10:23:48 nl.defaults.trainer]: [0mTraining finished


In [14]:
trainer.evaluate()

[32m[07/08 10:28:31 nl.defaults.trainer]: [0mStart evaluation
[32m[07/08 10:28:31 nl.defaults.trainer]: [0mloading model from file run/cifar10/bananas/0/search/model_final.pth


Skip loading parameter 'makrograph-edge(1,2).op.0.weight' to the model due to incompatible shapes: (6, 3, 5, 5) in the checkpoint but (16, 3, 3, 3) in the model! You might want to double check if this is expected.
Skip loading parameter 'makrograph-edge(1,2).op.0.bias' to the model due to incompatible shapes: (6,) in the checkpoint but (16,) in the model! You might want to double check if this is expected.
Skip loading parameter 'makrograph-edge(3,4).op.0.weight' to the model due to incompatible shapes: (10, 400) in the checkpoint but (16, 16, 3, 3) in the model! You might want to double check if this is expected.
Skip loading parameter 'makrograph-edge(3,4).op.0.bias' to the model due to incompatible shapes: (10,) in the checkpoint but (16,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
[34mmakrograph-edge(1,2).op.0.{bias, weight}[0m
[34mmakrograph-edge(11,12).op.0.{bias, weight}[0m
[34mmakrogra

The checkpoint state_dict contains keys that are not used by the model:
  [35mmakrograph-edge(1,2).op.2.{bias, weight}[0m


[32m[07/08 10:28:32 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([ 5.6738e-04,  1.0712e-03, -1.5268e-03, -1.0443e-04, -4.1967e-04,
        -2.0072e-03, -9.7109e-04,  4.9893e-04, -1.6426e-03, -1.0110e-03,
         9.4010e-04, -1.5769e-03,  2.5636e-04, -4.6075e-04,  7.1774e-04,
        -7.3150e-05,  1.9285e-03, -1.7848e-03,  1.4929e-03,  7.2506e-04,
         5.6533e-04,  7.5997e-04, -8.9835e-04,  7.1230e-04,  1.6233e-04,
         1.6120e-03,  6.0333e-04], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-6.9886e-04, -2.3129e-03,  3.4386e-04, -2.9579e-04,  5.9824e-04,
        -8.1357e-04, -1.0154e-03, -2.1835e-04,  1.1574e-03, -4.0224e-04,
        -8.5330e-04, -8.8774e-04,  8.9622e-04,  1.2542e-03, -9.0247e-05,
         5.3916e-04, -1.6619e-03,  2.7380e-04, -1.4940e-03, -5.9278e-05,
         1.9373e-03,  1.6234e-03,  9.6623e-04,  1.0527e-03, -1.2285e-03,
         1.0993e-04, -1.2780e-04], device='cuda:0',

[32m[07/08 10:28:32 nl.defaults.trainer]: [0mFinal architecture:
Graph activation_1:
 Graph(
  (activation_1-edge(1,2)): Asinh()
  (activation_1-edge(1,3)): Maximum0()
  (activation_1-edge(1,8)): Exp()
  (activation_1-edge(2,4)): Identity()
  (activation_1-edge(3,4)): Identity()
  (activation_1-comb_op_at(4)): Stack()
  (activation_1-edge(4,5)): Minimum()
  (activation_1-edge(5,6)): Identity()
  (activation_1-edge(6,7)): Erf()
  (activation_1-edge(7,9)): Identity()
  (activation_1-edge(8,9)): Identity()
  (activation_1-comb_op_at(9)): Stack()
  (activation_1-edge(9,10)): Add()
  (activation_1-edge(10,11)): Identity()
)
Graph activation_10:
 Graph(
  (activation_10-edge(1,2)): Asinh()
  (activation_10-edge(1,3)): Maximum0()
  (activation_10-edge(1,8)): Exp()
  (activation_10-edge(2,4)): Identity()
  (activation_10-edge(3,4)): Identity()
  (activation_10-comb_op_at(4)): Stack()
  (activation_10-edge(4,5)): Minimum()
  (activation_10-edge(5,6)): Identity()
  (activation_10-edge(6,7)): E

[32m[07/08 10:28:32 nl.defaults.trainer]: [0mStarting retraining from scratch
Files already downloaded and verified
Files already downloaded and verified
[32m[07/08 10:28:33 nl.defaults.trainer]: [0mcuda consumption
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    4425 KB |    9594 KB |    5286 GB |    5286 GB |
|       from large pool |       0 KB |       0 KB |       0 GB |       0 GB |
|       from small pool |    4425 KB |    9594 KB |    5286 GB |    5286 GB |
|---------------------------------------------------------------------------|
| Active memory         |    4425 KB |    9594 KB |    5286 GB |    5286 GB |


KeyboardInterrupt: 