In [3]:
import logging
from naslib.defaults.trainer import Trainer
from naslib.optimizers import DARTSOptimizer
from naslib.search_spaces import DartsSearchSpace
from naslib.utils import utils, setup_logger, get_config_from_args, set_seed, log_args
from naslib.search_spaces.core.graph import Graph, EdgeData
from naslib.search_spaces.core import primitives as ops
from torch import nn
from fvcore.common.config import CfgNode
from copy import deepcopy
from IPython.display import clear_output
import torch

In [4]:
config = utils.get_config_from_args(config_type='nas')
config.optimizer = 'darts'
utils.set_seed(config.seed)
clear_output(wait=True)
utils.log_args(config)

logger = setup_logger(config.save + '/log.log')
logger.setLevel(logging.INFO)

[32m[07/03 11:30:13 nl.utils.utils]: [0mdataset....................................cifar10
[32m[07/03 11:30:13 nl.utils.utils]: [0mseed.............................................0
[32m[07/03 11:30:13 nl.utils.utils]: [0msearch_space...........................nasbench201
[32m[07/03 11:30:13 nl.utils.utils]: [0mout_dir........................................run
[32m[07/03 11:30:13 nl.utils.utils]: [0moptimizer....................................darts
[32m[07/03 11:30:13 nl.utils.utils]: [0msearchacq_fn_optimization: random_sampling
acq_fn_type: its
arch_learning_rate: 0.0003
arch_weight_decay: 0.001
batch_size: 256
checkpoint_freq: 1000
cutout: False
cutout_length: 16
cutout_prob: 1.0
data_size: 25000
debug_predictor: False
drop_path_prob: 0.0
encoding_type: adjacency_one_hot
epochs: 100
fidelity: -1
gpu: None
grad_clip: 5
k: 10
learning_rate: 0.025
learning_rate_min: 0.001
max_mutations: 1
momentum: 0.9
num_arches_to_mutate: 2
num_candidates: 20
num_ensemble: 3
num_init: 1

In [None]:
class Minimum(ops.AbstractPrimitive):

    def __init__(self, **kwargs):
        super().__init__(locals())

    def forward(self, x, edge_data=None):
        return torch.minimum(x[0], x[1])

    def get_embedded_ops(self):
        return None
    

class Maximum(ops.AbstractPrimitive):

    def __init__(self, **kwargs):
        super().__init__(locals())

    def forward(self, x, edge_data=None):
        return torch.maximum(x[0], x[1])

    def get_embedded_ops(self):
        return None


class Addition(ops.AbstractPrimitive):

    def __init__(self, **kwargs):
        super().__init__(locals())

    def forward(self, x, edge_data=None):
        return torch.add(x[0], x[1])

    def get_embedded_ops(self):
        return None
    

class Subtraction(ops.AbstractPrimitive):

    def __init__(self, **kwargs):
        super().__init__(locals())

    def forward(self, x, edge_data=None):
        return torch.subtract(x[0], x[1])

    def get_embedded_ops(self):
        return None
    

class Multiplication(ops.AbstractPrimitive):

    def __init__(self, **kwargs):
        super().__init__(locals())

    def forward(self, x, edge_data=None):
        return torch.mul(x[0], x[1])

    def get_embedded_ops(self):
        return None

In [6]:
class stack():
    def __init__(self):
        pass
    def __call__(self, tensors, edges_data=None):
        return torch.stack(tensors)


class SimpleSearchSpace(Graph):

    OPTIMIZER_SCOPE = [
        'a_stage_1',
        'a_stage_2'
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        stages = ['a_stage_1', 'a_stage_2']

        # cell definition
        activation_cell = Graph()
        activation_cell.name = 'activation_cell'
        activation_cell.add_node(1) # input node
        activation_cell.add_node(2) # intermediate node
        activation_cell.add_node(3) # output node
        activation_cell.add_edges_from([(1, 2, EdgeData())]) # mutable intermediate edge
        activation_cell.add_edges_from([(2, 3, EdgeData().finalize())]) # immutable output edge

        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1) # input node
        self.add_node(2) # intermediate node
        for i, scope in zip(range(3, 5), stages):
            self.add_node(i, subgraph=deepcopy(activation_cell).set_scope(scope).set_input([i-1])) # activation cell i
            self.nodes[i]['subgraph'].name = scope
        self.add_node(5) # output node
        self.add_edges_from([(i, i+1, EdgeData()) for i in range(1, 5)])
        self.edges[1, 2].set('op',
            ops.Sequential(
                nn.Conv2d(3, 6, 5),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2),
                nn.Flatten()
            )) # convolutional edge
        self.edges[4, 5].set('op', 
            ops.Sequential(
                nn.Linear(400, 10), 
                nn.Softmax(dim=1)
            )) # linear edge
        
        for scope in stages:
            self.update_edges(
                update_func=lambda edge: self._set_ops(edge),
                scope=scope,
                private_edge_data=True,
            )

    def _set_ops(self, edge):
        edge.data.set('op', [
            ops.Sequential(nn.ReLU()),
            ops.Sequential(nn.Hardswish()),
            ops.Sequential(nn.LeakyReLU()),
            ops.Sequential(nn.Identity())
        ])

In [8]:
class stack():
    def __init__(self):
        pass
    def __call__(self, tensors, edges_data=None):
        return torch.stack(tensors)


class ComplexSearchSpace(Graph):

    OPTIMIZER_SCOPE = [
        'a_stage_1',
        'u_stage_1',
        'u_stage_2',
        'b_stage_1'
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        u_stages = ['u_stage_1', 'u_stage_2']
        
        # unary cell definition
        unary_cell = Graph()
        unary_cell.name = 'u_cell'
        unary_cell.add_node(1) # input node
        unary_cell.add_node(2) # intermediate node
        unary_cell.add_node(3) # output node
        unary_cell.add_edges_from([(1, 2, EdgeData())]) # mutable edge
        unary_cell.edges[1, 2].set('cell_name', 'u_cell')
        unary_cell.add_edges_from([(2, 3, EdgeData().finalize())]) # immutable edge
        
        # binary cell definition
        binary_cell = Graph()
        binary_cell.name = 'b_cell'
        binary_cell.add_node(1) # input node
        binary_cell.add_node(2) # input node
        binary_cell.add_node(3) # concatination node
        binary_cell.nodes[3]['comb_op'] = stack()
        binary_cell.add_node(4) # intermediate node
        binary_cell.add_node(5) # output node
        binary_cell.add_edges_from([(3, 4, EdgeData())]) # mutable edge
        binary_cell.edges[3, 4].set('cell_name', 'b_cell') 
        binary_cell.add_edges_from([(1, 3, EdgeData().finalize()),
                                    (2, 3, EdgeData().finalize()),
                                    (4, 5, EdgeData().finalize())]) # immutable edges
        
        # activation cell definition
        activation_cell = Graph()
        activation_cell.name = 'a_cell'
        activation_cell.add_node(1) # input node
        activation_cell.add_node(2, subgraph=deepcopy(unary_cell).set_scope('u_stage_1').set_input([1])) # unary node
        activation_cell.nodes[2]['subgraph'].name = 'u_stage_1'
        activation_cell.add_node(3, subgraph=deepcopy(unary_cell).set_scope('u_stage_2').set_input([1])) # unary node
        activation_cell.nodes[3]['subgraph'].name = 'u_stage_2'
        activation_cell.add_node(4, subgraph=deepcopy(binary_cell).set_scope('b_stage_1').set_input([2, 3])) # binary node
        activation_cell.nodes[4]['subgraph'].name = 'b_stage_1'
        activation_cell.add_node(5) # output node
        activation_cell.add_edges_from([(1, 2, EdgeData().finalize()), 
                                        (1, 3, EdgeData().finalize()),
                                        (2, 4, EdgeData().finalize()),
                                        (3, 4, EdgeData().finalize()), 
                                        (4, 5, EdgeData().finalize())])
        
        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1) # input node
        self.add_node(2) # intermediate node
        self.add_node(3, subgraph=deepcopy(activation_cell).set_input([2])) # activation cell
        self.nodes[3]['subgraph'].name = 'a_stage_1'
        self.add_node(4) # output node
        self.add_edges_from([(i, i+1, EdgeData()) for i in range(1, 4)])
        self.edges[1, 2].set('op',
            ops.Sequential(
                nn.Conv2d(3, 6, 5),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2),
                nn.Flatten()
            )) # convolutional edge
        self.edges[3, 4].set('op', 
            ops.Sequential(
                nn.Linear(400, 10), 
                nn.Softmax(dim=1)
            )) # linear edge
        
        for scope in u_stages:
            self.update_edges(
                update_func=lambda edge: self._set_unary_ops(edge),
                scope=scope,
                private_edge_data=True,
            ) # set unary cell ops
        
        self.update_edges(
            update_func=lambda edge: self._set_binary_ops(edge),
            scope='b_stage_1',
            private_edge_data=True
        ) # set binary cell ops
        

    def _set_unary_ops(self, edge):
        edge.data.set('op', [ops.Identity(), ops.Zero(stride=1)]) 
        
        
    def _set_binary_ops(self, edge):
        edge.data.set('op', [Minimum(), Maximum()]) 

In [23]:
class RNNSearchSpace(Graph):

    OPTIMIZER_SCOPE = [
        'a_stage_1',
        'u_stage_1',
        'u_stage_2',
        'u_stage_3',
        'u_stage_4',
        'b_stage_1',
        'b_stage_2'
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        u_stages = ['u_stage_1', 'u_stage_2', 'u_stage_3', 'u_stage_4']
        b_stages = ['b_stage_1', 'b_stage_2']
        
        # unary cell definition
        unary_cell = Graph()
        unary_cell.name = 'u_cell'
        unary_cell.add_node(1) # input node
        unary_cell.add_node(2) # intermediate node
        unary_cell.add_node(3) # output node
        unary_cell.add_edges_from([(1, 2, EdgeData())]) # mutable edge
        unary_cell.edges[1, 2].set('cell_name', 'u_cell')
        unary_cell.add_edges_from([(2, 3, EdgeData().finalize())]) # immutable edge
        
        # binary cell definition
        binary_cell = Graph()
        binary_cell.name = 'b_cell'
        binary_cell.add_node(1) # input node
        binary_cell.add_node(2) # input node
        binary_cell.add_node(3) # concatination node
        binary_cell.nodes[3]['comb_op'] = stack()
        binary_cell.add_node(4) # intermediate node
        binary_cell.add_node(5) # output node
        binary_cell.add_edges_from([(3, 4, EdgeData())]) # mutable edge
        binary_cell.edges[3, 4].set('cell_name', 'b_cell') 
        binary_cell.add_edges_from([(1, 3, EdgeData().finalize()),
                                    (2, 3, EdgeData().finalize()),
                                    (4, 5, EdgeData().finalize())]) # immutable edges
        
        # activation cell definition
        activation_cell = Graph()
        activation_cell.name = 'a_cell'
        activation_cell.add_node(1) # input node
        activation_cell.add_node(2, subgraph=deepcopy(unary_cell).set_scope('u_stage_1').set_input([1])) # unary cell 1
        activation_cell.nodes[2]['subgraph'].name = 'u_stage_1'
        activation_cell.add_node(3, subgraph=deepcopy(unary_cell).set_scope('u_stage_2').set_input([1])) # unary cell 2
        activation_cell.nodes[3]['subgraph'].name = 'u_stage_2'
        activation_cell.add_node(4, subgraph=deepcopy(unary_cell).set_scope('u_stage_3').set_input([1])) # unary cell 3
        activation_cell.nodes[4]['subgraph'].name = 'u_stage_3'
        activation_cell.add_node(5, subgraph=deepcopy(binary_cell).set_scope('b_stage_1').set_input([2, 3])) # binary cell 1
        activation_cell.nodes[5]['subgraph'].name = 'b_stage_1'
        activation_cell.add_node(6, subgraph=deepcopy(unary_cell).set_scope('u_stage_4').set_input([5])) # unary cell 4
        activation_cell.nodes[6]['subgraph'].name = 'u_stage_4'
        activation_cell.add_node(7, subgraph=deepcopy(binary_cell).set_scope('b_stage_2').set_input([4, 6])) # binary cell 2
        activation_cell.nodes[7]['subgraph'].name = 'b_stage_2'
        activation_cell.add_node(8) # output node
        activation_cell.add_edges_from([(1, 2, EdgeData().finalize()), 
                                        (1, 3, EdgeData().finalize()),
                                        (1, 4, EdgeData().finalize()),
                                        (2, 5, EdgeData().finalize()),
                                        (3, 5, EdgeData().finalize()), 
                                        (4, 7, EdgeData().finalize()),
                                        (5, 6, EdgeData().finalize()),
                                        (6, 7, EdgeData().finalize()),
                                        (7, 8, EdgeData().finalize())])
        
        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1) # input node
        self.add_node(2) # intermediate node
        self.add_node(3, subgraph=deepcopy(activation_cell).set_input([2])) # activation cell
        self.nodes[3]['subgraph'].name = 'a_stage_1'
        self.add_node(4) # output node
        self.add_edges_from([(i, i+1, EdgeData()) for i in range(1, 4)])
        self.edges[1, 2].set('op',
            ops.Sequential(
                nn.Conv2d(3, 6, 5),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2),
                nn.Flatten()
            )) # convolutional edge
        self.edges[3, 4].set('op', 
            ops.Sequential(
                nn.Linear(400, 10), 
                nn.Softmax(dim=1)
            )) # linear edge
        
        for scope in u_stages:
            self.update_edges(
                update_func=lambda edge: self._set_unary_ops(edge),
                scope=scope,
                private_edge_data=True,
            ) # set unary cell ops
        
        for scope in b_stages:
            self.update_edges(
                update_func=lambda edge: self._set_binary_ops(edge),
                scope=scope,
                private_edge_data=True
            ) # set binary cell ops
        

    def _set_unary_ops(self, edge):
        edge.data.set('op', [
            ops.Identity(), ops.Zero(stride=1)]) 
        
        
    def _set_binary_ops(self, edge):
        edge.data.set('op', [
            Minimum(), 
            Maximum()
        ]) 

In [24]:
search_space = RNNSearchSpace()

In [25]:
optimizer = DARTSOptimizer(config)
optimizer.adapt_search_space(search_space)

[32m[07/03 11:45:35 nl.optimizers.oneshot.darts.optimizer]: [0mParsed graph:
Graph a_stage_1:
 Graph(
  (a_stage_1-edge(1,2)): Identity()
  (a_stage_1-edge(1,3)): Identity()
  (a_stage_1-edge(1,4)): Identity()
  (a_stage_1-subgraph_at(2)): Graph u_stage_1-0.4099054, scope u_stage_1, 3 nodes
  (a_stage_1-edge(2,5)): Identity()
  (a_stage_1-subgraph_at(3)): Graph u_stage_2-0.4099054, scope u_stage_2, 3 nodes
  (a_stage_1-edge(3,5)): Identity()
  (a_stage_1-subgraph_at(4)): Graph u_stage_3-0.4099054, scope u_stage_3, 3 nodes
  (a_stage_1-edge(4,7)): Identity()
  (a_stage_1-subgraph_at(5)): Graph b_stage_1-0.5691127, scope b_stage_1, 5 nodes
  (a_stage_1-edge(5,6)): Identity()
  (a_stage_1-subgraph_at(6)): Graph u_stage_4-0.4099054, scope u_stage_4, 3 nodes
  (a_stage_1-edge(6,7)): Identity()
  (a_stage_1-subgraph_at(7)): Graph b_stage_2-0.5691127, scope b_stage_2, 5 nodes
  (a_stage_1-edge(7,8)): Identity()
)
Graph b_stage_1:
 Graph(
  (b_stage_1-edge(1,3)): Identity()
  (b_stage_1-edge

In [None]:
trainer = Trainer(optimizer, config)
trainer.search()

[32m[07/03 11:45:42 nl.defaults.trainer]: [0mparam size = 0.006882MB
[32m[07/03 11:45:42 nl.defaults.trainer]: [0mStart training
Files already downloaded and verified
Files already downloaded and verified
[32m[07/03 11:45:43 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.000257, -0.000239, 1
+0.000919, +0.000448, 0
-0.001975, +0.001175, 1
+0.000275, -0.000229, 0
+0.000869, +0.001941, 1
+0.000945, +0.001190, 1
[32m[07/03 11:45:43 nl.defaults.trainer]: [0mEpoch 0-0, Train loss: 2.30342, validation loss: 2.30370, learning rate: [0.025]
[32m[07/03 11:45:48 nl.defaults.trainer]: [0mEpoch 0-29, Train loss: 2.30125, validation loss: 2.30104, learning rate: [0.025]
[32m[07/03 11:45:53 nl.defaults.trainer]: [0mEpoch 0-58, Train loss: 2.28639, validation loss: 2.29286, learning rate: [0.025]
[32m[07/03 11:45:58 nl.defaults.trainer]: [0mEpoch 0-87, Train loss: 2.26142, validation loss: 2.24306, learning rate: [0.025]
[32m[07/03 11:46:03 nl

[32m[07/03 11:47:43 nl.defaults.trainer]: [0mEpoch 4 done. Train accuracy (top1, top5): 36.60286, 80.89143, Validation accuracy: 36.30246, 80.36896
[32m[07/03 11:47:43 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.001209, +0.001209, 1
-0.039513, +0.040649, 1
+0.160732, -0.161366, 0
+0.162611, -0.162575, 0
+0.168845, -0.166191, 0
+0.169028, -0.167122, 0
[32m[07/03 11:47:45 nl.defaults.trainer]: [0mEpoch 5-11, Train loss: 2.14681, validation loss: 2.09733, learning rate: [0.024852260087141656]
[32m[07/03 11:47:50 nl.defaults.trainer]: [0mEpoch 5-40, Train loss: 2.08374, validation loss: 2.10976, learning rate: [0.024852260087141656]
[32m[07/03 11:47:55 nl.defaults.trainer]: [0mEpoch 5-69, Train loss: 2.07845, validation loss: 2.10258, learning rate: [0.024852260087141656]
[32m[07/03 11:48:00 nl.defaults.trainer]: [0mEpoch 5-98, Train loss: 2.09986, validation loss: 2.07366, learning rate: [0.024852260087141656]
[32m[07/03 11:48:05 

[32m[07/03 11:49:43 nl.defaults.trainer]: [0mEpoch 9 done. Train accuracy (top1, top5): 41.44857, 85.43429, Validation accuracy: 41.22092, 85.20757
[32m[07/03 11:49:43 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.002063, +0.002063, 1
-0.101387, +0.102481, 1
+0.208230, -0.208750, 0
+0.210682, -0.210653, 0
+0.262222, -0.259678, 0
+0.233113, -0.231373, 0
[32m[07/03 11:49:46 nl.defaults.trainer]: [0mEpoch 10-21, Train loss: 2.04142, validation loss: 2.04411, learning rate: [0.024412678195541847]
[32m[07/03 11:49:51 nl.defaults.trainer]: [0mEpoch 10-50, Train loss: 2.05918, validation loss: 2.06415, learning rate: [0.024412678195541847]
[32m[07/03 11:49:56 nl.defaults.trainer]: [0mEpoch 10-79, Train loss: 2.05028, validation loss: 2.04868, learning rate: [0.024412678195541847]
[32m[07/03 11:50:02 nl.defaults.trainer]: [0mEpoch 10-109, Train loss: 2.03026, validation loss: 2.00814, learning rate: [0.024412678195541847]
[32m[07/03 11:5

[32m[07/03 11:51:42 nl.defaults.trainer]: [0mEpoch 14 done. Train accuracy (top1, top5): 44.24000, 87.88571, Validation accuracy: 43.99806, 87.61975
[32m[07/03 11:51:42 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.008301, +0.008301, 1
-0.168675, +0.169739, 1
+0.222315, -0.222729, 0
+0.226030, -0.226007, 0
+0.370534, -0.368101, 0
+0.266981, -0.265419, 0
[32m[07/03 11:51:43 nl.defaults.trainer]: [0mEpoch 15-6, Train loss: 2.05078, validation loss: 1.95587, learning rate: [0.023692078290260415]
[32m[07/03 11:51:48 nl.defaults.trainer]: [0mEpoch 15-36, Train loss: 1.98981, validation loss: 2.04263, learning rate: [0.023692078290260415]
[32m[07/03 11:51:53 nl.defaults.trainer]: [0mEpoch 15-66, Train loss: 2.00929, validation loss: 2.00930, learning rate: [0.023692078290260415]
[32m[07/03 11:51:58 nl.defaults.trainer]: [0mEpoch 15-95, Train loss: 2.03443, validation loss: 2.02829, learning rate: [0.023692078290260415]
[32m[07/03 11:52

[32m[07/03 11:53:41 nl.defaults.trainer]: [0mEpoch 19 done. Train accuracy (top1, top5): 48.27714, 89.97143, Validation accuracy: 47.82448, 89.50730
[32m[07/03 11:53:41 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.008811, +0.008811, 1
-0.231842, +0.232884, 1
+0.187530, -0.187876, 0
+0.192206, -0.192187, 0
+0.462062, -0.459755, 0
+0.245949, -0.244526, 0
[32m[07/03 11:53:45 nl.defaults.trainer]: [0mEpoch 20-23, Train loss: 1.96353, validation loss: 2.00127, learning rate: [0.022708203932499376]
[32m[07/03 11:53:50 nl.defaults.trainer]: [0mEpoch 20-52, Train loss: 1.97124, validation loss: 1.97209, learning rate: [0.022708203932499376]
[32m[07/03 11:53:55 nl.defaults.trainer]: [0mEpoch 20-81, Train loss: 1.98043, validation loss: 1.94313, learning rate: [0.022708203932499376]
[32m[07/03 11:54:00 nl.defaults.trainer]: [0mEpoch 20-110, Train loss: 1.98552, validation loss: 1.94863, learning rate: [0.022708203932499376]
[32m[07/03 11:

In [47]:
trainer.evaluate_oneshot()

[32m[06/29 18:09:42 nl.defaults.trainer]: [0mStart one-shot evaluation
Files already downloaded and verified
Files already downloaded and verified
[32m[06/29 18:09:48 nl.defaults.trainer]: [0mEvaluation finished


56.85999998982747