In [20]:
import logging
from naslib.defaults.trainer import Trainer
from naslib.optimizers import DARTSOptimizer
from naslib.search_spaces import DartsSearchSpace
from naslib.utils import utils, setup_logger, get_config_from_args, set_seed, log_args
from naslib.search_spaces.core.graph import Graph, EdgeData
from naslib.search_spaces.core import primitives as ops
from torch import nn
from fvcore.common.config import CfgNode
from copy import deepcopy
from IPython.display import clear_output
import torch
from naslib.search_spaces.core.primitives import AbstractPrimitive

In [21]:
config = utils.get_config_from_args(config_type='nas')
config.optimizer = 'darts'
config.search.batch_size = 16 # for TF pool, 32 for personal
utils.set_seed(config.seed)
clear_output(wait=True)
utils.log_args(config)

logger = setup_logger(config.save + '/log.log')
logger.setLevel(logging.INFO)

[32m[07/07 10:55:52 nl.utils.utils]: [0mdataset....................................cifar10
[32m[07/07 10:55:52 nl.utils.utils]: [0mseed.............................................0
[32m[07/07 10:55:52 nl.utils.utils]: [0msearch_space...........................nasbench201
[32m[07/07 10:55:52 nl.utils.utils]: [0mout_dir........................................run
[32m[07/07 10:55:52 nl.utils.utils]: [0moptimizer....................................darts
[32m[07/07 10:55:52 nl.utils.utils]: [0msearchacq_fn_optimization: random_sampling
acq_fn_type: its
arch_learning_rate: 0.0003
arch_weight_decay: 0.001
batch_size: 16
checkpoint_freq: 1000
cutout: False
cutout_length: 16
cutout_prob: 1.0
data_size: 25000
debug_predictor: False
drop_path_prob: 0.0
encoding_type: adjacency_one_hot
epochs: 100
fidelity: -1
gpu: None
grad_clip: 5
k: 10
learning_rate: 0.025
learning_rate_min: 0.001
max_mutations: 1
momentum: 0.9
num_arches_to_mutate: 2
num_candidates: 20
num_ensemble: 3
num_init: 10

In [22]:
class Power(AbstractPrimitive):
    def __init__(self,power):
        super().__init__(locals())
        self.power=power
    def forward(self, x, edge_data=None):
        return torch.pow(x,self.power)
    def get_embedded_ops(self):
        return None

class Sin(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.sin(x)
    def get_embedded_ops(self):
        return None
    
class Cos(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.cos(x)
    def get_embedded_ops(self):
        return None

class Abs_op(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.abs(x)
    def get_embedded_ops(self):
        return None

class Sign(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return x*-1
    def get_embedded_ops(self):
        return None

class Beta_mul(AbstractPrimitive):
    def __init__(self, channels):
        super().__init__(locals())
        self.beta = torch.nn.Parameter(torch.ones(channels))
    def forward(self, x, edge_data=None):
        return x * self.beta
    def get_embedded_ops(self):
        return None

class Beta_add(AbstractPrimitive):
    def __init__(self, channels):
        super().__init__(locals())
        self.beta = torch.nn.Parameter(torch.ones(channels))
    def forward(self, x, edge_data=None):
        return x + self.beta
    def get_embedded_ops(self):
        return None

class Log(AbstractPrimitive):
    def __init__(self, eps=1e-10):
        super().__init__(locals())
        self.eps = eps
    def forward(self, x, edge_data=None):
        return torch.log(x+self.eps)
    def get_embedded_ops(self):
        return None

class Exp(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.exp(x) # clamp to 10
    def get_embedded_ops(self):
        return None

class Sinh(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.sinh(x) 
    def get_embedded_ops(self):
        return None

class Cosh(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.cosh(x)
    def get_embedded_ops(self):
        return None

class Tanh(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.tanh(x)
    def get_embedded_ops(self):
        return None

class Asinh(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.asinh(x)
    def get_embedded_ops(self):
        return None

class Acosh(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.acosh(x)
    def get_embedded_ops(self):
        return None
    
class Atan(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.atan(x)
    def get_embedded_ops(self):
        return None

class Sinc(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.sinc(x)
    def get_embedded_ops(self):
        return None

class Maximum0(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.maximum(x,torch.zeros(x.shape).cuda())
    def get_embedded_ops(self):
        return None
    
class Minimum0(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.minimum(x,torch.zeros(x.shape).cuda())
    def get_embedded_ops(self):
        return None
    
class Sigmoid(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.sigmoid(x)
    def get_embedded_ops(self):
        return None

class LogExp(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.log(1+torch.exp(x))
    def get_embedded_ops(self):
        return None

class Exp2(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.exp(-torch.pow(x,2))
    def get_embedded_ops(self):
        return None

class Erf(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.erf(x)
    def get_embedded_ops(self):
        return None

class Beta(AbstractPrimitive):
    def __init__(self, channels):
        super().__init__(locals())
        self.beta = torch.nn.Parameter(torch.ones(channels))
    def forward(self, x, edge_data=None):
        return self.beta
    def get_embedded_ops(self):
        return None
    
class Add(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.add(x[0],x[1])
    def get_embedded_ops(self):
        return None

class Sub(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.sub(x[0],x[1])
    def get_embedded_ops(self):
        return None

class Mul(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.mul(x[0],x[1])
    def get_embedded_ops(self):
        return None

class Div(AbstractPrimitive):
    def __init__(self,eps=1e-10):
        super().__init__(locals())
        self.eps=eps
    def forward(self, x, edge_data=None):
        return torch.div(x[0],x[1] + self.eps)
    def get_embedded_ops(self):
        return None

class Maximum(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.maximum(x[0],x[1])
    def get_embedded_ops(self):
        return None
    
class Minimum(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.minimum(x[0],x[1])
    def get_embedded_ops(self):
        return None

class SigMul(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())
    def forward(self, x, edge_data=None):
        return torch.mul(torch.sigmoid(x[0]),x[1])
    def get_embedded_ops(self):
        return None

class ExpBetaSub2(AbstractPrimitive):
    def __init__(self, channels):
        super().__init__(locals())
        self.beta = torch.nn.Parameter(torch.ones(channels))
    def forward(self, x, edge_data=None):
        return torch.exp(-self.beta*torch.pow(torch.sub(x[0],x[1]),2))
    def get_embedded_ops(self):
        return None

class ExpBetaSubAbs(AbstractPrimitive):
    def __init__(self, channels):
        super().__init__(locals())
        self.beta = torch.nn.Parameter(torch.ones(channels))
    def forward(self, x, edge_data=None):
        return torch.exp(-self.beta*torch.abs(torch.sub(x[0],x[1])))
    def get_embedded_ops(self):
        return None

class BetaMix(AbstractPrimitive):
    def __init__(self, channels):
        super().__init__(locals())
        self.beta = torch.nn.Parameter(torch.ones(channels))
    def forward(self, x, edge_data=None):
        return torch.add(-self.beta*x[0],(1-self.beta)*x[1])
    def get_embedded_ops(self):
        return None

In [23]:
class SimpleSearchSpace(Graph):

    OPTIMIZER_SCOPE = [
        'a_stage_1',
        'a_stage_2'
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        stages = ['a_stage_1', 'a_stage_2']

        # cell definition
        activation_cell = Graph()
        activation_cell.name = 'activation_cell'
        activation_cell.add_node(1) # input node
        activation_cell.add_node(2) # intermediate node
        activation_cell.add_node(3) # output node
        activation_cell.add_edges_from([(1, 2, EdgeData())]) # mutable intermediate edge
        activation_cell.add_edges_from([(2, 3, EdgeData().finalize())]) # immutable output edge

        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1) # input node
        self.add_node(2) # intermediate node
        for i, scope in zip(range(3, 5), stages):
            self.add_node(i, subgraph=deepcopy(activation_cell).set_scope(scope).set_input([i-1])) # activation cell i
            self.nodes[i]['subgraph'].name = scope
        self.add_node(5) # output node
        self.add_edges_from([(i, i+1, EdgeData()) for i in range(1, 5)])
        self.edges[1, 2].set('op',
            ops.Sequential(
                nn.Conv2d(3, 6, 5),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2),
                nn.Flatten()
            )) # convolutional edge
        self.edges[4, 5].set('op', 
            ops.Sequential(
                nn.Linear(400, 10), 
                nn.Softmax(dim=1)
            )) # linear edge
        
        for scope in stages:
            self.update_edges(
                update_func=lambda edge: self._set_ops(edge),
                scope=scope,
                private_edge_data=True,
            )

    def _set_ops(self, edge):
        edge.data.set('op', [
            ops.Sequential(nn.ReLU()),
            ops.Sequential(nn.Hardswish()),
            ops.Sequential(nn.LeakyReLU()),
            ops.Sequential(nn.Identity())
        ])

In [24]:
class stack():
    def __init__(self):
        pass
    def __call__(self, tensors, edges_data=None):
        return torch.stack(tensors)


class ComplexSearchSpace(Graph):

    OPTIMIZER_SCOPE = [
        'a_stage_1',
        'u_stage_1',
        'u_stage_2',
        'b_stage_1'
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        u_stages = ['u_stage_1', 'u_stage_2']
        
        # unary cell definition
        unary_cell = Graph()
        unary_cell.name = 'u_cell'
        unary_cell.add_node(1) # input node
        unary_cell.add_node(2) # intermediate node
        unary_cell.add_node(3) # output node
        unary_cell.add_edges_from([(1, 2, EdgeData())]) # mutable edge
        unary_cell.edges[1, 2].set('cell_name', 'u_cell')
        unary_cell.add_edges_from([(2, 3, EdgeData().finalize())]) # immutable edge
        
        # binary cell definition
        binary_cell = Graph()
        binary_cell.name = 'b_cell'
        binary_cell.add_node(1) # input node
        binary_cell.add_node(2) # input node
        binary_cell.add_node(3) # concatination node
        binary_cell.nodes[3]['comb_op'] = stack()
        binary_cell.add_node(4) # intermediate node
        binary_cell.add_node(5) # output node
        binary_cell.add_edges_from([(3, 4, EdgeData())]) # mutable edge
        binary_cell.edges[3, 4].set('cell_name', 'b_cell') 
        binary_cell.add_edges_from([(1, 3, EdgeData().finalize()),
                                    (2, 3, EdgeData().finalize()),
                                    (4, 5, EdgeData().finalize())]) # immutable edges
        
        # activation cell definition
        activation_cell = Graph()
        activation_cell.name = 'a_cell'
        activation_cell.add_node(1) # input node
        activation_cell.add_node(2, subgraph=deepcopy(unary_cell).set_scope('u_stage_1').set_input([1])) # unary node
        activation_cell.nodes[2]['subgraph'].name = 'u_stage_1'
        activation_cell.add_node(3, subgraph=deepcopy(unary_cell).set_scope('u_stage_2').set_input([1])) # unary node
        activation_cell.nodes[3]['subgraph'].name = 'u_stage_2'
        activation_cell.add_node(4, subgraph=deepcopy(binary_cell).set_scope('b_stage_1').set_input([2, 3])) # binary node
        activation_cell.nodes[4]['subgraph'].name = 'b_stage_1'
        activation_cell.add_node(5) # output node
        activation_cell.add_edges_from([(1, 2, EdgeData().finalize()), 
                                        (1, 3, EdgeData().finalize()),
                                        (2, 4, EdgeData().finalize()),
                                        (3, 4, EdgeData().finalize()), 
                                        (4, 5, EdgeData().finalize())])
        
        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1) # input node
        self.add_node(2) # intermediate node
        self.add_node(3, subgraph=deepcopy(activation_cell).set_input([2])) # activation cell
        self.nodes[3]['subgraph'].name = 'a_stage_1'
        self.add_node(4) # output node
        self.add_edges_from([(i, i+1, EdgeData()) for i in range(1, 4)])
        self.edges[1, 2].set('op',
            ops.Sequential(
                nn.Conv2d(3, 6, 5),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2),
                nn.Flatten()
            )) # convolutional edge
        self.edges[3, 4].set('op', 
            ops.Sequential(
                nn.Linear(400, 10), 
                nn.Softmax(dim=1)
            )) # linear edge
        
        for scope in u_stages:
            self.update_edges(
                update_func=lambda edge: self._set_unary_ops(edge),
                scope=scope,
                private_edge_data=True,
            ) # set unary cell ops
        
        self.update_edges(
            update_func=lambda edge: self._set_binary_ops(edge),
            scope='b_stage_1',
            private_edge_data=True
        ) # set binary cell ops
        

    def _set_unary_ops(self, edge):
        edge.data.set('op', [ops.Identity(), ops.Zero(stride=1)]) 
        
        
    def _set_binary_ops(self, edge):
        edge.data.set('op', [Minimum(), Maximum()]) 

In [25]:
class RNNSearchSpace(Graph):

    OPTIMIZER_SCOPE = [
        'a_stage_1',
        'u_stage_1',
        'u_stage_2',
        'u_stage_3',
        'u_stage_4',
        'b_stage_1',
        'b_stage_2'
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        u_stages = ['u_stage_1', 'u_stage_2', 'u_stage_3', 'u_stage_4']
        b_stages = ['b_stage_1', 'b_stage_2']
        
        # unary cell definition
        unary_cell = Graph()
        unary_cell.name = 'u_cell'
        unary_cell.add_node(1) # input node
        unary_cell.add_node(2) # intermediate node
        unary_cell.add_node(3) # output node
        unary_cell.add_edges_from([(1, 2, EdgeData())]) # mutable edge
        unary_cell.edges[1, 2].set('cell_name', 'u_cell')
        unary_cell.add_edges_from([(2, 3, EdgeData().finalize())]) # immutable edge
        
        # binary cell definition
        binary_cell = Graph()
        binary_cell.name = 'b_cell'
        binary_cell.add_node(1) # input node
        binary_cell.add_node(2) # input node
        binary_cell.add_node(3) # concatination node
        binary_cell.nodes[3]['comb_op'] = stack()
        binary_cell.add_node(4) # intermediate node
        binary_cell.add_node(5) # output node
        binary_cell.add_edges_from([(3, 4, EdgeData())]) # mutable edge
        binary_cell.edges[3, 4].set('cell_name', 'b_cell') 
        binary_cell.add_edges_from([(1, 3, EdgeData().finalize()),
                                    (2, 3, EdgeData().finalize()),
                                    (4, 5, EdgeData().finalize())]) # immutable edges
        
        # activation cell definition
        activation_cell = Graph()
        activation_cell.name = 'a_cell'
        activation_cell.add_node(1) # input node
        activation_cell.add_node(2, subgraph=deepcopy(unary_cell).set_scope('u_stage_1').set_input([1])) # unary cell 1
        activation_cell.nodes[2]['subgraph'].name = 'u_stage_1'
        activation_cell.add_node(3, subgraph=deepcopy(unary_cell).set_scope('u_stage_2').set_input([1])) # unary cell 2
        activation_cell.nodes[3]['subgraph'].name = 'u_stage_2'
        activation_cell.add_node(4, subgraph=deepcopy(unary_cell).set_scope('u_stage_3').set_input([1])) # unary cell 3
        activation_cell.nodes[4]['subgraph'].name = 'u_stage_3'
        activation_cell.add_node(5, subgraph=deepcopy(binary_cell).set_scope('b_stage_1').set_input([2, 3])) # binary cell 1
        activation_cell.nodes[5]['subgraph'].name = 'b_stage_1'
        activation_cell.add_node(6, subgraph=deepcopy(unary_cell).set_scope('u_stage_4').set_input([5])) # unary cell 4
        activation_cell.nodes[6]['subgraph'].name = 'u_stage_4'
        activation_cell.add_node(7, subgraph=deepcopy(binary_cell).set_scope('b_stage_2').set_input([4, 6])) # binary cell 2
        activation_cell.nodes[7]['subgraph'].name = 'b_stage_2'
        activation_cell.add_node(8) # output node
        activation_cell.add_edges_from([(1, 2, EdgeData().finalize()), 
                                        (1, 3, EdgeData().finalize()),
                                        (1, 4, EdgeData().finalize()),
                                        (2, 5, EdgeData().finalize()),
                                        (3, 5, EdgeData().finalize()), 
                                        (4, 7, EdgeData().finalize()),
                                        (5, 6, EdgeData().finalize()),
                                        (6, 7, EdgeData().finalize()),
                                        (7, 8, EdgeData().finalize())])
        
        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1) # input node
        self.add_node(2) # intermediate node
        self.add_node(3, subgraph=deepcopy(activation_cell).set_input([2])) # activation cell
        self.add_node(4) # output node
        self.add_edges_from([(i, i+1, EdgeData()) for i in range(1, 4)])
        self.edges[1, 2].set('op',
            ops.Sequential(
                nn.Conv2d(3, 6, 5),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2),
                nn.Flatten()
            )) # convolutional edge
        self.edges[3, 4].set('op', 
            ops.Sequential(
                nn.Linear(400, 10), 
                nn.Softmax(dim=1)
            )) # linear edge
        
        for scope in u_stages:
            self.update_edges(
                update_func=lambda edge: self._set_unary_ops(edge),
                scope=scope,
                private_edge_data=True,
            ) # set unary cell ops
        
        for scope in b_stages:
            self.update_edges(
                update_func=lambda edge: self._set_binary_ops(edge),
                scope=scope,
                private_edge_data=True
            ) # set binary cell ops
        

    def _set_unary_ops(self, edge, channels=None):
        edge.data.set('op', [
            ops.Identity(), 
            ops.Zero(stride=1)
        ]) 
        
        
    def _set_binary_ops(self, edge, channels=None):
        edge.data.set('op', [
            Minimum(),
            Maximum(),
        ]) 

In [28]:
class Stack(AbstractPrimitive):
    def __init__(self):
        super().__init__(locals())

    def forward(self, x, edge_data=None):
        return torch.stack(x)

    def get_embedded_ops(self):
        return None


class UnStack(AbstractPrimitive):
    def __init__(self, dim=1):
        super().__init__(locals())
        self.dim = dim

    def forward(self, x, edge_data=None):
        return x[self.dim]

    def get_embedded_ops(self):
        return None


class RNNResNet20SearchSpace(Graph):
    """
    https://www.researchgate.net/figure/ResNet-20-architecture_fig3_351046093
    """

    OPTIMIZER_SCOPE = [
        f"activation_{i}" for i in range(1, 20)
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        # cell definition
        activation_cell = Graph()
        activation_cell.name = 'activation_cell'
        activation_cell.add_node(1)  # input node
        activation_cell.add_node(2)  # unary node / intermediate node
        activation_cell.add_node(3)  # unary node / intermediate node
        activation_cell.add_node(4)  # binary node / output node
        activation_cell.add_edges_from([(1, 2, EdgeData())])  # mutable intermediate edge
        activation_cell.add_edges_from([(1, 3, EdgeData())])  # mutable intermediate edge

        activation_cell.add_edges_from([(2, 4, EdgeData().finalize())])  # mutable intermediate edge
        activation_cell.add_edges_from([(3, 4, EdgeData().finalize())])  # mutable intermediate edge
        activation_cell.nodes[4]['comb_op'] = Stack()

        activation_cell.add_node(5)  # binary node / output node
        activation_cell.add_edges_from([(4, 5, EdgeData())])  # mutable intermediate edge

        activation_cell.add_node(6)
        activation_cell.add_edges_from([(5, 6, EdgeData().finalize())])  # unary node / intermediate node
        activation_cell.add_node(7)
        activation_cell.add_edges_from([(6, 7, EdgeData())])  # mutable intermediate edge
        activation_cell.add_node(8)
        activation_cell.add_edges_from([(1, 8, EdgeData())])  # mutable intermediate edge

        activation_cell.add_node(9)
        activation_cell.add_edges_from([(8, 9, EdgeData().finalize())])  # mutable intermediate edge
        activation_cell.add_edges_from([(7, 9, EdgeData().finalize())])  # mutable intermediate edge
        activation_cell.nodes[9]['comb_op'] = Stack()

        activation_cell.add_node(10)
        activation_cell.add_edges_from([(9, 10, EdgeData())])  # mutable intermediate edge

        activation_cell.add_node(11)
        activation_cell.add_edges_from([(10, 11, EdgeData().finalize())])  # mutable intermediate edge

        for tup in [(1, 2), (1, 3), (1, 8), (6, 7)]:  # unary operations
            activation_cell.edges[tup[0], tup[1]].set("op", [
                ops.Identity(),
                ops.Zero(stride=1),
                ops.Sequential(Power(2)),
                ops.Sequential(Power(3)),
                ops.Sequential(Power(.5)),
                ops.Sequential(Sin()),
                ops.Sequential(Cos()),
                ops.Sequential(Abs_op()),
                ops.Sequential(Sign()),
#                 ops.Sequential(Beta_mul(channels=32)),
#                 ops.Sequential(Beta_add(channels=32)),
                ops.Sequential(Log()),
                ops.Sequential(Exp()),
                ops.Sequential(Sinh()),
                ops.Sequential(Cosh()),
                ops.Sequential(Tanh()),
                ops.Sequential(Asinh()),
                ops.Sequential(Acosh()),
                ops.Sequential(Atan()),
                ops.Sequential(Sinc()),
                ops.Sequential(Maximum0()),
                ops.Sequential(Minimum0()),
                ops.Sequential(Sigmoid()),
                ops.Sequential(LogExp()),
                ops.Sequential(Exp2()),
                ops.Sequential(Erf()),
#                 ops.Sequential(Beta(channels=32)),    
            ])

        for tup in [(4, 5), (9, 10)]:
            activation_cell.edges[tup[0], tup[1]].set("op", [
                ops.Sequential(Add()),
                ops.Sequential(Sub()),
                ops.Sequential(Mul()),
                ops.Sequential(Div()),
                ops.Sequential(Maximum()),
                ops.Sequential(Minimum()),
#                 ops.Sequential(ExpBetaSub2(channels=32)),
#                 ops.Sequential(ExpBetaSubAbs(channel=32)),
#                 ops.Sequential(BetaMix(channels=32)),
            ])

        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1)  # input
        self.add_node(2)  # intermediate
        self.add_node(3,
                      subgraph=activation_cell.copy().set_scope("activation_1").set_input([2]))  # activation cell 3
        self.nodes[3]['subgraph'].name = "activation_1"

        self.add_node(4)
        self.add_node(5,
                      subgraph=activation_cell.copy().set_scope("activation_2").set_input([4]))  # activation cell 3
        self.nodes[5]['subgraph'].name = "activation_2"

        self.add_node(6)
        self.add_node(7,
                      subgraph=activation_cell.copy().set_scope("activation_3").set_input([6]))  # activation cell 3
        self.nodes[7]['subgraph'].name = "activation_3"

        self.add_edges_from([
            (1, 2, EdgeData()),
            (2, 3, EdgeData()),
            (3, 4, EdgeData()),
            (4, 5, EdgeData()),
            (5, 6, EdgeData()),
            (3, 6, EdgeData()),
            (6, 7, EdgeData())
        ])

        self.edges[1, 2].set('op',
                             ops.Sequential(nn.Conv2d(3, 16, 3, padding=1), ))  # convolutional edge
        self.edges[3, 4].set('op',
                             ops.Sequential(nn.Conv2d(16, 16, 3, padding=1), ))  # convolutional edge
        self.edges[5, 6].set('op',
                             ops.Sequential(nn.Conv2d(16, 16, 3, padding=1), ))  # convolutional edge

        conv_option = {
            "in_channels": 16,
            "out_channels": 16,
            "kernel_size": 3,
            "padding": 1
        }
        self._create_base_block(7, 4, activation_cell, conv_option)
        self._create_base_block(11, 6, activation_cell, conv_option)

        conv_option_a = {
            "in_channels": 16,
            "out_channels": 32,
            "kernel_size": 3,
            "padding": 1,
            "stride": 2
        }
        conv_option_b = {
            "in_channels": 16,
            "out_channels": 32,
            "kernel_size": 1,
            "padding": 0,
            "stride": 2
        }
        self._create_reduction_block(15, 8, activation_cell, conv_option_a, conv_option_b)

        conv_option = {
            "in_channels": 32,
            "out_channels": 32,
            "kernel_size": 3,
            "padding": 1
        }
        self._create_base_block(19, 10, activation_cell, conv_option)
        self._create_base_block(23, 12, activation_cell, conv_option)

        conv_option_a = {
            "in_channels": 32,
            "out_channels": 64,
            "kernel_size": 3,
            "padding": 1,
            "stride": 2
        }
        conv_option_b = {
            "in_channels": 32,
            "out_channels": 64,
            "kernel_size": 1,
            "padding": 0,
            "stride": 2
        }
        self._create_reduction_block(27, 14, activation_cell, conv_option_a, conv_option_b)

        conv_option = {
            "in_channels": 64,
            "out_channels": 64,
            "kernel_size": 3,
            "padding": 1
        }
        self._create_base_block(31, 16, activation_cell, conv_option)
        self._create_base_block(34, 18, activation_cell, conv_option)

        # add head
        self.add_node(39)
        self.add_edges_from([
            (38, 39, EdgeData())
        ])
        self.edges[38, 39].set('op',
                               ops.Sequential(
                                   nn.AvgPool2d(8),
                                   nn.Flatten(),
                                   nn.Linear(64, 10),
                                   nn.Softmax()
                               ))  # convolutional edge
        self.add_node(40)
        self.add_edges_from([
            (39, 40, EdgeData().finalize())
        ])

    def _create_base_block(self, start: int, stage: int, cell, conv_option: dict):
        self.add_node(start + 1)

        self.add_node(start + 2, subgraph=cell.copy().set_scope(f"activation_{stage}").set_input(
            [start + 1]))  # activation cell 3
        self.nodes[start + 2]['subgraph'].name = f"activation_{stage}"

        self.add_node(start + 3)

        self.add_node(start + 4, subgraph=cell.copy().set_scope(f"activation_{stage + 1}").set_input(
            [start + 3]))  # activation cell 3
        self.nodes[start + 4]['subgraph'].name = f"activation_{stage + 1}"

        self.add_edges_from([
            (start, start + 1, EdgeData()),
            (start, start + 3, EdgeData()),
            (start + 1, start + 2, EdgeData()),
            (start + 2, start + 3, EdgeData()),
            (start + 3, start + 4, EdgeData()),
        ])

        self.edges[start, start + 1].set('op',
                                         ops.Sequential(nn.Conv2d(**conv_option), ))  # convolutional edge
        self.edges[start + 2, start + 3].set('op',
                                             ops.Sequential(nn.Conv2d(**conv_option), ))  # convolutional edge

    def _create_reduction_block(self, start: int, stage: int, cell, conv_option_a: dict, conv_option_b: dict):
        self.add_node(start + 1)

        self.add_node(start + 2, subgraph=cell.copy().set_scope(f"activation_{stage}").set_input(
            [start + 1]))  # activation cell 3
        self.nodes[start + 2]['subgraph'].name = f"activation_{stage}"

        self.add_node(start + 3)

        self.add_node(start + 4, subgraph=cell.copy().set_scope(f"activation_{stage + 1}").set_input(
            [start + 3]))  # activation cell 3
        self.nodes[start + 4]['subgraph'].name = f"activation_{stage + 1}"

        self.add_edges_from([
            (start, start + 1, EdgeData()),
            (start, start + 3, EdgeData()),  # add conv
            (start + 1, start + 2, EdgeData()),
            (start + 2, start + 3, EdgeData()),
            (start + 3, start + 4, EdgeData()),
        ])

        self.edges[start, start + 1].set('op',
                                         ops.Sequential(nn.Conv2d(**conv_option_a), ))  # convolutional edge
        conv_option_a["in_channels"] = conv_option_a["out_channels"]
        conv_option_a["stride"] = 1

        self.edges[start, start + 3].set('op',
                                         ops.Sequential(nn.Conv2d(**conv_option_b), ))  # convolutional edge
        self.edges[start + 2, start + 3].set('op',
                                             ops.Sequential(nn.Conv2d(**conv_option_a), ))  # convolutional edge

In [29]:
search_space = RNNResNet20SearchSpace()

In [30]:
optimizer = DARTSOptimizer(config)
optimizer.adapt_search_space(search_space)

[32m[07/07 10:56:47 nl.optimizers.oneshot.darts.optimizer]: [0mParsed graph:
Graph activation_1:
 Graph(
  (activation_1-edge(1,2)): MixedOp(
    (primitive-0): Identity()
    (primitive-1): Zero (stride=1)
    (primitive-2): Sequential(
      (op): Sequential(
        (0): Power()
      )
    )
    (primitive-3): Sequential(
      (op): Sequential(
        (0): Power()
      )
    )
    (primitive-4): Sequential(
      (op): Sequential(
        (0): Power()
      )
    )
    (primitive-5): Sequential(
      (op): Sequential(
        (0): Sin()
      )
    )
    (primitive-6): Sequential(
      (op): Sequential(
        (0): Cos()
      )
    )
    (primitive-7): Sequential(
      (op): Sequential(
        (0): Abs_op()
      )
    )
    (primitive-8): Sequential(
      (op): Sequential(
        (0): Sign()
      )
    )
    (primitive-9): Sequential(
      (op): Sequential(
        (0): Log()
      )
    )
    (primitive-10): Sequential(
      (op): Sequential(
        (0): Exp()
  

In [None]:
trainer = Trainer(optimizer, config)
trainer.search()

[32m[07/07 10:56:49 nl.defaults.trainer]: [0mparam size = 0.271690MB
[32m[07/07 10:56:49 nl.defaults.trainer]: [0mStart training
Files already downloaded and verified
Files already downloaded and verified
[32m[07/07 10:56:50 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
+0.001515, -0.000164, +0.001632, -0.000510, -0.000001, -0.000682, +0.001282, -0.001548, +0.000633, +0.000713, -0.001763, +0.001133, -0.001208, -0.001001, +0.000206, -0.000608, -0.001260, +0.000356, +0.000518, -0.001146, -0.000835, -0.000017, +0.000940, -0.000706, 2
+0.000334, -0.000353, -0.000279, -0.000459, +0.000727, +0.001899, +0.000475, +0.001939, +0.000308, -0.001569, +0.000574, -0.001367, +0.001725, +0.000065, -0.001500, -0.001313, -0.000132, +0.000525, -0.001490, -0.000912, -0.000180, +0.001335, -0.001060, -0.001355, 7
+0.001254, -0.001125, +0.001665, +0.000785, +0.000707, +0.001371, -0.000020, -0.000373, -0.001140, +0.000877, +0.000933, +0.002180, +0.000313, -0.0003

  input = module(input)


[32m[07/07 10:56:51 nl.defaults.trainer]: [0mEpoch 0-0, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 10:56:51 nl.defaults.trainer]: [0mcuda consumption
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 1            |        cudaMalloc retries: 2         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    3608 KB |    2329 MB |   30244 MB |   30241 MB |
|       from large pool |       0 KB |    2154 MB |    6912 MB |    6912 MB |
|       from small pool |    3608 KB |    1551 MB |   23332 MB |   23329 MB |
|---------------------------------------------------------------------------|
| Active memory         |    3608 KB |    2329 MB |   30244 MB |   30241 MB |
|       from large pool |       0 KB

[32m[07/07 10:56:56 nl.defaults.trainer]: [0mEpoch 0-7, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 10:57:01 nl.defaults.trainer]: [0mEpoch 0-14, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 10:57:07 nl.defaults.trainer]: [0mEpoch 0-21, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 10:57:12 nl.defaults.trainer]: [0mEpoch 0-28, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 10:57:17 nl.defaults.trainer]: [0mEpoch 0-35, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 10:57:22 nl.defaults.trainer]: [0mEpoch 0-42, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 10:57:27 nl.defaults.trainer]: [0mEpoch 0-49, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 10:57:33 nl.defaults.trainer]: [0mEpoch 0-56, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 10:57:38 nl.defaults.

[32m[07/07 11:02:51 nl.defaults.trainer]: [0mEpoch 0-483, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:02:56 nl.defaults.trainer]: [0mEpoch 0-490, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:03:01 nl.defaults.trainer]: [0mEpoch 0-497, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:03:06 nl.defaults.trainer]: [0mEpoch 0-504, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:03:12 nl.defaults.trainer]: [0mEpoch 0-511, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:03:17 nl.defaults.trainer]: [0mEpoch 0-518, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:03:22 nl.defaults.trainer]: [0mEpoch 0-525, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:03:27 nl.defaults.trainer]: [0mEpoch 0-532, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:03:32 nl.

[32m[07/07 11:08:46 nl.defaults.trainer]: [0mEpoch 0-959, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:08:51 nl.defaults.trainer]: [0mEpoch 0-966, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:08:56 nl.defaults.trainer]: [0mEpoch 0-973, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:09:01 nl.defaults.trainer]: [0mEpoch 0-980, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:09:07 nl.defaults.trainer]: [0mEpoch 0-987, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:09:12 nl.defaults.trainer]: [0mEpoch 0-994, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:09:17 nl.defaults.trainer]: [0mEpoch 0-1001, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:09:22 nl.defaults.trainer]: [0mEpoch 0-1008, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:09:27 n

[32m[07/07 11:14:35 nl.defaults.trainer]: [0mEpoch 0-1428, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:14:40 nl.defaults.trainer]: [0mEpoch 0-1435, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:14:45 nl.defaults.trainer]: [0mEpoch 0-1442, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:14:51 nl.defaults.trainer]: [0mEpoch 0-1449, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:14:56 nl.defaults.trainer]: [0mEpoch 0-1456, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:15:01 nl.defaults.trainer]: [0mEpoch 0-1463, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:15:06 nl.defaults.trainer]: [0mEpoch 0-1470, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:15:12 nl.defaults.trainer]: [0mEpoch 0-1477, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:1

[32m[07/07 11:20:25 nl.defaults.trainer]: [0mEpoch 0-1897, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:20:30 nl.defaults.trainer]: [0mEpoch 0-1904, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:20:35 nl.defaults.trainer]: [0mEpoch 0-1911, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:20:41 nl.defaults.trainer]: [0mEpoch 0-1918, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:20:46 nl.defaults.trainer]: [0mEpoch 0-1925, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:20:51 nl.defaults.trainer]: [0mEpoch 0-1932, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:20:56 nl.defaults.trainer]: [0mEpoch 0-1939, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:21:02 nl.defaults.trainer]: [0mEpoch 0-1946, Train loss: nan, validation loss: nan, learning rate: [0.025]
[32m[07/07 11:2

[32m[07/07 11:24:02 nl.defaults.trainer]: [0mEpoch 0 done. Train accuracy (top1, top5): 9.98000, 50.00286, Validation accuracy: 10.03199, 49.89145
[32m[07/07 11:24:02 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
+nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, 0
+nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, 0
+nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, 0
+nan, +nan, +nan, +nan, +nan, +nan, 0
+nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, 0
+nan, +nan, +nan, +nan, +nan, +nan, 0
+nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan, +nan

[32m[07/07 11:24:05 nl.defaults.trainer]: [0mEpoch 1-3, Train loss: nan, validation loss: nan, learning rate: [0.02499407872438878]
[32m[07/07 11:24:10 nl.defaults.trainer]: [0mEpoch 1-10, Train loss: nan, validation loss: nan, learning rate: [0.02499407872438878]
[32m[07/07 11:24:15 nl.defaults.trainer]: [0mEpoch 1-17, Train loss: nan, validation loss: nan, learning rate: [0.02499407872438878]
[32m[07/07 11:24:21 nl.defaults.trainer]: [0mEpoch 1-24, Train loss: nan, validation loss: nan, learning rate: [0.02499407872438878]
[32m[07/07 11:24:26 nl.defaults.trainer]: [0mEpoch 1-31, Train loss: nan, validation loss: nan, learning rate: [0.02499407872438878]
[32m[07/07 11:24:31 nl.defaults.trainer]: [0mEpoch 1-38, Train loss: nan, validation loss: nan, learning rate: [0.02499407872438878]
[32m[07/07 11:24:36 nl.defaults.trainer]: [0mEpoch 1-45, Train loss: nan, validation loss: nan, learning rate: [0.02499407872438878]
[32m[07/07 11:24:42 nl.defaults.trainer]: [0mEpoch 1-5

In [None]:
trainer.evaluate_oneshot()