In [1]:
import logging
from naslib.defaults.trainer import Trainer
from naslib.optimizers import DARTSOptimizer
from naslib.search_spaces import DartsSearchSpace
from naslib.utils import utils, setup_logger, get_config_from_args, set_seed, log_args
from naslib.search_spaces.core.graph import Graph, EdgeData
from naslib.search_spaces.core import primitives as ops
from torch import nn
from fvcore.common.config import CfgNode
from copy import deepcopy
from IPython.display import clear_output
import torch

device: cuda:0
device: cpu
device: cuda:0
device: cuda:0
device: cuda:0
device: cuda:0


In [8]:
config = utils.get_config_from_args(config_type='nas')
config.optimizer = 'darts'
utils.set_seed(config.seed)
clear_output(wait=True)
utils.log_args(config)

logger = setup_logger(config.save + '/log.log')
logger.setLevel(logging.INFO)

[32m[06/29 16:28:08 nl.utils.utils]: [0mdataset....................................cifar10
[32m[06/29 16:28:08 nl.utils.utils]: [0mseed.............................................0
[32m[06/29 16:28:08 nl.utils.utils]: [0msearch_space...........................nasbench201
[32m[06/29 16:28:08 nl.utils.utils]: [0mout_dir........................................run
[32m[06/29 16:28:08 nl.utils.utils]: [0moptimizer....................................darts
[32m[06/29 16:28:08 nl.utils.utils]: [0msearchacq_fn_optimization: random_sampling
acq_fn_type: its
arch_learning_rate: 0.0003
arch_weight_decay: 0.001
batch_size: 256
checkpoint_freq: 1000
cutout: False
cutout_length: 16
cutout_prob: 1.0
data_size: 25000
debug_predictor: False
drop_path_prob: 0.0
encoding_type: adjacency_one_hot
epochs: 100
fidelity: -1
gpu: None
grad_clip: 5
k: 10
learning_rate: 0.025
learning_rate_min: 0.001
max_mutations: 1
momentum: 0.9
num_arches_to_mutate: 2
num_candidates: 20
num_ensemble: 3
num_init: 1

In [41]:
class Minimum(ops.AbstractPrimitive):

    def __init__(self, **kwargs):
        super().__init__(locals())

    def forward(self, x, edge_data=None):
        return torch.minimum(x[0], x[1])

    def get_embedded_ops(self):
        return None
    

class Maximum(ops.AbstractPrimitive):

    def __init__(self, **kwargs):
        super().__init__(locals())

    def forward(self, x, edge_data=None):
        return torch.maximum(x[0], x[1])

    def get_embedded_ops(self):
        return None

In [42]:
class SimpleSearchSpace(Graph):

    OPTIMIZER_SCOPE = [
        'a_stage_1',
        'a_stage_2'
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        stages = ['a_stage_1', 'a_stage_2']

        # cell definition
        activation_cell = Graph()
        activation_cell.name = 'activation_cell'
        activation_cell.add_node(1) # input node
        activation_cell.add_node(2) # intermediate node
        activation_cell.add_node(3) # output node
        activation_cell.add_edges_from([(1, 2, EdgeData())]) # mutable intermediate edge
        activation_cell.add_edges_from([(2, 3, EdgeData().finalize())]) # immutable output edge

        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1) # input node
        self.add_node(2) # intermediate node
        for i, scope in zip(range(3, 5), stages):
            self.add_node(i, subgraph=deepcopy(activation_cell).set_scope(scope).set_input([i-1])) # activation cell i
            self.nodes[i]['subgraph'].name = scope
        self.add_node(5) # output node
        self.add_edges_from([(i, i+1, EdgeData()) for i in range(1, 5)])
        self.edges[1, 2].set('op',
            ops.Sequential(
                nn.Conv2d(3, 6, 5),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2),
                nn.Flatten()
            )) # convolutional edge
        self.edges[4, 5].set('op', 
            ops.Sequential(
                nn.Linear(400, 10), 
                nn.Softmax(dim=1)
            )) # linear edge
        
        for scope in stages:
            self.update_edges(
                update_func=lambda edge: self._set_ops(edge),
                scope=scope,
                private_edge_data=True,
            )

    def _set_ops(self, edge):
        edge.data.set('op', [
            ops.Sequential(nn.ReLU()),
            ops.Sequential(nn.Hardswish()),
            ops.Sequential(nn.LeakyReLU()),
            ops.Sequential(nn.Identity())
        ])

In [43]:
class stack():
    def __init__(self):
        pass
    def __call__(self, tensors, edges_data=None):
        return torch.stack(tensors)
    

class ComplexSearchSpace(Graph):

    OPTIMIZER_SCOPE = [
        'a_stage_1',
        'u_stage_1',
        'u_stage_2',
        'b_stage_1'
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        u_stages = ['u_stage_1', 'u_stage_2']
        
        # unary cell definition
        unary_cell = Graph()
        unary_cell.name = 'u_cell'
        unary_cell.add_node(1) # input node
        unary_cell.add_node(2) # intermediate node
        unary_cell.add_node(3) # output node
        unary_cell.add_edges_from([(1, 2, EdgeData())]) # mutable edge
        unary_cell.edges[1, 2].set('cell_name', 'u_cell')
        unary_cell.add_edges_from([(2, 3, EdgeData().finalize())]) # immutable edge
        
        # binary cell definition
        binary_cell = Graph()
        binary_cell.name = 'b_cell'
        binary_cell.add_node(1) # input node
        binary_cell.add_node(2) # input node
        binary_cell.add_node(3) # concatination node
        binary_cell.nodes[3]['comb_op'] = stack()
        binary_cell.add_node(4) # intermediate node
        binary_cell.add_node(5) # output node
        binary_cell.add_edges_from([(3, 4, EdgeData())]) # mutable edge
        binary_cell.edges[3, 4].set('cell_name', 'b_cell') 
        binary_cell.add_edges_from([(1, 3, EdgeData().finalize()),
                                    (2, 3, EdgeData().finalize()),
                                    (4, 5, EdgeData().finalize())]) # immutable edges
        
        # activation cell definition
        activation_cell = Graph()
        activation_cell.name = 'a_cell'
        activation_cell.add_node(1) # input node
        activation_cell.add_node(2, subgraph=deepcopy(unary_cell).set_scope('u_stage_1').set_input([1])) # unary node
        activation_cell.nodes[2]['subgraph'].name = 'u_stage_1'
        activation_cell.add_node(3, subgraph=deepcopy(unary_cell).set_scope('u_stage_2').set_input([1])) # unary node
        activation_cell.nodes[3]['subgraph'].name = 'u_stage_2'
        activation_cell.add_node(4, subgraph=deepcopy(binary_cell).set_scope('b_stage_1').set_input([2, 3])) # binary node
        activation_cell.nodes[4]['subgraph'].name = 'b_stage_1'
        activation_cell.add_node(5) # output node
        activation_cell.add_edges_from([(1, 2, EdgeData().finalize()), 
                                        (1, 3, EdgeData().finalize()),
                                        (2, 4, EdgeData().finalize()),
                                        (3, 4, EdgeData().finalize()), 
                                        (4, 5, EdgeData().finalize())])
        
        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1) # input node
        self.add_node(2) # intermediate node
        self.add_node(3, subgraph=deepcopy(activation_cell).set_input([2])) # activation cell
        self.nodes[3]['subgraph'].name = 'a_stage_1'
        self.add_node(4) # output node
        self.add_edges_from([(i, i+1, EdgeData()) for i in range(1, 4)])
        self.edges[1, 2].set('op',
            ops.Sequential(
                nn.Conv2d(3, 6, 5),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2),
                nn.Flatten()
            )) # convolutional edge
        self.edges[3, 4].set('op', 
            ops.Sequential(
                nn.Linear(400, 10), 
                nn.Softmax(dim=1)
            )) # linear edge
        
        for scope in u_stages:
            self.update_edges(
                update_func=lambda edge: self._set_unary_ops(edge),
                scope=scope,
                private_edge_data=True,
            ) # set unary cell ops
        
        self.update_edges(
            update_func=lambda edge: self._set_binary_ops(edge),
            scope='b_stage_1',
            private_edge_data=True
        ) # set binary cell ops
        

    def _set_unary_ops(self, edge):
        edge.data.set('op', [ops.Identity(), ops.Zero(stride=1)]) 
        
        
    def _set_binary_ops(self, edge):
        edge.data.set('op', [Minimum(), Maximum()]) 

In [44]:
search_space = ComplexSearchSpace()

In [45]:
optimizer = DARTSOptimizer(config)
optimizer.adapt_search_space(search_space)

[32m[06/29 16:47:50 nl.optimizers.oneshot.darts.optimizer]: [0mParsed graph:
Graph a_stage_1:
 Graph(
  (a_stage_1-edge(1,2)): Identity()
  (a_stage_1-edge(1,3)): Identity()
  (a_stage_1-subgraph_at(2)): Graph u_stage_1-0.1007012, scope u_stage_1, 3 nodes
  (a_stage_1-edge(2,4)): Identity()
  (a_stage_1-subgraph_at(3)): Graph u_stage_2-0.1007012, scope u_stage_2, 3 nodes
  (a_stage_1-edge(3,4)): Identity()
  (a_stage_1-subgraph_at(4)): Graph b_stage_1-0.4341718, scope b_stage_1, 5 nodes
  (a_stage_1-edge(4,5)): Identity()
)
Graph b_stage_1:
 Graph(
  (b_stage_1-edge(1,3)): Identity()
  (b_stage_1-edge(2,3)): Identity()
  (b_stage_1-edge(3,4)): MixedOp(
    (primitive-0): Minimum()
    (primitive-1): Maximum()
  )
  (b_stage_1-edge(4,5)): Identity()
)
Graph u_stage_1:
 Graph(
  (u_stage_1-edge(1,2)): MixedOp(
    (primitive-0): Identity()
    (primitive-1): Zero (stride=1)
  )
  (u_stage_1-edge(2,3)): Identity()
)
Graph u_stage_2:
 Graph(
  (u_stage_2-edge(1,2)): MixedOp(
    (primiti

In [46]:
trainer = Trainer(optimizer, config)
trainer.search()

[32m[06/29 16:47:51 nl.defaults.trainer]: [0mparam size = 0.006882MB
[32m[06/29 16:47:51 nl.defaults.trainer]: [0mStart training
Files already downloaded and verified
Files already downloaded and verified
[32m[06/29 16:47:52 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.000181, -0.000723, 0
+0.000181, -0.001192, 0
+0.000235, -0.000816, 0
[32m[06/29 16:47:53 nl.defaults.trainer]: [0mEpoch 0-0, Train loss: 2.30400, validation loss: 2.30305, learning rate: [0.025]
[32m[06/29 16:47:53 nl.defaults.trainer]: [0mcuda consumption
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |   

[32m[06/29 16:47:58 nl.defaults.trainer]: [0mEpoch 0-29, Train loss: 2.29714, validation loss: 2.29821, learning rate: [0.025]
[32m[06/29 16:48:03 nl.defaults.trainer]: [0mEpoch 0-59, Train loss: 2.24897, validation loss: 2.24524, learning rate: [0.025]
[32m[06/29 16:48:08 nl.defaults.trainer]: [0mEpoch 0-88, Train loss: 2.23534, validation loss: 2.21499, learning rate: [0.025]
[32m[06/29 16:48:14 nl.defaults.trainer]: [0mEpoch 0-118, Train loss: 2.17038, validation loss: 2.23552, learning rate: [0.025]
[32m[06/29 16:48:17 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([ 0.0002, -0.0002], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.0435, -0.0445], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.0436, -0.0442], device='cuda:0', requires_grad=True)]
[32m[06/29 16:48:18 nl.defaults.trainer]: [0mEpoch 0 done. Train accuracy (top1, top5): 20.32286, 65.93429, Validation

[32m[06/29 16:50:26 nl.defaults.trainer]: [0mEpoch 6-59, Train loss: 2.07154, validation loss: 2.01475, learning rate: [0.02478744700874427]
[32m[06/29 16:50:31 nl.defaults.trainer]: [0mEpoch 6-89, Train loss: 2.04304, validation loss: 2.09309, learning rate: [0.02478744700874427]
[32m[06/29 16:50:36 nl.defaults.trainer]: [0mEpoch 6-119, Train loss: 2.07289, validation loss: 2.07034, learning rate: [0.02478744700874427]
[32m[06/29 16:50:39 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.0019,  0.0019], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.1640, -0.1649], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.1638, -0.1644], device='cuda:0', requires_grad=True)]
[32m[06/29 16:50:39 nl.defaults.trainer]: [0mEpoch 6 done. Train accuracy (top1, top5): 38.77429, 83.17714, Validation accuracy: 38.22708, 82.88948
[32m[06/29 16:50:39 nl.optimizers.oneshot.darts.optimize

[32m[06/29 16:52:49 nl.defaults.trainer]: [0mEpoch 12-66, Train loss: 1.97799, validation loss: 2.04178, learning rate: [0.02415731783065902]
[32m[06/29 16:52:54 nl.defaults.trainer]: [0mEpoch 12-95, Train loss: 2.02713, validation loss: 2.06236, learning rate: [0.02415731783065902]
[32m[06/29 16:52:59 nl.defaults.trainer]: [0mEpoch 12-124, Train loss: 2.02625, validation loss: 2.03613, learning rate: [0.02415731783065902]
[32m[06/29 16:53:01 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.0077,  0.0077], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.2398, -0.2407], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.2366, -0.2370], device='cuda:0', requires_grad=True)]
[32m[06/29 16:53:01 nl.defaults.trainer]: [0mEpoch 12 done. Train accuracy (top1, top5): 43.85143, 87.68571, Validation accuracy: 42.86040, 87.29756
[32m[06/29 16:53:01 nl.optimizers.oneshot.darts.opti

[32m[06/29 16:55:12 nl.defaults.trainer]: [0mEpoch 18-69, Train loss: 1.97087, validation loss: 2.00626, learning rate: [0.023131935106024185]
[32m[06/29 16:55:17 nl.defaults.trainer]: [0mEpoch 18-99, Train loss: 1.96976, validation loss: 2.00142, learning rate: [0.023131935106024185]
[32m[06/29 16:55:22 nl.defaults.trainer]: [0mEpoch 18-129, Train loss: 1.98807, validation loss: 2.02667, learning rate: [0.023131935106024185]
[32m[06/29 16:55:23 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.1066,  0.1066], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.2996, -0.3004], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.2365, -0.2369], device='cuda:0', requires_grad=True)]
[32m[06/29 16:55:23 nl.defaults.trainer]: [0mEpoch 18 done. Train accuracy (top1, top5): 46.68571, 89.32857, Validation accuracy: 45.54630, 88.65192
[32m[06/29 16:55:23 nl.optimizers.oneshot.darts.o

[32m[06/29 16:57:35 nl.defaults.trainer]: [0mEpoch 24-78, Train loss: 2.00406, validation loss: 2.00500, learning rate: [0.02174762352905694]
[32m[06/29 16:57:40 nl.defaults.trainer]: [0mEpoch 24-108, Train loss: 1.93881, validation loss: 1.97700, learning rate: [0.02174762352905694]
[32m[06/29 16:57:45 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.2740,  0.2740], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.4179, -0.4186], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.1333, -0.1337], device='cuda:0', requires_grad=True)]
[32m[06/29 16:57:45 nl.defaults.trainer]: [0mEpoch 24 done. Train accuracy (top1, top5): 48.75714, 90.04857, Validation accuracy: 47.87865, 90.14884
[32m[06/29 16:57:45 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.274009, +0.274009, 1
+0.417941, -0.418618, 0
+0.133333, -0.133707, 0
[32m[06/29 16:5

[32m[06/29 16:59:57 nl.defaults.trainer]: [0mEpoch 30-76, Train loss: 1.91114, validation loss: 1.97223, learning rate: [0.020053423027509686]
[32m[06/29 17:00:02 nl.defaults.trainer]: [0mEpoch 30-106, Train loss: 1.95163, validation loss: 1.97738, learning rate: [0.020053423027509686]
[32m[06/29 17:00:07 nl.defaults.trainer]: [0mEpoch 30-135, Train loss: 1.92003, validation loss: 1.98991, learning rate: [0.020053423027509686]
[32m[06/29 17:00:07 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.3446,  0.3446], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.4863, -0.4869], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.0695, -0.0699], device='cuda:0', requires_grad=True)]
[32m[06/29 17:00:07 nl.defaults.trainer]: [0mEpoch 30 done. Train accuracy (top1, top5): 50.00857, 90.93143, Validation accuracy: 49.46966, 90.58223
[32m[06/29 17:00:07 nl.optimizers.oneshot.darts.

[32m[06/29 17:02:21 nl.defaults.trainer]: [0mEpoch 36-88, Train loss: 1.94927, validation loss: 1.93521, learning rate: [0.018109351498780877]
[32m[06/29 17:02:26 nl.defaults.trainer]: [0mEpoch 36-118, Train loss: 1.93874, validation loss: 1.93164, learning rate: [0.018109351498780877]
[32m[06/29 17:02:29 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.3917,  0.3917], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.5270, -0.5276], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.0209, -0.0213], device='cuda:0', requires_grad=True)]
[32m[06/29 17:02:29 nl.defaults.trainer]: [0mEpoch 36 done. Train accuracy (top1, top5): 51.47143, 91.50286, Validation accuracy: 50.43910, 91.13823
[32m[06/29 17:02:29 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.391667, +0.391667, 1
+0.527040, -0.527633, 0
+0.020941, -0.021281, 0
[32m[06/29 17

[32m[06/29 17:04:43 nl.defaults.trainer]: [0mEpoch 42-94, Train loss: 1.90924, validation loss: 1.95648, learning rate: [0.015984278645978258]
[32m[06/29 17:04:48 nl.defaults.trainer]: [0mEpoch 42-123, Train loss: 1.92022, validation loss: 1.91043, learning rate: [0.015984278645978258]
[32m[06/29 17:04:50 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.4542,  0.4542], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.5813, -0.5818], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.0426,  0.0423], device='cuda:0', requires_grad=True)]
[32m[06/29 17:04:50 nl.defaults.trainer]: [0mEpoch 42 done. Train accuracy (top1, top5): 52.37143, 91.49714, Validation accuracy: 51.89040, 91.19811
[32m[06/29 17:04:50 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.454154, +0.454154, 1
+0.581273, -0.581836, 0
-0.042585, +0.042254, 1
[32m[06/29 17

[32m[06/29 17:07:06 nl.defaults.trainer]: [0mEpoch 48-100, Train loss: 1.90583, validation loss: 1.92787, learning rate: [0.013753486234351759]
[32m[06/29 17:07:11 nl.defaults.trainer]: [0mEpoch 48-129, Train loss: 1.93382, validation loss: 1.95207, learning rate: [0.013753486234351759]
[32m[06/29 17:07:12 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.4809,  0.4809], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.6069, -0.6075], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.0678,  0.0675], device='cuda:0', requires_grad=True)]
[32m[06/29 17:07:12 nl.defaults.trainer]: [0mEpoch 48 done. Train accuracy (top1, top5): 53.12571, 91.96857, Validation accuracy: 52.88549, 91.71704
[32m[06/29 17:07:12 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.480936, +0.480936, 1
+0.606941, -0.607478, 0
-0.067825, +0.067503, 1
[32m[06/29 1

[32m[06/29 17:09:28 nl.defaults.trainer]: [0mEpoch 54-106, Train loss: 1.92014, validation loss: 1.90585, learning rate: [0.01149600119722834]
[32m[06/29 17:09:33 nl.defaults.trainer]: [0mEpoch 54-136, Train loss: 1.89447, validation loss: 1.94558, learning rate: [0.01149600119722834]
[32m[06/29 17:09:33 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.5176,  0.5176], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.6484, -0.6489], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.0978,  0.0975], device='cuda:0', requires_grad=True)]
[32m[06/29 17:09:33 nl.defaults.trainer]: [0mEpoch 54 done. Train accuracy (top1, top5): 54.05714, 92.20000, Validation accuracy: 54.19708, 92.04208
[32m[06/29 17:09:33 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.517612, +0.517612, 1
+0.648423, -0.648935, 0
-0.097802, +0.097486, 1
[32m[06/29 17:

[32m[06/29 17:11:51 nl.defaults.trainer]: [0mEpoch 60-115, Train loss: 1.89199, validation loss: 1.93643, learning rate: [0.009291796067500625]
[32m[06/29 17:11:55 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.5616,  0.5616], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.6944, -0.6949], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.1356,  0.1353], device='cuda:0', requires_grad=True)]
[32m[06/29 17:11:55 nl.defaults.trainer]: [0mEpoch 60 done. Train accuracy (top1, top5): 55.21429, 92.39714, Validation accuracy: 54.33109, 92.12192
[32m[06/29 17:11:55 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.561574, +0.561574, 1
+0.694419, -0.694908, 0
-0.135572, +0.135263, 1
[32m[06/29 17:11:56 nl.defaults.trainer]: [0mEpoch 61-8, Train loss: 1.92079, validation loss: 1.90504, learning rate: [0.008935144957056492]
[32m[06/29 17:

[32m[06/29 17:14:14 nl.defaults.trainer]: [0mEpoch 66-122, Train loss: 1.97629, validation loss: 1.92971, learning rate: [0.007218955910779407]
[32m[06/29 17:14:16 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.6100,  0.6100], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.7464, -0.7469], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.1764,  0.1761], device='cuda:0', requires_grad=True)]
[32m[06/29 17:14:16 nl.defaults.trainer]: [0mEpoch 66 done. Train accuracy (top1, top5): 55.84000, 92.67429, Validation accuracy: 55.18932, 92.27019
[32m[06/29 17:14:16 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.610038, +0.610038, 1
+0.746398, -0.746862, 0
-0.176374, +0.176071, 1
[32m[06/29 17:14:19 nl.defaults.trainer]: [0mEpoch 67-15, Train loss: 1.93341, validation loss: 1.89031, learning rate: [0.006891503010995536]
[32m[06/29 17

[32m[06/29 17:16:37 nl.defaults.trainer]: [0mEpoch 72-132, Train loss: 1.89048, validation loss: 1.90976, learning rate: [0.005350912123015718]
[32m[06/29 17:16:38 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.6309,  0.6309], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.7754, -0.7759], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.1870,  0.1867], device='cuda:0', requires_grad=True)]
[32m[06/29 17:16:38 nl.defaults.trainer]: [0mEpoch 72 done. Train accuracy (top1, top5): 56.28286, 92.53714, Validation accuracy: 55.53718, 92.40990
[32m[06/29 17:16:38 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.630901, +0.630901, 1
+0.775411, -0.775853, 0
-0.187028, +0.186730, 1
[32m[06/29 17:16:42 nl.defaults.trainer]: [0mEpoch 73-24, Train loss: 1.96042, validation loss: 1.94817, learning rate: [0.0050642576161161745]
[32m[06/29 1

[32m[06/29 17:19:00 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.6320,  0.6320], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.7901, -0.7905], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.1751,  0.1748], device='cuda:0', requires_grad=True)]
[32m[06/29 17:19:00 nl.defaults.trainer]: [0mEpoch 78 done. Train accuracy (top1, top5): 56.94000, 92.93429, Validation accuracy: 56.15591, 92.67222
[32m[06/29 17:19:00 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.631996, +0.631996, 1
+0.790113, -0.790533, 0
-0.175121, +0.174829, 1
[32m[06/29 17:19:01 nl.defaults.trainer]: [0mEpoch 79-4, Train loss: 1.93377, validation loss: 1.89864, learning rate: [0.0035181398514917118]
[32m[06/29 17:19:06 nl.defaults.trainer]: [0mEpoch 79-34, Train loss: 1.86696, validation loss: 1.89906, learning rate: [0.0035181398514917118]
[32m[06/29 17

[32m[06/29 17:21:22 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.6708,  0.6708], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.8405, -0.8409], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.2005,  0.2002], device='cuda:0', requires_grad=True)]
[32m[06/29 17:21:22 nl.defaults.trainer]: [0mEpoch 84 done. Train accuracy (top1, top5): 57.38857, 92.92286, Validation accuracy: 56.37260, 92.80908
[32m[06/29 17:21:22 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.670785, +0.670785, 1
+0.840490, -0.840888, 0
-0.200530, +0.200242, 1
[32m[06/29 17:21:24 nl.defaults.trainer]: [0mEpoch 85-9, Train loss: 1.92585, validation loss: 1.92219, learning rate: [0.0023079217097395845]
[32m[06/29 17:21:29 nl.defaults.trainer]: [0mEpoch 85-39, Train loss: 1.90481, validation loss: 1.85242, learning rate: [0.0023079217097395845]
[32m[06/29 17

[32m[06/29 17:23:43 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.6836,  0.6836], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.8652, -0.8656], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.1989,  0.1986], device='cuda:0', requires_grad=True)]
[32m[06/29 17:23:43 nl.defaults.trainer]: [0mEpoch 90 done. Train accuracy (top1, top5): 57.70571, 93.03143, Validation accuracy: 56.75182, 92.67222
[32m[06/29 17:23:43 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.683575, +0.683575, 1
+0.865191, -0.865567, 0
-0.198873, +0.198591, 1
[32m[06/29 17:23:47 nl.defaults.trainer]: [0mEpoch 91-17, Train loss: 1.90843, validation loss: 1.89098, learning rate: [0.0014764757718766838]
[32m[06/29 17:23:52 nl.defaults.trainer]: [0mEpoch 91-47, Train loss: 1.89648, validation loss: 1.93178, learning rate: [0.0014764757718766838]
[32m[06/29 1

[32m[06/29 17:26:06 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.6945,  0.6945], device='cuda:0', requires_grad=True), Parameter containing:
tensor([ 0.8845, -0.8849], device='cuda:0', requires_grad=True), Parameter containing:
tensor([-0.1971,  0.1968], device='cuda:0', requires_grad=True)]
[32m[06/29 17:26:06 nl.defaults.trainer]: [0mEpoch 96 done. Train accuracy (top1, top5): 57.56571, 93.01143, Validation accuracy: 56.26426, 92.57242
[32m[06/29 17:26:06 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.694516, +0.694516, 1
+0.884497, -0.884851, 0
-0.197071, +0.196793, 1
[32m[06/29 17:26:10 nl.defaults.trainer]: [0mEpoch 97-22, Train loss: 1.83256, validation loss: 1.86518, learning rate: [0.00105325642476304]
[32m[06/29 17:26:15 nl.defaults.trainer]: [0mEpoch 97-52, Train loss: 1.86873, validation loss: 1.89520, learning rate: [0.00105325642476304]
[32m[06/29 17:26

In [47]:
trainer.evaluate_oneshot()

[32m[06/29 18:09:42 nl.defaults.trainer]: [0mStart one-shot evaluation
Files already downloaded and verified
Files already downloaded and verified
[32m[06/29 18:09:48 nl.defaults.trainer]: [0mEvaluation finished


56.85999998982747