In [1]:
import logging
from naslib.defaults.trainer import Trainer
from naslib.optimizers import DARTSOptimizer
from naslib.search_spaces import DartsSearchSpace
from naslib.utils import utils, setup_logger, get_config_from_args, set_seed, log_args
from naslib.search_spaces.core.graph import Graph, EdgeData
from naslib.search_spaces.core import primitives as ops
from torch import nn
from fvcore.common.config import CfgNode
from copy import deepcopy
from IPython.display import clear_output

device: cuda:0
device: cpu
device: cuda:0
device: cuda:0
device: cuda:0
device: cuda:0


In [8]:
config = utils.get_config_from_args(config_type='nas')
# config.search.epochs = 1 # for testing
config.optimizer = 'darts'
utils.set_seed(config.seed)
clear_output(wait=True)
utils.log_args(config)

logger = setup_logger(config.save + '/log.log')
logger.setLevel(logging.INFO)

[32m[06/27 10:13:28 nl.utils.utils]: [0mdataset....................................cifar10
[32m[06/27 10:13:28 nl.utils.utils]: [0mseed.............................................0
[32m[06/27 10:13:28 nl.utils.utils]: [0msearch_space...........................nasbench201
[32m[06/27 10:13:28 nl.utils.utils]: [0mout_dir........................................run
[32m[06/27 10:13:28 nl.utils.utils]: [0moptimizer....................................darts
[32m[06/27 10:13:28 nl.utils.utils]: [0msearchacq_fn_optimization: random_sampling
acq_fn_type: its
arch_learning_rate: 0.0003
arch_weight_decay: 0.001
batch_size: 256
checkpoint_freq: 1000
cutout: False
cutout_length: 16
cutout_prob: 1.0
data_size: 25000
debug_predictor: False
drop_path_prob: 0.0
encoding_type: adjacency_one_hot
epochs: 100
fidelity: -1
gpu: None
grad_clip: 5
k: 10
learning_rate: 0.025
learning_rate_min: 0.001
max_mutations: 1
momentum: 0.9
num_arches_to_mutate: 2
num_candidates: 20
num_ensemble: 3
num_init: 1

In [9]:
from naslib.search_spaces.core.graph import Graph, EdgeData
from naslib.search_spaces.core import primitives as ops
from torch import nn
from copy import deepcopy

class DartsSearchSpace(Graph):

    OPTIMIZER_SCOPE = [
        'a_stage_1',
        'a_stage_2', 
        'a_stage_3'
    ]

    QUERYABLE = False

    def __init__(self):
        super().__init__()

        channels = [(16 * 5 * 5, 120), (120, 84), (84, 10)]
        stages = ['a_stage_1', 'a_stage_2', 'a_stage_3']

        # cell definition
        activation_cell = Graph()
        activation_cell.name = 'activation_cell'
        activation_cell.add_node(1) # input node
        activation_cell.add_node(2) # intermediate node
        activation_cell.add_node(3) # output node
        activation_cell.add_edges_from([(1, 2, EdgeData())]) # mutable intermediate edge
        activation_cell.edges[1, 2].set('cell_name', 'activation_cell') 
        activation_cell.add_edges_from([(2, 3, EdgeData().finalize())]) # immutable output edge

        # macroarchitecture definition
        self.name = 'makrograph'
        self.add_node(1) # input node
        self.add_node(2) # intermediate node
        for i, scope in zip(range(3, 6), stages):
            self.add_node(i, subgraph=deepcopy(activation_cell).set_scope(scope).set_input([i-1])) # activation node i
            self.nodes[i]['subgraph'].name = scope # set 
        self.add_node(6) # output node
        self.add_edges_from([(i, i+1, EdgeData()) for i in range(1, 6)])
        self.edges[1, 2].set('op',
            ops.Sequential(
                nn.Conv2d(3, 6, 5),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.MaxPool2d(2),
                nn.Flatten()
            )) # convolutional edge
        
        for scope, (in_dim, out_dim) in zip(stages, channels):
            self.update_edges(
                update_func=lambda edge: self._set_ops(edge, in_dim, out_dim),
                scope=scope,
                private_edge_data=True,
            )

    def _set_ops(self, edge, in_dim, out_dim):
        if out_dim != 10:
            edge.data.set('op', [
                ops.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU()),
                ops.Sequential(nn.Linear(in_dim, out_dim), nn.Hardswish()),
                ops.Sequential(nn.Linear(in_dim, out_dim), nn.LeakyReLU()),
                ops.Sequential(nn.Linear(in_dim, out_dim), nn.Identity())
            ])
        else:
            edge.data.set('op', [
                ops.Sequential(nn.Linear(in_dim, out_dim), nn.Softmax(dim=1))
            ])  

In [10]:
search_space = DartsSearchSpace()

In [11]:
optimizer = DARTSOptimizer(config)
optimizer.adapt_search_space(search_space)

[32m[06/27 10:13:37 nl.optimizers.oneshot.darts.optimizer]: [0mParsed graph:
Graph a_stage_1:
 Graph(
  (a_stage_1-edge(1,2)): MixedOp(
    (primitive-0): Sequential(
      (op): Sequential(
        (0): Linear(in_features=400, out_features=120, bias=True)
        (1): ReLU()
      )
    )
    (primitive-1): Sequential(
      (op): Sequential(
        (0): Linear(in_features=400, out_features=120, bias=True)
        (1): Hardswish()
      )
    )
    (primitive-2): Sequential(
      (op): Sequential(
        (0): Linear(in_features=400, out_features=120, bias=True)
        (1): LeakyReLU(negative_slope=0.01)
      )
    )
    (primitive-3): Sequential(
      (op): Sequential(
        (0): Linear(in_features=400, out_features=120, bias=True)
        (1): Identity()
      )
    )
  )
  (a_stage_1-edge(2,3)): Identity()
)
Graph a_stage_2:
 Graph(
  (a_stage_2-edge(1,2)): MixedOp(
    (primitive-0): Sequential(
      (op): Sequential(
        (0): Linear(in_features=120, out_features=84,

In [12]:
trainer = Trainer(optimizer, config)
trainer.search()

[32m[06/27 10:13:38 nl.defaults.trainer]: [0mparam size = 0.236858MB
[32m[06/27 10:13:38 nl.defaults.trainer]: [0mStart training
Files already downloaded and verified
Files already downloaded and verified
[32m[06/27 10:13:39 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.000529, +0.001313, +0.000432, +0.000184, 1
+0.000824, -0.001669, +0.000387, +0.000960, 3
+0.001344, 0
[32m[06/27 10:13:40 nl.defaults.trainer]: [0mEpoch 0-0, Train loss: 2.30296, validation loss: 2.30231, learning rate: [0.025]
[32m[06/27 10:13:45 nl.defaults.trainer]: [0mEpoch 0-30, Train loss: 2.30282, validation loss: 2.30232, learning rate: [0.025]
[32m[06/27 10:13:50 nl.defaults.trainer]: [0mEpoch 0-60, Train loss: 2.30240, validation loss: 2.30287, learning rate: [0.025]
[32m[06/27 10:13:55 nl.defaults.trainer]: [0mEpoch 0-90, Train loss: 2.30253, validation loss: 2.30225, learning rate: [0.025]
[32m[06/27 10:14:00 nl.defaults.trainer]: [0mEpoch 0-120, Tr

[32m[06/27 10:15:58 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.3235, -0.3281, -0.3506,  0.4281], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([-0.2555, -0.2628, -0.2551,  0.3309], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([1.1911e-43], device='cuda:0', requires_grad=True)]
[32m[06/27 10:15:58 nl.defaults.trainer]: [0mEpoch 5 done. Train accuracy (top1, top5): 25.34000, 72.82000, Validation accuracy: 26.10059, 72.66195
[32m[06/27 10:15:58 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.323537, -0.328100, -0.350614, +0.428123, 3
-0.255544, -0.262764, -0.255105, +0.330919, 3
+0.000000, 0
[32m[06/27 10:16:02 nl.defaults.trainer]: [0mEpoch 6-20, Train loss: 2.18517, validation loss: 2.14826, learning rate: [0.02478744700874427]
[32m[06/27 10:16:07 nl.defaults.trainer]: [0mEpoch 6-50, Train loss: 2.20430, valida

[32m[06/27 10:18:10 nl.defaults.trainer]: [0mEpoch 11-88, Train loss: 2.12945, validation loss: 2.11816, learning rate: [0.02429056922745071]
[32m[06/27 10:18:15 nl.defaults.trainer]: [0mEpoch 11-118, Train loss: 2.11600, validation loss: 2.10283, learning rate: [0.02429056922745071]
[32m[06/27 10:18:18 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.2908, -0.2723, -0.3173,  0.4096], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([-0.2758, -0.2630, -0.2219,  0.3516], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([1.1911e-43], device='cuda:0', requires_grad=True)]
[32m[06/27 10:18:18 nl.defaults.trainer]: [0mEpoch 11 done. Train accuracy (top1, top5): 35.25429, 81.03429, Validation accuracy: 34.34934, 80.16081
[32m[06/27 10:18:18 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.290776, -0.272266, -0.317259, +0.409648, 

[32m[06/27 10:20:17 nl.defaults.trainer]: [0mEpoch 17-11, Train loss: 2.08191, validation loss: 2.02331, learning rate: [0.023328904324047328]
[32m[06/27 10:20:22 nl.defaults.trainer]: [0mEpoch 17-42, Train loss: 2.06835, validation loss: 2.09555, learning rate: [0.023328904324047328]
[32m[06/27 10:20:27 nl.defaults.trainer]: [0mEpoch 17-72, Train loss: 2.03738, validation loss: 2.03639, learning rate: [0.023328904324047328]
[32m[06/27 10:20:32 nl.defaults.trainer]: [0mEpoch 17-102, Train loss: 2.05480, validation loss: 2.05313, learning rate: [0.023328904324047328]
[32m[06/27 10:20:37 nl.defaults.trainer]: [0mEpoch 17-132, Train loss: 2.04586, validation loss: 2.03606, learning rate: [0.023328904324047328]
[32m[06/27 10:20:38 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.2299, -0.2131, -0.2790,  0.3594], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([-0.2752, -0.2143, -0.2151,  0

[32m[06/27 10:22:33 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.191816, -0.181581, -0.246612, +0.325780, 3
-0.265507, -0.215335, -0.239729, +0.360228, 3
+0.000000, 0
[32m[06/27 10:22:34 nl.defaults.trainer]: [0mEpoch 23-2, Train loss: 2.03155, validation loss: 2.00680, learning rate: [0.022001332835565518]
[32m[06/27 10:22:39 nl.defaults.trainer]: [0mEpoch 23-32, Train loss: 2.00301, validation loss: 2.04757, learning rate: [0.022001332835565518]
[32m[06/27 10:22:44 nl.defaults.trainer]: [0mEpoch 23-62, Train loss: 2.02952, validation loss: 2.03021, learning rate: [0.022001332835565518]
[32m[06/27 10:22:49 nl.defaults.trainer]: [0mEpoch 23-92, Train loss: 1.97464, validation loss: 2.05167, learning rate: [0.022001332835565518]
[32m[06/27 10:22:54 nl.defaults.trainer]: [0mEpoch 23-122, Train loss: 2.04171, validation loss: 2.03158, learning rate: [0.022001332835565518]
[32m[06/27 10:22:56 nl.optimizers.oneshot.darts.optimizer]: 

[32m[06/27 10:24:52 nl.defaults.trainer]: [0mEpoch 28 done. Train accuracy (top1, top5): 47.51143, 90.51143, Validation accuracy: 47.11736, 90.47958
[32m[06/27 10:24:52 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.127149, -0.127609, -0.201032, +0.267145, 3
-0.267184, -0.195244, -0.258498, +0.369733, 3
+0.000000, 0
[32m[06/27 10:24:56 nl.defaults.trainer]: [0mEpoch 29-21, Train loss: 1.98537, validation loss: 2.02530, learning rate: [0.020354884643835724]
[32m[06/27 10:25:01 nl.defaults.trainer]: [0mEpoch 29-51, Train loss: 2.01823, validation loss: 2.00136, learning rate: [0.020354884643835724]
[32m[06/27 10:25:06 nl.defaults.trainer]: [0mEpoch 29-81, Train loss: 1.94557, validation loss: 1.98259, learning rate: [0.020354884643835724]
[32m[06/27 10:25:11 nl.defaults.trainer]: [0mEpoch 29-112, Train loss: 2.02485, validation loss: 1.95680, learning rate: [0.020354884643835724]
[32m[06/27 10:25:15 nl.optimizers.oneshot.darts.optim

[32m[06/27 10:27:10 nl.defaults.trainer]: [0mEpoch 34 done. Train accuracy (top1, top5): 50.53143, 91.77429, Validation accuracy: 49.42119, 91.72274
[32m[06/27 10:27:10 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.089456, -0.086846, -0.166087, +0.226517, 3
-0.268539, -0.185934, -0.272110, +0.380117, 3
+0.000000, 0
[32m[06/27 10:27:13 nl.defaults.trainer]: [0mEpoch 35-16, Train loss: 1.97568, validation loss: 1.94666, learning rate: [0.018447885996874566]
[32m[06/27 10:27:18 nl.defaults.trainer]: [0mEpoch 35-46, Train loss: 1.94615, validation loss: 1.95124, learning rate: [0.018447885996874566]
[32m[06/27 10:27:23 nl.defaults.trainer]: [0mEpoch 35-76, Train loss: 1.96707, validation loss: 2.00272, learning rate: [0.018447885996874566]
[32m[06/27 10:27:28 nl.defaults.trainer]: [0mEpoch 35-106, Train loss: 1.92623, validation loss: 1.91695, learning rate: [0.018447885996874566]
[32m[06/27 10:27:33 nl.defaults.trainer]: [0mEpoch 3

[32m[06/27 10:29:29 nl.defaults.trainer]: [0mEpoch 40 done. Train accuracy (top1, top5): 52.67143, 92.34571, Validation accuracy: 51.38572, 92.10196
[32m[06/27 10:29:29 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.053584, -0.056476, -0.148490, +0.194349, 3
-0.264828, -0.175606, -0.274311, +0.381728, 3
+0.000000, 0
[32m[06/27 10:29:30 nl.defaults.trainer]: [0mEpoch 41-5, Train loss: 1.92735, validation loss: 1.91318, learning rate: [0.016347893272470757]
[32m[06/27 10:29:36 nl.defaults.trainer]: [0mEpoch 41-36, Train loss: 1.93593, validation loss: 1.95087, learning rate: [0.016347893272470757]
[32m[06/27 10:29:41 nl.defaults.trainer]: [0mEpoch 41-66, Train loss: 1.95679, validation loss: 1.92707, learning rate: [0.016347893272470757]
[32m[06/27 10:29:46 nl.defaults.trainer]: [0mEpoch 41-96, Train loss: 1.84390, validation loss: 1.97401, learning rate: [0.016347893272470757]
[32m[06/27 10:29:51 nl.defaults.trainer]: [0mEpoch 41-

[32m[06/27 10:31:48 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.0472, -0.0518, -0.1277,  0.1834], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([-0.2704, -0.1547, -0.2745,  0.3812], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([1.1911e-43], device='cuda:0', requires_grad=True)]
[32m[06/27 10:31:48 nl.defaults.trainer]: [0mEpoch 46 done. Train accuracy (top1, top5): 54.57143, 92.97429, Validation accuracy: 53.84352, 92.83474
[32m[06/27 10:31:48 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.047159, -0.051820, -0.127711, +0.183417, 3
-0.270356, -0.154651, -0.274472, +0.381169, 3
+0.000000, 0
[32m[06/27 10:31:53 nl.defaults.trainer]: [0mEpoch 47-28, Train loss: 1.85741, validation loss: 1.91557, learning rate: [0.014129299759822168]
[32m[06/27 10:31:58 nl.defaults.trainer]: [0mEpoch 47-58, Train loss: 1.91024, va

[32m[06/27 10:34:00 nl.defaults.trainer]: [0mEpoch 52-92, Train loss: 1.92628, validation loss: 1.90777, learning rate: [0.012246513765648233]
[32m[06/27 10:34:05 nl.defaults.trainer]: [0mEpoch 52-122, Train loss: 1.86966, validation loss: 1.93218, learning rate: [0.012246513765648233]
[32m[06/27 10:34:07 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([-0.0054, -0.0084, -0.0959,  0.1362], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([-0.2535, -0.1526, -0.2721,  0.3785], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([1.1911e-43], device='cuda:0', requires_grad=True)]
[32m[06/27 10:34:07 nl.defaults.trainer]: [0mEpoch 52 done. Train accuracy (top1, top5): 56.21143, 93.44286, Validation accuracy: 55.15511, 93.09991
[32m[06/27 10:34:07 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
-0.005351, -0.008439, -0.095880, +0.136159

[32m[06/27 10:36:07 nl.defaults.trainer]: [0mEpoch 58-22, Train loss: 1.87376, validation loss: 1.90462, learning rate: [0.01001572135402173]
[32m[06/27 10:36:12 nl.defaults.trainer]: [0mEpoch 58-52, Train loss: 1.93280, validation loss: 1.91972, learning rate: [0.01001572135402173]
[32m[06/27 10:36:18 nl.defaults.trainer]: [0mEpoch 58-82, Train loss: 1.92041, validation loss: 1.89420, learning rate: [0.01001572135402173]
[32m[06/27 10:36:23 nl.defaults.trainer]: [0mEpoch 58-112, Train loss: 1.87793, validation loss: 1.89864, learning rate: [0.01001572135402173]
[32m[06/27 10:36:27 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([ 0.0189,  0.0024, -0.0950,  0.1194], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([-0.2631, -0.1515, -0.2832,  0.3902], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([1.1911e-43], device='cuda:0', requires_grad=True)]
[32m[06/27 10:3

[32m[06/27 10:38:25 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
+0.034585, +0.026617, -0.093265, +0.100196, 3
-0.241882, -0.139880, -0.274825, +0.378599, 3
+0.000000, 0
[32m[06/27 10:38:25 nl.defaults.trainer]: [0mEpoch 64-0, Train loss: 1.92323, validation loss: 1.85493, learning rate: [0.007890648501219118]
[32m[06/27 10:38:30 nl.defaults.trainer]: [0mEpoch 64-30, Train loss: 1.86775, validation loss: 1.90693, learning rate: [0.007890648501219118]
[32m[06/27 10:38:35 nl.defaults.trainer]: [0mEpoch 64-60, Train loss: 1.86908, validation loss: 1.90975, learning rate: [0.007890648501219118]
[32m[06/27 10:38:40 nl.defaults.trainer]: [0mEpoch 64-90, Train loss: 1.88292, validation loss: 1.88692, learning rate: [0.007890648501219118]
[32m[06/27 10:38:45 nl.defaults.trainer]: [0mEpoch 64-120, Train loss: 1.85365, validation loss: 1.92398, learning rate: [0.007890648501219118]
[32m[06/27 10:38:48 nl.optimizers.oneshot.darts.optimizer]: 

[32m[06/27 10:40:44 nl.defaults.trainer]: [0mEpoch 69 done. Train accuracy (top1, top5): 59.74000, 94.14286, Validation accuracy: 58.22879, 93.90682
[32m[06/27 10:40:44 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
+0.079594, +0.060192, -0.073885, +0.055281, 0
-0.244054, -0.153009, -0.289380, +0.394228, 3
+0.000000, 0
[32m[06/27 10:40:47 nl.defaults.trainer]: [0mEpoch 70-15, Train loss: 1.83823, validation loss: 1.92398, learning rate: [0.005946576972490318]
[32m[06/27 10:40:52 nl.defaults.trainer]: [0mEpoch 70-45, Train loss: 1.82258, validation loss: 1.82930, learning rate: [0.005946576972490318]
[32m[06/27 10:40:57 nl.defaults.trainer]: [0mEpoch 70-75, Train loss: 1.86532, validation loss: 1.84459, learning rate: [0.005946576972490318]
[32m[06/27 10:41:02 nl.defaults.trainer]: [0mEpoch 70-104, Train loss: 1.85803, validation loss: 1.87528, learning rate: [0.005946576972490318]
[32m[06/27 10:41:07 nl.defaults.trainer]: [0mEpoch 7

[32m[06/27 10:43:04 nl.defaults.trainer]: [0mEpoch 75 done. Train accuracy (top1, top5): 60.62571, 94.40857, Validation accuracy: 58.67359, 94.06364
[32m[06/27 10:43:04 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
+0.111382, +0.086710, -0.062312, +0.021563, 0
-0.255282, -0.177951, -0.312404, +0.421237, 3
+0.000000, 0
[32m[06/27 10:43:04 nl.defaults.trainer]: [0mEpoch 76-2, Train loss: 1.87498, validation loss: 1.86836, learning rate: [0.00425237647094306]
[32m[06/27 10:43:09 nl.defaults.trainer]: [0mEpoch 76-32, Train loss: 1.80951, validation loss: 1.92093, learning rate: [0.00425237647094306]
[32m[06/27 10:43:14 nl.defaults.trainer]: [0mEpoch 76-62, Train loss: 1.86040, validation loss: 1.88246, learning rate: [0.00425237647094306]
[32m[06/27 10:43:19 nl.defaults.trainer]: [0mEpoch 76-92, Train loss: 1.87222, validation loss: 1.91990, learning rate: [0.00425237647094306]
[32m[06/27 10:43:24 nl.defaults.trainer]: [0mEpoch 76-122,

[32m[06/27 10:45:22 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([ 0.1223,  0.1131, -0.0561, -0.0004], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([-0.2620, -0.1715, -0.3374,  0.4341], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([1.1911e-43], device='cuda:0', requires_grad=True)]
[32m[06/27 10:45:22 nl.defaults.trainer]: [0mEpoch 81 done. Train accuracy (top1, top5): 61.30571, 94.39714, Validation accuracy: 59.12409, 94.06079
[32m[06/27 10:45:22 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
+0.122254, +0.113092, -0.056064, -0.000362, 0
-0.262021, -0.171534, -0.337417, +0.434138, 3
+0.000000, 0
[32m[06/27 10:45:26 nl.defaults.trainer]: [0mEpoch 82-20, Train loss: 1.86792, validation loss: 1.88708, learning rate: [0.002868064893975819]
[32m[06/27 10:45:31 nl.defaults.trainer]: [0mEpoch 82-50, Train loss: 1.86205, va

[32m[06/27 10:47:34 nl.defaults.trainer]: [0mEpoch 87-83, Train loss: 1.82481, validation loss: 1.89604, learning rate: [0.0019869444917922276]
[32m[06/27 10:47:40 nl.defaults.trainer]: [0mEpoch 87-113, Train loss: 1.84941, validation loss: 1.84690, learning rate: [0.0019869444917922276]
[32m[06/27 10:47:43 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([ 0.1461,  0.1363, -0.0527, -0.0271], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([-0.2853, -0.1909, -0.3706,  0.4672], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([1.1911e-43], device='cuda:0', requires_grad=True)]
[32m[06/27 10:47:43 nl.defaults.trainer]: [0mEpoch 87 done. Train accuracy (top1, top5): 61.47143, 94.50857, Validation accuracy: 60.23038, 94.46567
[32m[06/27 10:47:43 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
+0.146070, +0.136280, -0.052718, -0.0271

[32m[06/27 10:49:41 nl.defaults.trainer]: [0mEpoch 93-7, Train loss: 1.80829, validation loss: 1.90000, learning rate: [0.0012889988567350316]
[32m[06/27 10:49:46 nl.defaults.trainer]: [0mEpoch 93-37, Train loss: 1.88105, validation loss: 1.85761, learning rate: [0.0012889988567350316]
[32m[06/27 10:49:51 nl.defaults.trainer]: [0mEpoch 93-67, Train loss: 1.85404, validation loss: 1.83059, learning rate: [0.0012889988567350316]
[32m[06/27 10:49:56 nl.defaults.trainer]: [0mEpoch 93-97, Train loss: 1.87670, validation loss: 1.85115, learning rate: [0.0012889988567350316]
[32m[06/27 10:50:01 nl.defaults.trainer]: [0mEpoch 93-127, Train loss: 1.84609, validation loss: 1.84077, learning rate: [0.0012889988567350316]
[32m[06/27 10:50:03 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights before discretization: [Parameter containing:
tensor([ 0.1611,  0.1403, -0.0495, -0.0400], device='cuda:0',
       requires_grad=True), Parameter containing:
tensor([-0.2886, -0.2017, -0.3964,

[32m[06/27 10:51:59 nl.defaults.trainer]: [0mEpoch 98 done. Train accuracy (top1, top5): 62.53714, 94.43143, Validation accuracy: 60.39291, 94.36017
[32m[06/27 10:51:59 nl.optimizers.oneshot.darts.optimizer]: [0mArch weights (alphas, last column argmax): 
+0.164252, +0.156129, -0.061027, -0.046950, 0
-0.286570, -0.203149, -0.408523, +0.497054, 3
+0.000000, 0
[32m[06/27 10:52:03 nl.defaults.trainer]: [0mEpoch 99-28, Train loss: 1.80052, validation loss: 1.86617, learning rate: [0.0010059212756112208]
[32m[06/27 10:52:08 nl.defaults.trainer]: [0mEpoch 99-58, Train loss: 1.79381, validation loss: 1.88038, learning rate: [0.0010059212756112208]
[32m[06/27 10:52:14 nl.defaults.trainer]: [0mEpoch 99-88, Train loss: 1.81332, validation loss: 1.83958, learning rate: [0.0010059212756112208]
[32m[06/27 10:52:19 nl.defaults.trainer]: [0mEpoch 99-118, Train loss: 1.88037, validation loss: 1.82860, learning rate: [0.0010059212756112208]
[32m[06/27 10:52:22 nl.optimizers.oneshot.darts.o

In [13]:
trainer.evaluate_oneshot()

[32m[06/27 10:58:44 nl.defaults.trainer]: [0mStart one-shot evaluation
Files already downloaded and verified
Files already downloaded and verified
[32m[06/27 10:58:50 nl.defaults.trainer]: [0mEvaluation finished


60.559999985758466