In [5]:
import nni
import torch
from torchvision import transforms
from torchvision.datasets import CIFAR10
import nni.common.blob_utils
nni.common.blob_utils.NNI_BLOB = 'https://repo.dailylime.kr/mirror/nni'
from nni.nas.evaluator.pytorch import DataLoader
from nni.nas.hub.pytorch import DARTS as DartsSpace

CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]

transform_valid = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
])
valid_data = nni.trace(CIFAR10)(root='./data', train=False, download=True, transform=transform_valid)
valid_loader = DataLoader(valid_data, batch_size=256, num_workers=6)



Files already downloaded and verified


# Use your own search space

In [2]:
from nni.nas.hub.pytorch import ProxylessNAS
model_space = DartsSpace(
    width=16,           # the initial filters (channel number) for the model
    num_cells=8,        # the number of stacked cells in total
    dataset='cifar'     # to give a hint about input resolution, here is 32x32
)

In [3]:
fast_dev_run = False


In [4]:
import numpy as np
from nni.nas.evaluator.pytorch import Classification
from torch.utils.data import SubsetRandomSampler

transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
])

train_data = nni.trace(CIFAR10)(root='./data', train=True, download=True, transform=transform)

num_samples = len(train_data)
indices = np.random.permutation(num_samples)
split = num_samples // 2

search_train_loader = DataLoader(
    train_data, batch_size=256, num_workers=6,
    sampler=SubsetRandomSampler(indices[:split]),
)

search_valid_loader = DataLoader(
    train_data, batch_size=352, num_workers=6,
    sampler=SubsetRandomSampler(indices[split:]),
)

evaluator = Classification(
    learning_rate=1e-3,
    weight_decay=1e-4,
    train_dataloaders=search_train_loader,
    val_dataloaders=search_valid_loader,
    max_epochs=10,
    fast_dev_run=fast_dev_run,
    num_classes=10
)

Files already downloaded and verified


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/ext3/miniconda3/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default


In [5]:
from nni.nas.strategy import DARTS as DartsStrategy

strategy = DartsStrategy()

In [6]:
import os
os.environ["NNI_CONFIG_DIR"] = "/scratch/pt2295/Assign_IDLS/PROJECT"

In [8]:
from nni.nas.experiment import NasExperiment

experiment = NasExperiment(model_space, evaluator, strategy)
experiment.run()

[2023-12-14 01:03:50] [32mConfig is not provided. Will try to infer.[0m
[2023-12-14 01:03:50] [32mStrategy is found to be a one-shot strategy. Setting execution engine to "sequential" and format to "raw".[0m
[2023-12-14 01:03:52] [32mCheckpoint saved to /home/pt2295/nni-experiments/uo9e1vwa/checkpoint.[0m
[2023-12-14 01:03:52] [32mExperiment initialized successfully. Starting exploration strategy...[0m


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name            | Type                 | Params
---------------------------------------------------------
0 | training_module | ClassificationModule | 2.0 M 
---------------------------------------------------------
2.0 M     Trainable params
0         Non-trainable params
2.0 M     Total params
7.950     Total estimated model params size (MB)
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 9: 100%|██████████| 98/98 [04:10<00:00,  0.39it/s, v_num=31, train_loss=0.414, train_acc=0.845]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 98/98 [04:11<00:00,  0.39it/s, v_num=31, train_loss=0.414, train_acc=0.845]
[2023-12-14 01:45:54] [32mWaiting for models submitted to engine to finish...[0m
[2023-12-14 01:45:55] [32mExperiment is completed.[0m


True

In [9]:
exported_arch = experiment.export_top_models(formatter='dict')[0]

exported_arch

[2023-12-14 01:45:55] [32mSorted weights in differentiable cell export (normal cell, node 2): [(0.05137522518634796, 0, 'max_pool_3x3'), (0.048120446503162384, 1, 'sep_conv_3x3'), (0.02083323337137699, 1, 'max_pool_3x3'), (0.020558834075927734, 0, 'sep_conv_3x3'), (0.011632879264652729, 1, 'dil_conv_3x3'), (0.006348881870508194, 1, 'sep_conv_5x5'), (0.00574924610555172, 0, 'skip_connect'), (0.00035490901791490614, 0, 'sep_conv_5x5'), (-0.003916729241609573, 0, 'dil_conv_3x3'), (-0.0334053561091423, 1, 'skip_connect'), (-0.03584015741944313, 1, 'dil_conv_5x5'), (-0.0364624485373497, 0, 'dil_conv_5x5'), (-0.0832982212305069, 0, 'avg_pool_3x3'), (-0.12442274391651154, 1, 'avg_pool_3x3')][0m
[2023-12-14 01:45:55] [32mSorted weights in differentiable cell export (normal cell, node 3): [(0.09724710881710052, 0, 'max_pool_3x3'), (0.07058358192443848, 1, 'sep_conv_3x3'), (0.028199458494782448, 2, 'sep_conv_5x5'), (0.05604198947548866, 0, 'sep_conv_3x3'), (0.03751400113105774, 1, 'max_pool_3

{'normal/op_2_0': 'max_pool_3x3',
 'normal/input_2_0': [0],
 'normal/op_2_1': 'sep_conv_3x3',
 'normal/input_2_1': [1],
 'normal/op_3_0': 'max_pool_3x3',
 'normal/input_3_0': [0],
 'normal/op_3_1': 'sep_conv_3x3',
 'normal/input_3_1': [1],
 'normal/op_4_0': 'max_pool_3x3',
 'normal/input_4_0': [0],
 'normal/op_4_1': 'sep_conv_3x3',
 'normal/input_4_1': [3],
 'normal/op_5_0': 'max_pool_3x3',
 'normal/input_5_0': [0],
 'normal/op_5_1': 'sep_conv_5x5',
 'normal/input_5_1': [1],
 'reduce/op_2_0': 'skip_connect',
 'reduce/input_2_0': [1],
 'reduce/op_2_1': 'skip_connect',
 'reduce/input_2_1': [0],
 'reduce/op_3_0': 'sep_conv_5x5',
 'reduce/input_3_0': [0],
 'reduce/op_3_1': 'sep_conv_3x3',
 'reduce/input_3_1': [2],
 'reduce/op_4_0': 'sep_conv_3x3',
 'reduce/input_4_0': [2],
 'reduce/op_4_1': 'skip_connect',
 'reduce/input_4_1': [3],
 'reduce/op_5_0': 'sep_conv_5x5',
 'reduce/input_5_0': [3],
 'reduce/op_5_1': 'max_pool_3x3',
 'reduce/input_5_1': [0]}

In [16]:
exported_arch={'normal/op_2_0': 'max_pool_3x3',
 'normal/input_2_0': [0],
 'normal/op_2_1': 'sep_conv_3x3',
 'normal/input_2_1': [1],
 'normal/op_3_0': 'max_pool_3x3',
 'normal/input_3_0': [0],
 'normal/op_3_1': 'sep_conv_3x3',
 'normal/input_3_1': [1],
 'normal/op_4_0': 'max_pool_3x3',
 'normal/input_4_0': [0],
 'normal/op_4_1': 'sep_conv_3x3',
 'normal/input_4_1': [3],
 'normal/op_5_0': 'max_pool_3x3',
 'normal/input_5_0': [0],
 'normal/op_5_1': 'sep_conv_5x5',
 'normal/input_5_1': [1],
 'reduce/op_2_0': 'skip_connect',
 'reduce/input_2_0': [1],
 'reduce/op_2_1': 'skip_connect',
 'reduce/input_2_1': [0],
 'reduce/op_3_0': 'sep_conv_5x5',
 'reduce/input_3_0': [0],
 'reduce/op_3_1': 'sep_conv_3x3',
 'reduce/input_3_1': [2],
 'reduce/op_4_0': 'sep_conv_3x3',
 'reduce/input_4_0': [2],
 'reduce/op_4_1': 'skip_connect',
 'reduce/input_4_1': [3],
 'reduce/op_5_0': 'sep_conv_5x5',
 'reduce/input_5_0': [3],
 'reduce/op_5_1': 'max_pool_3x3',
 'reduce/input_5_1': [0]}

In [17]:
import json
# with open("exported_arch/best_darts.json", "w") as outfile: 
#     json.dump(exported_arch, outfile)

In [18]:
from nni.nas.space import model_context

with model_context(exported_arch):
    final_model = DartsSpace(width=16, num_cells=8, dataset='cifar')

In [11]:
final_model# this is just the architecture

DARTS(
  (stem): Sequential(
    (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (stages): ModuleList(
    (0): Sequential(
      (0): Cell(
        (preprocessor): CellPreprocessor(
          (pre0): ReLUConvBN(
            (0): ReLU()
            (1): Conv2d(48, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
          (pre1): ReLUConvBN(
            (0): ReLU()
            (1): Conv2d(48, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (ops): ModuleList(
          (0-2): 3 x ModuleList(
            (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
            (1): SepConv(
              (0

In [14]:
train_loader = DataLoader(train_data, batch_size=256, num_workers=6)  # Use the original training data

In [15]:
max_epochs = 50

evaluator = Classification(
    learning_rate=1e-3,
    weight_decay=1e-4,
    train_dataloaders=train_loader,
    val_dataloaders=valid_loader,
    max_epochs=max_epochs,
    num_classes=10,
    export_onnx=False,          # Disable ONNX export for this experiment
    fast_dev_run=fast_dev_run   # Should be false for fully training
)

evaluator.fit(final_model)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA A100-SXM4-80GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | criterion | CrossEntropyLoss | 0     
1 | metrics   | ModuleDict       | 0     
2 | _model    | DARTS            | 279 K 
-----------------------------------------------
279 K     Trainable params
0         Non-trainable params
279 K     Total params
1.119     Total estimated model params size (MB)
SLURM auto-requeueing enabled. Setting signal han

Epoch 0: 100%|██████████| 196/196 [00:17<00:00, 10.92it/s, v_num=33, train_loss=1.210, train_acc=0.637]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/40 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/40 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▎         | 1/40 [00:00<00:01, 31.70it/s][A
Validation DataLoader 0:   5%|▌         | 2/40 [00:00<00:01, 34.59it/s][A
Validation DataLoader 0:   8%|▊         | 3/40 [00:00<00:01, 35.48it/s][A
Validation DataLoader 0:  10%|█         | 4/40 [00:00<00:00, 36.09it/s][A
Validation DataLoader 0:  12%|█▎        | 5/40 [00:00<00:00, 38.27it/s][A
Validation DataLoader 0:  15%|█▌        | 6/40 [00:00<00:00, 40.21it/s][A
Validation DataLoader 0:  18%|█▊        | 7/40 [00:00<00:00, 42.19it/s][A
Validation DataLoader 0:  20%|██        | 8/40 [00:00<00:00, 43.77it/s][A
Validation DataLoader 0:  22%|██▎       | 9/40 [00:00<00:00, 45.00it/s][A
Validation DataLoader 0:  25%|██▌       | 10/40 [00

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 49: 100%|██████████| 196/196 [00:16<00:00, 11.72it/s, v_num=33, train_loss=0.0473, train_acc=0.988, val_loss=0.570, val_acc=0.853]


In [17]:
# torch.save(final_model.state_dict(), 'exported_arch/best_darts_model.pt')

In [18]:
# final_model.state_dict()

OrderedDict([('stem.0.weight',
              tensor([[[[-1.0810e-01,  4.5962e-02,  7.4931e-02],
                        [-3.3627e-02,  2.5726e-01,  2.5470e-01],
                        [ 9.1658e-02,  9.6494e-02, -1.8675e-01]],
              
                       [[-1.3831e-01, -6.8255e-02, -1.2840e-01],
                        [ 1.0757e-01, -2.8080e-01,  5.7315e-02],
                        [-2.7371e-02, -1.0681e-01, -1.0243e-01]],
              
                       [[-5.1902e-02,  1.6873e-01,  9.7472e-02],
                        [ 5.6324e-02,  9.0512e-02, -2.1234e-02],
                        [ 8.7810e-02, -1.9968e-01, -1.1989e-01]]],
              
              
                      [[[-8.1313e-02, -6.9837e-03,  1.2263e-01],
                        [-2.7110e-01, -3.9833e-01, -1.0362e-01],
                        [ 9.7725e-02, -1.2452e-01, -3.0303e-01]],
              
                       [[ 1.7308e-01,  1.0443e-01,  1.6599e-01],
                        [-1.1353e-01, -1.301

In [19]:
final_model.state_dict()

OrderedDict([('stem.0.weight',
              tensor([[[[-0.0107, -0.1763, -0.0389],
                        [ 0.0556, -0.1041, -0.0956],
                        [-0.0539,  0.1611, -0.0784]],
              
                       [[ 0.1667,  0.0699, -0.1545],
                        [-0.1574,  0.1117,  0.0755],
                        [ 0.0521, -0.0124, -0.0976]],
              
                       [[-0.0247, -0.0714,  0.0570],
                        [-0.0286, -0.1739, -0.1179],
                        [ 0.0789,  0.1906, -0.0262]]],
              
              
                      [[[-0.0987,  0.0048,  0.1672],
                        [-0.0736,  0.1097, -0.0639],
                        [-0.1861,  0.0207, -0.0855]],
              
                       [[-0.0055, -0.1552,  0.0052],
                        [-0.1098,  0.0987, -0.0294],
                        [ 0.0026, -0.1104, -0.1528]],
              
                       [[-0.0321, -0.0543, -0.0507],
                        [

In [22]:
final_model.parameters

<bound method Module.parameters of DARTS(
  (stem): Sequential(
    (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (stages): ModuleList(
    (0): Sequential(
      (0): Cell(
        (preprocessor): CellPreprocessor(
          (pre0): ReLUConvBN(
            (0): ReLU()
            (1): Conv2d(48, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
          (pre1): ReLUConvBN(
            (0): ReLU()
            (1): Conv2d(48, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (ops): ModuleList(
          (0-2): 3 x ModuleList(
            (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
       