In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

! wget https://raw.githubusercontent.com/kumgleb/NASTestTask/master/model.py -O model.py
! wget https://raw.githubusercontent.com/kumgleb/NASTestTask/master/utils.py -O utils.py

from model import SuperNet
from utils import evaluate_accuracy, train_and_evaluate_from_scratch

  import pandas.util.testing as tm


--2020-08-10 17:56:59--  https://raw.githubusercontent.com/kumgleb/NASTestTask/master/model.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7506 (7.3K) [text/plain]
Saving to: ‘model.py’


2020-08-10 17:57:00 (47.6 MB/s) - ‘model.py’ saved [7506/7506]

--2020-08-10 17:57:01--  https://raw.githubusercontent.com/kumgleb/NASTestTask/master/utils.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2018 (2.0K) [text/plain]
Saving to: ‘utils.py’


2020-08-10 17:57:01 (27.4 MB/s) - ‘utils.py’ saved [2018/2018]



# **Data**:
MNIST dataset is used to train and evaluate a model. <br>
Basic MNIST test set is split equally into validation and test sets.
* SuperNet is trained on a MNIST train set.
* SuperNet is evaluated on the MNIST validation set.
* SubNets are evaluated on a validation set.
* Final architectures trained from scratch are evaluated on a test set.


In [2]:
# Load MNIST; the official test split is divided below into validation and test halves.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Stratified 50/50 split of the official test set, with a fixed seed so the
# validation/test membership is the same on every run.
X_val, X_test, y_val, y_test = train_test_split(
    X_test, y_test, test_size=0.5, stratify=y_test, random_state=42
)


def _as_unit_range_images(images):
    """Reshape images to (N, 28, 28, 1) and divide pixel values by 255."""
    return images.reshape(-1, 28, 28, 1) / 255


X_train, X_val, X_test = map(_as_unit_range_images, (X_train, X_val, X_test))

# Report the shape of each split (leading blank line separates it from the download log).
print('')
for split_name, split_images in (('Train', X_train),
                                 ('Validation', X_val),
                                 ('Test', X_test)):
    print(f'{split_name} shape: ', split_images.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz

Train shape:  (60000, 28, 28, 1)
Validation shape:  (5000, 28, 28, 1)
Test shape:  (5000, 28, 28, 1)




---



# **Experiments**:

In the following section, experiments with SuperNet are described:
1. The main part consists of the experiments specified in the task. The SuperNet is trained by randomly sampling SubNets and updating their weights.
2. An additional part consists of one more experiment with another SuperNet training approach. SuperNet is trained with a path dropout.

In all the experiments, layer parameters, such as the number of filters in the convolution layers and the number of neurons in the dense layer, remain the same. <br>
Learning rate, batch size and number of epochs remain the same as well.

In [3]:
# Fixed hyperparameters shared by every experiment in this notebook.
N_FILTERS = 16 # number of filters in all convolution layers
N_DENSE = 64 # number of neurons in dense layer
# NOTE: the name is spelled with a lowercase 'l' (N_ClASSES); it is kept as-is
# because the cells below reference it under this exact spelling.
N_ClASSES = 10 # number of classes in dataset

# Training settings used by the experiments below.
N_EPOCHS = 3
BATCH_SIZE = 256
LEARNING_RATE = 1e-3

# 1. Main part:
In the part below, the following experiments are described:
1. Training of a SuperNet with the `random_subnet` strategy (during training, the parameters of different SubNets are updated; a new SubNet is selected at random after a fixed number of batches, which is controlled by the `N_BTHCH_TO_SWITCH` parameter).
2. Once the SuperNet is trained, SubNets are sampled from it and evaluated on the validation set. The top-1 accuracy on the validation set is reported.
3. The architectures from above are retrained with random initialization, then their accuracies are evaluated on the test set. The top-1 accuracy on the test set is reported.


In [None]:
# Build the SuperNet from the fixed layer sizes defined in the parameters cell.
model = SuperNet(N_FILTERS, N_DENSE, N_ClASSES)

1. Training of a SuperNet with `random_subnet` strategy:

In [None]:
N_BTHCH_TO_SWITCH = 32 # number of batches before the model randomly switches to another SubNet during training

# Train the SuperNet with the 'random_subnet' strategy.
# NOTE(review): (X_val, y_val) is presumably what produces the per-epoch
# "SuperNet validation accuracy" lines in the log — confirm in model.py.
model.fit(X_train, y_train, N_EPOCHS, BATCH_SIZE, LEARNING_RATE,
          (X_val, y_val), N_BTHCH_TO_SWITCH, train_mode='random_subnet', verbosity=1)

Epoch: 0, SuperNet train loss: 0.5009, SuperNet train accuracy: 88.60%
SuperNet validation accuracy: 93.60%
Epoch: 1, SuperNet train loss: 0.2261, SuperNet train accuracy: 95.17%
SuperNet validation accuracy: 96.54%
Epoch: 2, SuperNet train loss: 0.1408, SuperNet train accuracy: 96.93%
SuperNet validation accuracy: 97.02%


2. Sampling and evaluating of SubNets on a validation set:

In [None]:
# Score each of the four SubNets sampled from the trained SuperNet on the
# validation set. The result is named `val_accuracy` (not `test_accuracy`)
# because the test set is reserved for the models retrained from scratch.
for subnet_idx in range(1, 5):
    sample = model.sample_subnet(subnet_idx)
    val_accuracy = evaluate_accuracy(sample, X_val, y_val)
    print(f'SubNet: {subnet_idx}, validation accuracy: {val_accuracy:.2%}')

SubNet: 1, validation accuracy: 95.12%
SubNet: 2, validation accuracy: 96.32%
SubNet: 3, validation accuracy: 95.70%
SubNet: 4, validation accuracy: 95.82%


Top-1 accuracy on a validation set of a stand-alone model is 96.32%. <br> 
It corresponds to SubNet 2 with following architecture:

In [None]:
# Index of the SubNet with the highest validation accuracy in the results above.
BEST_SUBNET_IDX = 2

# Re-sample that SubNet and print its layer-by-layer architecture.
sample = model.sample_subnet(BEST_SUBNET_IDX)
sample.summary()

Model: "subnet_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1418 (InputLayer)         [(None, 28, 28, 1)]  0                                            
__________________________________________________________________________________________________
3x3_1_conv_subnets_1_and_2 (Con (None, 28, 28, 16)   144         input_1418[0][0]                 
__________________________________________________________________________________________________
tf_op_layer_ZerosLike_990 (Tens [(None, 28, 28, 16)] 0           3x3_1_conv_subnets_1_and_2[1191][
__________________________________________________________________________________________________
concatenate_2834 (Concatenate)  (None, 28, 28, 32)   0           3x3_1_conv_subnets_1_and_2[1191][
                                                                 tf_op_layer_ZerosLike_990[

3. Training of SubNets from scratch:

In [5]:
# Retrain every SubNet architecture from scratch and report its accuracy on the
# held-out test set (see the per-SubNet results printed below).
train_split = (X_train, y_train)
val_split = (X_val, y_val)
test_split = (X_test, y_test)

train_and_evaluate_from_scratch(
    train_split, val_split, test_split,
    SuperNet,
    N_FILTERS, N_DENSE, N_ClASSES,
    LEARNING_RATE, BATCH_SIZE,
)

SubNet 1 test accuracy: 98.71%+-0.08%
SubNet 2 test accuracy: 98.92%+-0.15%
SubNet 3 test accuracy: 98.93%+-0.09%
SubNet 4 test accuracy: 98.91%+-0.15%


The top-1 mean accuracy on the test set for the models trained from scratch is 98.93%.

# 2. Additional part:

In this part, the SuperNet is trained with the path dropout strategy.

In [8]:
# Create a new SuperNet instance for the path-dropout experiment, so it does not
# reuse the model object trained with the 'random_subnet' strategy.
model = SuperNet(N_FILTERS, N_DENSE, N_ClASSES)

In [9]:
# Value forwarded to fit() as path_dropout_prob.
# NOTE(review): presumably the probability of dropping a path during training — confirm in model.py.
PATH_DROPOUT_P = 0.2

# Same data, epochs, batch size and learning rate as the 'random_subnet' run,
# but with train_mode='path_dropout'.
model.fit(X_train, y_train, N_EPOCHS, BATCH_SIZE, LEARNING_RATE,
          (X_val, y_val), path_dropout_prob=PATH_DROPOUT_P, train_mode='path_dropout', verbosity=1)

Epoch: 0, SuperNet train loss: 0.6502, SuperNet train accuracy: 88.98%
SuperNet validation accuracy: 95.40%
Epoch: 1, SuperNet train loss: 0.3501, SuperNet train accuracy: 96.72%
SuperNet validation accuracy: 97.64%
Epoch: 2, SuperNet train loss: 0.2982, SuperNet train accuracy: 97.58%
SuperNet validation accuracy: 97.96%


In [10]:
# Score each of the four SubNets sampled from the path-dropout SuperNet on the
# validation set. The result is named `val_accuracy` (not `test_accuracy`)
# because it is computed on (X_val, y_val), not on the test split.
for subnet_idx in range(1, 5):
    sample = model.sample_subnet(subnet_idx)
    val_accuracy = evaluate_accuracy(sample, X_val, y_val)
    print(f'SubNet: {subnet_idx}, validation accuracy: {val_accuracy:.2%}')

SubNet: 1, validation accuracy: 96.42%
SubNet: 2, validation accuracy: 97.54%
SubNet: 3, validation accuracy: 97.02%
SubNet: 4, validation accuracy: 97.86%
