In [1]:
import os
import xml
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from nni.compression.pytorch import ModelSpeedup
from nni.compression.pytorch.utils.counter import count_flops_params

from utils import create_model, get_dataloader

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Size PyTorch's CPU thread pool with the same count stored in num_workers.
num_workers = 16
torch.set_num_threads(num_workers)

### Stanford Dogs (Data Download)

The [Stanford Dogs](http://vision.stanford.edu/aditya86/ImageNetDogs/) dataset contains images of 120 breeds of dogs from around the world. It is built using images and annotations from ImageNet for the task of fine-grained image classification. We choose this task to simulate a transfer learning scenario, where a model pre-trained on ImageNet is further transferred to an often simpler downstream task.

To download and prepare the data, please run `prepare_data.sh`, which downloads the images and annotations, and preprocesses the images for training.

In [None]:
# Prepare the Stanford Dogs data via shell escapes:
#   1. grant the current user execute permission on prepare_data.sh,
#   2. run the script from the notebook's working directory.
# The script is described in this notebook as downloading the images and
# annotations and preprocessing the images for training.
!chmod u+x prepare_data.sh
!./prepare_data.sh

Then, you may run the following code block, which shows several instances:

# Model Pre-training
First, we obtain a MobileNetV2 model on this task, which will serve as the base model for compression. Note that although this step is often called model "pre-training" in model compression terminology, we are actually fine-tuning a model pre-trained on ImageNet.

In [None]:
# Obtain the MobileNetV2 base model by running pretrain.py with the
# mobilenet_v2_torchhub model type; no other option is overridden, so the
# script's own defaults apply.
%run pretrain.py --model_type mobilenet_v2_torchhub

In [None]:
# Run pretrain.py with the resnet18 model type, limited to a single epoch.
%run pretrain.py --model_type resnet18 --n_epochs 1

In [9]:
# Run pretrain.py with the efficientnet_se_b0 model type for 10 epochs.
# NOTE(review): the output saved with this notebook shows the script loading
# pretrained_efficientnet_se_b0/checkpoint_best.pt before epoch 0, so this run
# presumably continues from an earlier one — confirm against pretrain.py.
%run pretrain.py --model_type efficientnet_se_b0  --n_epochs 10

Namespace(model_type='efficientnet_se_b0', experiment_dir=None, checkpoint_name='checkpoint_best.pt', n_workers=16, n_epochs=10, learning_rate=0.0001, weight_decay=0.0, batch_size=64)


Using cache found in /home/flo/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub


Loading checkpoint from pretrained_efficientnet_se_b0/checkpoint_best.pt
Start training epoch 0


100%|██████████| 169/169 [00:39<00:00,  4.31it/s]
100%|██████████| 19/19 [00:02<00:00,  9.35it/s]


Epoch 0: train loss 0.3454, valid loss 0.4892, valid acc 0.8457
Start training epoch 1


100%|██████████| 169/169 [00:39<00:00,  4.24it/s]
100%|██████████| 19/19 [00:02<00:00,  7.80it/s]


Epoch 1: train loss 0.3255, valid loss 0.4842, valid acc 0.8473
Start training epoch 2


100%|██████████| 169/169 [00:40<00:00,  4.20it/s]
100%|██████████| 19/19 [00:02<00:00,  8.45it/s]


Epoch 2: train loss 0.3237, valid loss 0.4820, valid acc 0.8490
Start training epoch 3


100%|██████████| 169/169 [00:41<00:00,  4.08it/s]
100%|██████████| 19/19 [00:02<00:00,  7.60it/s]


Epoch 3: train loss 0.3226, valid loss 0.4803, valid acc 0.8432
Start training epoch 4


100%|██████████| 169/169 [00:39<00:00,  4.29it/s]
100%|██████████| 19/19 [00:02<00:00,  9.17it/s]


Epoch 4: train loss 0.3144, valid loss 0.4780, valid acc 0.8457
Start training epoch 5


100%|██████████| 169/169 [00:38<00:00,  4.38it/s]
100%|██████████| 19/19 [00:02<00:00,  7.75it/s]


Epoch 5: train loss 0.3143, valid loss 0.4767, valid acc 0.8448
Start training epoch 6


100%|██████████| 169/169 [00:40<00:00,  4.17it/s]
100%|██████████| 19/19 [00:02<00:00,  7.80it/s]


Epoch 6: train loss 0.3042, valid loss 0.4768, valid acc 0.8481
Start training epoch 7


100%|██████████| 169/169 [00:40<00:00,  4.20it/s]
100%|██████████| 19/19 [00:02<00:00,  8.09it/s]


Epoch 7: train loss 0.3049, valid loss 0.4754, valid acc 0.8465
Start training epoch 8


100%|██████████| 169/169 [00:41<00:00,  4.05it/s]
100%|██████████| 19/19 [00:02<00:00,  9.46it/s]


Epoch 8: train loss 0.3074, valid loss 0.4740, valid acc 0.8465
Start training epoch 9


100%|██████████| 169/169 [00:41<00:00,  4.03it/s]
100%|██████████| 19/19 [00:02<00:00,  8.44it/s]


Epoch 9: train loss 0.3009, valid loss 0.4744, valid acc 0.8473


In [10]:
# Run pretrain.py with the efficientnet_b0 model type for up to 100 epochs.
# NOTE(review): the output saved with this notebook shows the script loading
# pretrained_efficientnet_b0/checkpoint_best.pt before epoch 0 (presumably
# continuing an earlier run — confirm against pretrain.py), and the recorded
# run ends with a KeyboardInterrupt during epoch 24, so it does not cover all
# 100 requested epochs.
%run pretrain.py --model_type efficientnet_b0 --n_epochs 100

Namespace(model_type='efficientnet_b0', experiment_dir=None, checkpoint_name='checkpoint_best.pt', n_workers=16, n_epochs=100, learning_rate=0.0001, weight_decay=0.0, batch_size=64)


Using cache found in /home/flo/.cache/torch/hub/NVIDIA_DeepLearningExamples_torchhub


Loading checkpoint from pretrained_efficientnet_b0/checkpoint_best.pt
Start training epoch 0


100%|██████████| 169/169 [00:39<00:00,  4.32it/s]
100%|██████████| 19/19 [00:02<00:00,  8.75it/s]


Epoch 0: train loss 1.0823, valid loss 0.9694, valid acc 0.7621
Start training epoch 1


100%|██████████| 169/169 [00:42<00:00,  3.96it/s]
100%|██████████| 19/19 [00:02<00:00,  9.10it/s]


Epoch 1: train loss 1.0662, valid loss 0.9570, valid acc 0.7678
Start training epoch 2


100%|██████████| 169/169 [00:41<00:00,  4.03it/s]
100%|██████████| 19/19 [00:02<00:00,  6.79it/s]


Epoch 2: train loss 1.0478, valid loss 0.9448, valid acc 0.7744
Start training epoch 3


100%|██████████| 169/169 [00:40<00:00,  4.17it/s]
100%|██████████| 19/19 [00:02<00:00,  8.60it/s]


Epoch 3: train loss 1.0257, valid loss 0.9320, valid acc 0.7769
Start training epoch 4


100%|██████████| 169/169 [00:38<00:00,  4.40it/s]
100%|██████████| 19/19 [00:02<00:00,  9.39it/s]


Epoch 4: train loss 1.0134, valid loss 0.9196, valid acc 0.7777
Start training epoch 5


100%|██████████| 169/169 [00:40<00:00,  4.21it/s]
100%|██████████| 19/19 [00:02<00:00,  8.73it/s]


Epoch 5: train loss 1.0083, valid loss 0.9090, valid acc 0.7793
Start training epoch 6


100%|██████████| 169/169 [00:41<00:00,  4.05it/s]
100%|██████████| 19/19 [00:02<00:00,  8.28it/s]


Epoch 6: train loss 0.9951, valid loss 0.9000, valid acc 0.7818
Start training epoch 7


100%|██████████| 169/169 [00:42<00:00,  3.94it/s]
100%|██████████| 19/19 [00:02<00:00,  8.96it/s]


Epoch 7: train loss 0.9842, valid loss 0.8918, valid acc 0.7834
Start training epoch 8


100%|██████████| 169/169 [00:39<00:00,  4.25it/s]
100%|██████████| 19/19 [00:02<00:00,  7.85it/s]


Epoch 8: train loss 0.9723, valid loss 0.8813, valid acc 0.7845
Start training epoch 9


100%|██████████| 169/169 [00:38<00:00,  4.40it/s]
100%|██████████| 19/19 [00:02<00:00,  7.11it/s]


Epoch 9: train loss 0.9602, valid loss 0.8730, valid acc 0.7889
Start training epoch 10


100%|██████████| 169/169 [00:41<00:00,  4.08it/s]
100%|██████████| 19/19 [00:02<00:00,  7.31it/s]


Epoch 10: train loss 0.9521, valid loss 0.8648, valid acc 0.7897
Start training epoch 11


100%|██████████| 169/169 [00:44<00:00,  3.84it/s]
100%|██████████| 19/19 [00:02<00:00,  8.20it/s]


Epoch 11: train loss 0.9389, valid loss 0.8560, valid acc 0.7914
Start training epoch 12


100%|██████████| 169/169 [00:44<00:00,  3.83it/s]
100%|██████████| 19/19 [00:02<00:00,  7.92it/s]


Epoch 12: train loss 0.9389, valid loss 0.8486, valid acc 0.7897
Start training epoch 13


100%|██████████| 169/169 [00:40<00:00,  4.13it/s]
100%|██████████| 19/19 [00:02<00:00,  8.43it/s]


Epoch 13: train loss 0.9232, valid loss 0.8400, valid acc 0.7906
Start training epoch 14


100%|██████████| 169/169 [00:41<00:00,  4.09it/s]
100%|██████████| 19/19 [00:02<00:00,  8.20it/s]


Epoch 14: train loss 0.9166, valid loss 0.8338, valid acc 0.7897
Start training epoch 15


100%|██████████| 169/169 [00:40<00:00,  4.19it/s]
100%|██████████| 19/19 [00:02<00:00,  8.95it/s]


Epoch 15: train loss 0.9018, valid loss 0.8249, valid acc 0.7914
Start training epoch 16


100%|██████████| 169/169 [00:41<00:00,  4.07it/s]
100%|██████████| 19/19 [00:02<00:00,  8.24it/s]


Epoch 16: train loss 0.8941, valid loss 0.8188, valid acc 0.7930
Start training epoch 17


100%|██████████| 169/169 [00:39<00:00,  4.23it/s]
100%|██████████| 19/19 [00:02<00:00,  9.16it/s]


Epoch 17: train loss 0.8875, valid loss 0.8119, valid acc 0.7971
Start training epoch 18


100%|██████████| 169/169 [00:42<00:00,  4.02it/s]
100%|██████████| 19/19 [00:02<00:00,  8.14it/s]


Epoch 18: train loss 0.8769, valid loss 0.8055, valid acc 0.7955
Start training epoch 19


100%|██████████| 169/169 [00:44<00:00,  3.83it/s]
100%|██████████| 19/19 [00:02<00:00,  7.07it/s]


Epoch 19: train loss 0.8785, valid loss 0.7986, valid acc 0.7988
Start training epoch 20


100%|██████████| 169/169 [00:41<00:00,  4.06it/s]
100%|██████████| 19/19 [00:02<00:00,  8.19it/s]


Epoch 20: train loss 0.8642, valid loss 0.7916, valid acc 0.7980
Start training epoch 21


100%|██████████| 169/169 [00:38<00:00,  4.35it/s]
100%|██████████| 19/19 [00:02<00:00,  8.60it/s]


Epoch 21: train loss 0.8573, valid loss 0.7861, valid acc 0.8013
Start training epoch 22


100%|██████████| 169/169 [00:39<00:00,  4.30it/s]
100%|██████████| 19/19 [00:01<00:00,  9.50it/s]


Epoch 22: train loss 0.8513, valid loss 0.7793, valid acc 0.8054
Start training epoch 23


100%|██████████| 169/169 [00:37<00:00,  4.45it/s]
100%|██████████| 19/19 [00:02<00:00,  9.09it/s]


Epoch 23: train loss 0.8405, valid loss 0.7743, valid acc 0.8013
Start training epoch 24


 14%|█▍        | 24/169 [00:05<00:34,  4.19it/s]


KeyboardInterrupt: 