# SSD Reduction tutorial

In this tutorial, we will show how to initialize, reduce and train a SSD300 network using POD and/or AHOSVD.

### IMPORTS
We start by importing all the necessary functions.

In [1]:
'''
Create reduced version of SSD300
'''

import argparse
import torch
from PIL import Image
from time import time
import numpy as np
import matplotlib.pyplot as plt
import os
import torchvision.transforms as transforms
from torch.utils import data
import pickle

from smithers.ml.vgg import VGG
from smithers.ml.models.aux_conv import AuxiliaryConvolutions
from smithers.ml.models.predictor import PredictionConvolutions
from smithers.ml.dataset.pascalvoc_dataset import PascalVOCDataset
from smithers.ml.models.detector import Detector, Reduced_Detector
from smithers.ml.models.utils import create_prior_boxes, save_checkpoint_objdet
from smithers.ml.netadapter import NetAdapter
from smithers.ml.utils import get_seq_model, Total_param, Total_flops

import warnings
warnings.filterwarnings("ignore")

ModuleNotFoundError: No module named 'smithers'

### LEARNING PARAMETERS
Then, we set the parameters used for the data, the detector and the learning phase.

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Learning parameters
batch_size = 8  # batch size
workers = 4  # number of workers for loading data in the DataLoader
iterations = 120000  # number of iterations to train
print_freq = 200  # print training status every __ batches
lr = 1e-4  # learning rate
decay_lr_at = [80000, 100000]  # decay learning rate after these many iterations
decay_lr_to = 0.1
# decay learning rate to this fraction of the existing learning rate
#n_classes = 6
momentum = 0.9  # momentum
weight_decay = 5e-4  # weight decay
grad_clip = None
# clip if gradients are exploding, which may happen at larger batch sizes

voc_labels = ('cat', 'dog')
'''voc_labels = ('aeroplane', 'bicycle', 'bird', 'boat',
        'bottle', 'bus', 'car', 'cat', 'chair',
        'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant',
        'sheep', 'sofa', 'train', 'tvmonitor')'''
label_map = {k: v + 1 for v, k in enumerate(voc_labels)}
label_map['background'] = 0
n_classes = len(label_map)
print('categories:',label_map)
print('n_classes:', n_classes)

categories: {'cat': 1, 'dog': 2, 'background': 0}
n_classes: 3


### DATA PARAMETERS
We now define the train and test images, after they have been extracted and the subdivision has been carried out (refer to the data preparation tutorial for more details).

In [3]:
# Data parameters
data_folder = 'VOC_dog_cat/JSONfiles' #folder with json data files
keep_difficult = True


train_dataset = PascalVOCDataset(data_folder,
                                 split='train',
                                 keep_difficult=keep_difficult)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=train_dataset.collate_fn,
    num_workers=workers,
    pin_memory=True)

epochs = iterations // (len(train_dataset) // 32)
decay_lr_at = [it // (len(train_dataset) // 32) for it in decay_lr_at]
print('Training images:', len(train_dataset))
# Load test data
test_dataset = PascalVOCDataset(data_folder,
                                split='test',
                                keep_difficult=keep_difficult)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          collate_fn=test_dataset.collate_fn,
                                          num_workers=workers,
                                          pin_memory=True)
print('Testing images:', len(test_dataset))


Training images: 240
Testing images: 60


### REDUCTION VIA AHOSVD
The following block has been set up for carrying out the reduction using the AHOSVD technique. The fundamental parameter in this context is the list ``mode_list_batch``, which is composed as follows:
- the first element, at position ``0``, is the number of images that get processed together in the HOSVD
- the latter three numbers define, respectively, the number of channels, the height and the width of the output of this technique.

In [4]:
#checkpoint = 'checkpoint_ssd300.pth.tar'
init_time = time()

base_net = VGG(classifier='ssd', init_weights=False)
seq_model = get_seq_model(base_net)
cutoff_idx = 7
red_method = 'HOSVD'
if red_method == 'POD':
    red_dim = 50
elif red_method == 'HOSVD':
    mode_list_batch=[1, 35, 3, 3]
    red_dim = mode_list_batch[-1]*mode_list_batch[-1]*mode_list_batch[1]
inout_method = None
netadapter = NetAdapter(cutoff_idx, red_dim, red_method, inout_method)
red_model = netadapter.reduce_net(seq_model, train_dataset, None, train_loader, n_classes, device = device, mode_list_batch = mode_list_batch)
base_net = red_model.premodel
aux_conv = red_model.proj_model
#cfg_tot = [n of channels of the outputs of the pre model, n of the reduced channels]
if red_method == 'HOSVD':
    cfg_tot = list(reversed(list(red_model.proj_model.list_of_matrices[0].shape)))
elif red_method == 'POD':
    cfg_tot = [256, red_dim]
#cfg_tot_ssd = [512,1024,] #channel number
n_boxes = [4, 6]
predictor = PredictionConvolutions(n_classes, cfg_tot, n_boxes)
network = [base_net, aux_conv, predictor]

#create prior boxes custom for reduced net
if red_method == 'HOSVD':
    reduction_dims = list(reversed(list(red_model.proj_model.list_of_matrices[1].shape)))
    #fmaps_dims = {'premodel': width=height of the output of the pre model, 'projmodel': width=height of the reduced tensors}
    fmaps_dims = {'premodel': reduction_dims[0], 'projmodel': reduction_dims[1]}
elif red_method == 'POD':
    fmaps_dims = {'premodel': 38, 'projmodel': 1}
obj_scales = {'premodel': 0.1, 'projmodel': 0.725} #0.9
aspect_ratio = {'premodel': [1., 2., 0.5], 'projmodel': [1., 2., 3., 0.5, 0.333]}
priors_cxcy = create_prior_boxes(fmaps_dims, obj_scales, aspect_ratio)
init_end = time()
print('time needed to initialize the model', round(init_end - init_time,2), 'seconds')



Loaded base model.

Initializing reduction. Chosen reduction method is: HOSVD
Initializing dataset forwarding (AHOSVD included)
30
completato
Dataset forwarding (with AHOSVD reduction) complete
time needed to initialize the model 78.11 seconds


### REDUCTION VIA POD

In [4]:
checkpoint = 'checkpoint_ssd300.pth.tar'
checkpoint = None
init_time = time()

base_net = VGG(classifier='ssd', init_weights=False)
seq_model = get_seq_model(base_net)
print(seq_model)
cutoff_idx = 7
red_dim = 50
red_method = 'POD'
inout_method = None
netadapter = NetAdapter(cutoff_idx, red_dim, red_method, inout_method)
red_model = netadapter.reduce_net(seq_model, train_dataset, None, train_loader, n_classes)
print(red_model)
base_net = red_model.premodel
aux_conv = red_model.proj_model
print(aux_conv)
cfg_tot = [256, 50] #, 512, 256, 256, 256]
n_boxes = [4, 6]
predictor = PredictionConvolutions(n_classes, cfg_tot, n_boxes)
network = [base_net, aux_conv, predictor]

#create prior boxes custom for reduced net
fmaps_dims = {'premodel': 38, 'projmodel': 1} 
obj_scales = {'premodel': 0.1, 'projmodel': 0.725} #0.9
aspect_ratio = {'premodel': [1., 2., 0.5], 'projmodel': [1., 2., 3., 0.5, 0.333]}
priors_cxcy = create_prior_boxes(fmaps_dims, obj_scales, aspect_ratio)

init_end = time()
print('time needed to initialize the model', init_end - init_time)



Loaded base model.

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
  (17): Conv2d(2

### TRAINING THE REDUCED NETWORK
The following cell deals with the training of the reduced network. It is possible to either load an existing checkpoint and continue the training of it or start from a newly initialized network. As of now, the training phase will save a checkpoint every 500 epochs of training and after its completion. This parameter can be adjusted in the ``train_detector_with_eval_name`` function, that can be found in the detector.py script.

The second part of the cell measures the time needed for the network to make a single predicition.

In [6]:
check = None
#check = 'Results/3000_2_55_3_3_cut11/checkpoint_ssd300.pth.tar'
epochs = 10
start = time()
detector = Reduced_Detector(network, check, priors_cxcy, n_classes, epochs,
                    batch_size, print_freq, lr, decay_lr_at,
                    decay_lr_to, momentum, weight_decay, grad_clip,
                    train_loader, test_loader)

start = time()
check, loss_value, mAP_list = detector.train_detector_with_eval(label_map)
end = time()
print(f'Time needed for training: {round(end-start,2)} seconds, i.e. {round((end-start)/60,1)} minutes')


Training (with evaluation) has started.
Epoch: [0][0/30]	Batch Time 0.468 (0.468)	Data Time 0.298 (0.298)	Loss 5.4450 (5.4450)	

Mean Average Precision (mAP): 0.001
Epoch: [1][0/30]	Batch Time 0.421 (0.421)	Data Time 0.274 (0.274)	Loss 5.2341 (5.2341)	

Mean Average Precision (mAP): 0.000
Epoch: [2][0/30]	Batch Time 0.377 (0.377)	Data Time 0.242 (0.242)	Loss 5.3344 (5.3344)	

Mean Average Precision (mAP): 0.000
Epoch: [3][0/30]	Batch Time 0.419 (0.419)	Data Time 0.265 (0.265)	Loss 5.2606 (5.2606)	

Mean Average Precision (mAP): 0.000
Epoch: [4][0/30]	Batch Time 0.456 (0.456)	Data Time 0.288 (0.288)	Loss 5.3757 (5.3757)	

Mean Average Precision (mAP): 0.000
Epoch: [5][0/30]	Batch Time 0.363 (0.363)	Data Time 0.227 (0.227)	Loss 5.2238 (5.2238)	

Mean Average Precision (mAP): 0.000
Epoch: [6][0/30]	Batch Time 0.443 (0.443)	Data Time 0.284 (0.284)	Loss 5.2474 (5.2474)	

Mean Average Precision (mAP): 0.000
Epoch: [7][0/30]	Batch Time 0.442 (0.442)	Data Time 0.282 (0.282)	Loss 5.3080 (5.3080

### ACCURACY OF AN EXISTING NETWORK
With the following piece of code, it is possible to load a checkpoint and check its accuracy on the test dataset initially defined.
Moreover, the second part of the cell is used to test the network on a user given picture.

In [8]:
#check = None
#check = 'Results/Ulysses/3200_3_35_3_3/epoch_1700-3200/checkpoint_ssd300_epoch_2000.pth.tar'
epochs = 1
start = time()
detector = Reduced_Detector(network, check, priors_cxcy, n_classes, epochs,
                    batch_size, print_freq, lr, decay_lr_at,
                    decay_lr_to, momentum, weight_decay, grad_clip,
                    train_loader, test_loader)
start_test = time()
detector.eval_detector(label_map, check)
end_test = time()
print(f'Time needed for testing: {round(end_test-start_test,2)} seconds, i.e. {round((end_test-start_test)/60,1)} minutes')



img_path = '/u/s/szanin/Smithers/smithers/ml/tutorials/VOC_dog_cat/JPEGImages/001462.jpg'
original_image = Image.open(img_path, mode='r')
original_image = original_image.convert('RGB')
detector.detect(original_image,
                check,
                label_map,
                min_score=0.01,
                max_overlap=0.45,
                top_k=5).show()



Loaded checkpoint from epoch 10.



Evaluating: 100%|██████████| 8/8 [00:14<00:00,  1.79s/it]


{'cat': 0.00010449321416672319, 'dog': 0.0}
{'cat': 0.00010449321416672319, 'dog': 0.0}

Mean Average Precision (mAP): 0.000
Time needed for testing: 14.53 seconds, i.e. 0.2 minutes


FileNotFoundError: [Errno 2] No such file or directory: '/u/s/szanin/Smithers/smithers/ml/tutorials/VOC_dog_cat/JPEGImages/001463.jpg'

### STORAGE SIZE OF AN EXISTING (REDUCED) NETWORK
With the following piece of code, it is possible to load a checkpoint of a reduced network and check its storage size.

In [6]:
#check = 'checkpoint_ssd300_red_pascalvoc.pth.tar'
#check = 'checkpoint_ssd300.pth.tar'
check = torch.load(check)
model = check['model']

rednet_storage = torch.zeros(4)
rednet_flops = torch.zeros(4)

rednet_storage[0], rednet_storage[1], rednet_storage[2], rednet_storage[3] = [
       Total_param(model[0]),
       Total_param(model[1]),
       Total_param(model[2].features_loc),
       Total_param(model[2].features_cl)]

print('SSD300 reduced-storage')
print(
      ' Pre nnz = {:.2f}, POD_model nnz={:.2f}, feature_loc nnz={:.4f}, feature_cl nnz={:.4f}'.format(
                  rednet_storage[0], rednet_storage[1],
                  rednet_storage[2], rednet_storage[3]))


SSD300 reduced-storage
 Pre nnz = 38.13, POD_model nnz=0.11, feature_loc nnz=0.3267, feature_cl nnz=0.2450


### STORAGE SIZE OF AN EXISTING (UNREDUCED) NETWORK
With the following piece of code, it is possible to load a checkpoint of a unreduced SSD300 network and check its storage size.

In [7]:
check1 = 'checkpoint_ssd300.pth.tar'
check1 = torch.load(check1)
model = check1['model']

rednet_storage = torch.zeros(4)
rednet_flops = torch.zeros(4)

rednet_storage[0], rednet_storage[1], rednet_storage[2], rednet_storage[3]  = [
       Total_param(model[0]),
       Total_param(model[1].features),
       Total_param(model[2].features_loc),
       Total_param(model[2].features_cl)]

rednet_vgg_storage = torch.zeros(4)
rednet_vgg_storage[0], rednet_vgg_storage[1], rednet_vgg_storage[2], rednet_vgg_storage[3],  = [
       Total_param(model[0]),
       Total_param(model[0].features),
       Total_param(model[0].avgpool),
       Total_param(model[0].classifier)]


print('SSD300-storage')
print(
      ' Pre nnz = {:.2f}, aux_model nnz={:.2f}, feature_loc nnz={:.4f}, feature_cl nnz={:.4f}'.format(
                  rednet_storage[0], rednet_storage[1],
                  rednet_storage[2], rednet_storage[3]))

"""print(
      ' Pre nnz = {:.2f}, pre_vgg nnz={:.2f}, pre_avgpool nnz={:.4f}, pre_classifier nnz={:.4f}'.format(
                  rednet_vgg_storage[0], rednet_vgg_storage[1],
                  rednet_vgg_storage[2], rednet_vgg_storage[3]))"""


torch.save(detector, 'check_ssd300_red.pth')


SSD300-storage
 Pre nnz = 78.14, aux_model nnz=9.38, feature_loc nnz=2.0395, feature_cl nnz=1.5296
 Pre nnz = 78.14, pre_vgg nnz=56.13, pre_avgpool nnz=0.0000, pre_classifier nnz=22.0078
