In [16]:
import torch
PATH_TO_MODELS =  "/workspace/raid/data/eponomarev/pretrained"

model = torch.load(PATH_TO_MODELS+'/cifar10/compressed_models/vgg.vgg19_[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None].pth')
model

ModuleNotFoundError: No module named 'student'

In [4]:
import sys
sys.path.append('../')

import dataloaders

bs = 1

DATASET = 'cifar10'
DATA_ROOT = '/workspace/raid/data/datasets/'
loaders = dataloaders.get_loader(bs, DATASET, DATA_ROOT, num_workers = 4,
                                 simple_normalize=False)

Building cifar10 data loader with 4 workers
Files already downloaded and verified
Files already downloaded and verified


### Get  all  layers

Function  **get_layer_names()** returns names of model layers (convolutional and linear) and bool mask for convolutional layers. 

In [5]:
from model_utils import get_layer_names

layer_names, conv_layer_mask = get_layer_names(model)

fc_layer_mask = (1 - conv_layer_mask).astype(bool)

print(layer_names[conv_layer_mask])
print(layer_names[fc_layer_mask])

NameError: name 'model' is not defined

### Compress selected layers

For **convolutional** layers
- Set **decomposition**: 'tucker2', 'cp3' or 'cp4'
- Set  decomposition **ranks** for convolutional layers (namely, ranks we use to decompose convolutional weight tensors). 
  - In Tucker2 case, for one layer 
      - If **rank = None**, the layer won't be decomposed.
      - Elif **rank = 0**, then  VBMF method with **vbmf_weaken_factor**  will be used to select (rank_cout, rank_cin).
      - Elif **rank = (-scalar) < 0**, then values (rank_cout, rank_cin) will be choosen as maximal values which allow **(sacalar x) layer parameter reduction**.
      - Else **rank = tuple** and determines absolute ranks values (rank_cout, rank_cin)
  - In CP case, rank for one layer is a scalar
      - If **rank = None**, the layer won't be decomposed.
      - Elif **rank = (-scalar) < 0** then value for rank will be choosen as maximal rank which allows **(sacalar x) layer parameter reduction**.
      - Else **rank = scalar > 0** and determines absolute rank value.
      
For **linear** layers
- Set **decomposition** = 'svd'
- Set decomposition for linear layers (namely, ranks we use to factorize weight matrices)
    - In SVD case, rank for one layer is a scalar
      - If **rank = None**, the layer won't be decomposed.
      - Elif **rank = 0**, then  VBMF method with **vbmf_weaken_factor**  will be used to select rank.
      - Elif **rank = (-scalar) < 0** then value for rank will be choosen as maximal rank which allows **(sacalar x) layer parameter reduction**.
      - Else **rank = scalar > 0** and determines absolute rank value.

In [13]:
def split_resnet_layers_by_blocks(lnames):
    starts = ['conv1'] + ['layer{}'.format(i) for i in range(1,5)]

    start_idx = 0
    blocks_idxs = []
    layer_names_by_blocks = []

    for s in starts:
        curr_block =  [l for l in lnames if l.startswith(s)]
        layer_names_by_blocks.append(curr_block)

        blocks_idxs.append(np.arange(start_idx, start_idx+len(curr_block)))
        start_idx += len(curr_block)

    return blocks_idxs

In [15]:
from tensor_compression import get_compressed_model
import copy
import numpy as np

# decomposition_conv = 'cp4'
# decomposition_conv = 'cp3'
decomposition_conv = 'tucker2'
decomposition_fc = 'svd'

RANK_SELECTION = 'vbmf'
# RANK_SELECTION = 'nx'
# RANK_SELECTION = 'custom'

if RANK_SELECTION == 'vbmf':
    WEAKEN_FACTOR = 0.9
    X_FACTOR = 0
    rank_selection_suffix = "/wf:{}".format(WEAKEN_FACTOR)
elif RANK_SELECTION == 'nx':
    WEAKEN_FACTOR = None  
    X_FACTOR = 5
    rank_selection_suffix = "/{}x".format(X_FACTOR)
    
    
if MODEL_NAME == 'vgg16_imagenet':
    ranks_conv = [None] + [-X_FACTOR]*(len(layer_names[conv_layer_mask])-1)
    ranks_fc = [-X_FACTOR]*(len(layer_names[fc_layer_mask]))
elif MODEL_NAME == 'resnet50_imagenet':
    ranks_conv = [None if not name.endswith('conv2') else -X_FACTOR
                  for name in layer_names[conv_layer_mask]]
    ranks_fc = [-X_FACTOR]*(len(layer_names[fc_layer_mask]))
    

ranks = np.array([None]*len(layer_names))
ranks[conv_layer_mask] = ranks_conv
ranks[fc_layer_mask] = ranks_fc

decompositions = np.array([None]*len(layer_names))
decompositions[conv_layer_mask] = decomposition_conv
decompositions[fc_layer_mask] = decomposition_fc

CONV_SPLIT = 2
FC_SPLIT = 1
n_layers = len(layer_names)

RESNET_SPLIT = True
if MODEL_NAME == 'resnet50_imagenet' and RESNET_SPLIT:
    split_tuples = split_resnet_layers_by_blocks(layer_names[conv_layer_mask])[::-1]
else:
    split_tuples = np.array_split(np.arange(n_layers)[conv_layer_mask], CONV_SPLIT)[::-1]
split_tuples.append(np.array_split(np.arange(n_layers)[fc_layer_mask], FC_SPLIT))

In [16]:
compressed_model = copy.deepcopy(model)
for tupl in split_tuples:
    lname, rank, decomposition = layer_names[tupl], ranks[tupl], decompositions[tupl]
    print(lname, rank)
    compressed_model = get_compressed_model(compressed_model,
                                            ranks=rank,
                                            layer_names=lname,
                                            decompositions = decomposition,
                                            vbmf_weaken_factor=WEAKEN_FACTOR)

['features.17' 'features.19' 'features.21' 'features.24' 'features.26'
 'features.28'] [0 0 0 0 0 0]
Decompose layer features.17
	 new rank:  [169, 124]
Decompose layer features.19
	 new rank:  [175, 184]
Decompose layer features.21
	 new rank:  [160, 170]
Decompose layer features.24
	 new rank:  [180, 192]
Decompose layer features.26
	 new rank:  [183, 185]
Decompose layer features.28
	 new rank:  [188, 190]
['features.0' 'features.2' 'features.5' 'features.7' 'features.10'
 'features.12' 'features.14'] [None 0 0 0 0 0 0]
Skip layer features.0
Decompose layer features.2
	 new rank:  [29, 32]
Decompose layer features.5
	 new rank:  [47, 35]
Decompose layer features.7
	 new rank:  [50, 51]
Decompose layer features.10
	 new rank:  [89, 65]
Decompose layer features.12
	 new rank:  [95, 98]
Decompose layer features.14
	 new rank:  [88, 91]
['classifier.0' 'classifier.3' 'classifier.6'] [0 0 0]
Decompose layer classifier.0


  This is separate from the ipykernel package so we can avoid doing imports until


	 new rank:  947
Decompose layer classifier.3
	 new rank:  634
Decompose layer classifier.6
	 new rank:  261


In [21]:
compressed_model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Sequential(
      (2-0): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (2-1): Conv2d(32, 29, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (2-2): Conv2d(29, 64, kernel_size=(1, 1), stride=(1, 1))
    )
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Sequential(
      (5-0): Conv2d(64, 35, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (5-1): Conv2d(35, 47, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (5-2): Conv2d(47, 128, kernel_size=(1, 1), stride=(1, 1))
    )
    (6): ReLU(inplace)
    (7): Sequential(
      (7-0): Conv2d(128, 51, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (7-1): Conv2d(51, 50, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (7-2): Conv2d(50, 128, kernel_size=

In [18]:
from collections import defaultdict

def count_params(model):
    n_params = 0
    
    for name, param in model.named_parameters():
        n_params += param.numel()
    return n_params

In [19]:
params_count_dict_m = count_params(model)
params_count_dict_cm = count_params(compressed_model)

params_count_dict_m / params_count_dict_cm

3.7107591584709225

In [20]:
split_tuples

[array([ 7,  8,  9, 10, 11, 12]),
 array([0, 1, 2, 3, 4, 5, 6]),
 [array([13, 14, 15])]]