In [1]:
import os
# Restrict this process to GPU 6, with GPUs numbered in PCI bus order.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "6",
})

## Load model

In [2]:
from torchvision.models import resnet50

device = 'cuda'

# Build a pretrained ResNet-50, switch it to evaluation mode, then move it to
# the target device. Each call returns the module itself, so the three steps
# are equivalent to a single chained expression.
model = resnet50(pretrained = True)
model = model.eval()
model = model.to(device)

## Get model statistics

See the GitHub repository [flopco-pytorch](https://github.com/juliagusak/flopco-pytorch) for details on the statistics that FlopCo computes.

In [3]:
from flopco import FlopCo

# Collect FLOP statistics for the model with FlopCo. The resulting object is
# queried in later cells via its total_flops, relative_flops and flops
# attributes.
model_stats = FlopCo(model, device = device)

In [4]:
# Display the total FLOP count together with the per-layer relative FLOPs
# (a mapping keyed by layer name).
model_stats.total_flops,  model_stats.relative_flops

(8178370560,
 defaultdict(None,
             {'conv1': 0.02886001585137272,
              'layer1.0.conv1': 0.003141226215115398,
              'layer1.0.conv2': 0.028271035936038583,
              'layer1.0.conv3': 0.012564904860461593,
              'layer1.0.downsample.0': 0.012564904860461593,
              'layer1.1.conv1': 0.012564904860461593,
              'layer1.1.conv2': 0.028271035936038583,
              'layer1.1.conv3': 0.012564904860461593,
              'layer1.2.conv1': 0.012564904860461593,
              'layer1.2.conv2': 0.028271035936038583,
              'layer1.2.conv3': 0.012564904860461593,
              'layer2.0.conv1': 0.025129809720923185,
              'layer2.0.conv2': 0.028271035936038583,
              'layer2.0.conv3': 0.012564904860461593,
              'layer2.0.downsample.0': 0.025129809720923185,
              'layer2.1.conv1': 0.012564904860461593,
              'layer2.1.conv2': 0.028271035936038583,
              'layer2.1.conv3': 0.012564904860

## Compress the model

You can compress the model using different strategies, depending on the rank selection method.

- Using any of the below listed compressors, you can optionally specify:
     - which layers will NOT be compressed (```ranks = {lname : None for lname in noncompressing_lnames}```)
     - how many layers to compress before the next model fine-tuning (```ft_every = 3```, i.e. the compression schedule is as follows: compress 3 layers, fine-tune, compress another 3 layers, fine-tune, ...)
     - how many times to compress each layer (```nglobal_compress_iters = 2```, by default 1)
        

- **CompressorVBMF**: ranks are determined by a global analytic solution of variational Bayesian matrix factorization (EVBMF)
    - Tucker2 decomposition is used for nn.Conv2d layers with kernels (n, n), n > 1
    - SVD is used for nn.Linear and nn.Conv2d with kernels (1, 1)
    - You can optionally specify:
        - weakening factor for the VBMF rank (```vbmf_weakenen_factors = {lname : factor for lname in lnames}```)



- **CompressorPR**: ranks correspond to a chosen fixed parameter reduction rate (specified for each layer, default: 2x for all layers)

    - Tucker2/CP3/CP4 decomposition is used for nn.Conv2d layers with kernels (n, n), n > 1
    - SVD is used for nn.Linear and nn.Conv2d with kernels (1, 1)
    - You can optionally specify:
        - which decomposition to use for nn.Conv2d layers with kernels (n, n), n > 1 (```conv2d_nn_decomposition = 'cp3'```)
        - parameter reduction rate (```param_reduction_rates``` argument), can be different for each layer



- **CompressorManual**: manually specified ranks are used

    - Tucker2/CP3/CP4 decomposition is used for nn.Conv2d layers with kernels (n, n), n > 1
    - SVD is used for nn.Linear and nn.Conv2d with kernels (1, 1)
    - You can optionally specify:
        - which decomposition to use for nn.Conv2d layers with kernels (n, n), n > 1 (```conv2d_nn_decomposition = 'tucker2'```)
        - which ranks to use (```ranks = {lname : rank for lname in lnames}```; if you don't want to compress a layer, set ```None``` instead of a ```rank``` value)

In [5]:
from musco.pytorch import CompressorVBMF, CompressorPR, CompressorManual

In [10]:
# Names of all layers for which FlopCo collected statistics.
all_lnames = list(model_stats.flops.keys())
print(all_lnames)

# Layers that the examples below actually compress. They are named explicitly
# instead of being selected by positional slices of all_lnames, so the choice
# is readable and does not silently shift if the layer list changes.
compressing_lnames = ['layer3.2.conv1', 'layer3.2.conv2']
assert set(compressing_lnames) <= set(all_lnames), \
    "compressing_lnames contains a layer name that is not in all_lnames"

# Every other layer is mapped to None, which marks it as "do not compress"
# when the dict is passed as the `ranks` argument of a compressor.
noncompressing_lnames = {lname : None for lname in all_lnames
                         if lname not in compressing_lnames}

['conv1', 'layer1.0.conv1', 'layer1.0.conv2', 'layer1.0.conv3', 'layer1.0.downsample.0', 'layer1.1.conv1', 'layer1.1.conv2', 'layer1.1.conv3', 'layer1.2.conv1', 'layer1.2.conv2', 'layer1.2.conv3', 'layer2.0.conv1', 'layer2.0.conv2', 'layer2.0.conv3', 'layer2.0.downsample.0', 'layer2.1.conv1', 'layer2.1.conv2', 'layer2.1.conv3', 'layer2.2.conv1', 'layer2.2.conv2', 'layer2.2.conv3', 'layer2.3.conv1', 'layer2.3.conv2', 'layer2.3.conv3', 'layer3.0.conv1', 'layer3.0.conv2', 'layer3.0.conv3', 'layer3.0.downsample.0', 'layer3.1.conv1', 'layer3.1.conv2', 'layer3.1.conv3', 'layer3.2.conv1', 'layer3.2.conv2', 'layer3.2.conv3', 'layer3.3.conv1', 'layer3.3.conv2', 'layer3.3.conv3', 'layer3.4.conv1', 'layer3.4.conv2', 'layer3.4.conv3', 'layer3.5.conv1', 'layer3.5.conv2', 'layer3.5.conv3', 'layer4.0.conv1', 'layer4.0.conv2', 'layer4.0.conv3', 'layer4.0.downsample.0', 'layer4.1.conv1', 'layer4.1.conv2', 'layer4.1.conv3', 'layer4.2.conv1', 'layer4.2.conv2', 'layer4.2.conv3', 'fc']


##### Example 1

In [11]:
# Example 1: ranks are selected with VBMF. Layers that appear in
# noncompressing_lnames (rank None) are left uncompressed.
compressor = CompressorVBMF(
    model,
    model_stats,
    ranks=noncompressing_lnames,
    ft_every=1,                # fine-tune after each compressed layer
    nglobal_compress_iters=2,  # compress every selected layer twice
)

In [12]:
# Alternate compression and fine-tuning steps until the compressor reports
# that it is done.
while not compressor.done:
    print("\n Compress")
    compressor.compression_step()
    
    # Placeholder: this demo performs no actual fine-tuning here, it only
    # prints the stage name. A real pipeline would fine-tune
    # compressor.compressed_model at this point.
    print('\n Fine-tune')
    


 Compress
layer3.2.conv1 svd

 Fine-tune

 Compress
layer3.2.conv2 tucker2

 Fine-tune

 Compress
layer3.2.conv1 svd

 Fine-tune

 Compress
layer3.2.conv2 tucker2

 Fine-tune


In [18]:
# Inspect the Bottleneck block that contains the two compressed layers
# (layer3.2.conv1 and layer3.2.conv2).
compressor.compressed_model.layer3[2]

Bottleneck(
  (conv1): Sequential(
    (conv1-0): Conv2d(1024, 108, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (conv1-1): Conv2d(108, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
  )
  (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Sequential(
    (conv2-0): Conv2d(256, 61, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (conv2-1): Conv2d(61, 47, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv2-2): Conv2d(47, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
  )
  (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
)

##### Example 2

In [31]:
# Example 2: ranks are derived from a fixed parameter reduction rate.
# Layers that appear in noncompressing_lnames (rank None) are left
# uncompressed.
compressor = CompressorPR(
    model,
    model_stats,
    ranks=noncompressing_lnames,
    conv2d_nn_decomposition='tucker2',
    # Only this layer gets a custom rate; the other compressed layer falls
    # back to the default reduction rate.
    param_reduction_rates={'layer3.2.conv2': 1.5},
    ft_every=1,                # fine-tune after each compressed layer
    nglobal_compress_iters=2,  # compress every selected layer twice
)

In [32]:
# Alternate compression and fine-tuning steps until the compressor reports
# that it is done.
while not compressor.done:
    print("\n Compress")
    compressor.compression_step()
    
    # Placeholder: this demo performs no actual fine-tuning here, it only
    # prints the stage name. A real pipeline would fine-tune
    # compressor.compressed_model at this point.
    print('\n Fine-tune')
    


 Compress
layer3.2.conv1 svd

 Fine-tune

 Compress
layer3.2.conv2 tucker2

 Fine-tune

 Compress
layer3.2.conv1 svd

 Fine-tune

 Compress
layer3.2.conv2 tucker2

 Fine-tune


In [33]:
# Print the compressor's configuration and state (layer names, ranks,
# decompositions, iteration counters and the compressed model).
print(compressor)

<class 'musco.pytorch.compressor.compressor.CompressorPR'>: 
{'lnames': ['layer3.2.conv1', 'layer3.2.conv2']}
{'rank_selection': 'param_reduction'}
{'conv2d_nn_decomposition': 'tucker2'}
{'ranks': {'layer3.2.conv1': 51, 'layer3.2.conv2': (129, 129)}}
{'vbmf_wfs': None}
{'param_rrs': {'layer3.2.conv1': 2, 'layer3.2.conv2': 1.5}}
{'decompositions': defaultdict(None, {'layer3.2.conv1': 'svd', 'layer3.2.conv2': 'tucker2'})}
{'ft_every': 1}
{'nglobal_compress_iters': 2}
{'niters': 4}
{'curr_iter': 4}
{'curr_ncompr_layers': 4}
{'done': True}
{'compressed_model': ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): 