# 1. Prepare model

In [1]:
import torch
import torch.nn.functional as F

class NaiveModel(torch.nn.Module):
    """Small CNN classifier: two conv -> ReLU6 -> max-pool stages, then two linear layers.

    `fc1` expects 4 * 4 * 50 flattened features, which is what a 1x28x28 input
    produces after the two 5x5 convolutions and the two 2x2 poolings.
    `forward` returns per-class log-probabilities (log_softmax over dim 1).
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # Attribute names and registration order are kept as-is: they determine
        # the order and names reported by `named_modules()`.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50, kernel_size=5, stride=1)
        self.fc1 = nn.Linear(in_features=4 * 4 * 50, out_features=500)
        self.fc2 = nn.Linear(in_features=500, out_features=10)
        self.relu1 = nn.ReLU6()
        self.relu2 = nn.ReLU6()
        self.relu3 = nn.ReLU6()
        self.max_pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.max_pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Run the network on a batch `x` and return log-probabilities of shape (batch, 10)."""
        # First and second feature-extraction stages: conv -> ReLU6 -> max-pool.
        features = self.max_pool1(self.relu1(self.conv1(x)))
        features = self.max_pool2(self.relu2(self.conv2(features)))
        # Flatten everything except the batch dimension before the linear layers.
        flat_width = features.size()[1:].numel()
        flattened = features.view(-1, flat_width)
        hidden = self.relu3(self.fc1(flattened))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [2]:
# define model, optimizer, criterion, data_loader, trainer, evaluator.

import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The model is moved to `device` before the optimizer is built from its parameters.
model = NaiveModel().to(device)

optimizer = optim.Adadelta(model.parameters(), lr=1)

# NLLLoss is paired with the log_softmax output returned by `NaiveModel.forward`.
criterion = torch.nn.NLLLoss()

# Convert images to tensors, then normalize with mean 0.1307 and std 0.3081
# (presumably the MNIST training-set statistics -- TODO confirm).
transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
# Only the training split passes download=True; the test split reads from the same './data' root.
train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('./data', train=False, transform=transform)
# NOTE(review): neither loader passes `shuffle`, so the DataLoader default applies -- confirm this is intended for training.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000)

def trainer(model, optimizer, criterion, epoch):
    """Train `model` for one pass over the module-level `train_loader`.

    Args:
        model: network to train; it is switched to training mode.
        optimizer: optimizer stepped once per batch.
        criterion: loss callable applied to (model output, target).
        epoch: epoch number, used only in the progress message.

    Prints a progress line with the current loss every 100 batches.
    """
    model.train()
    num_batches = len(train_loader)
    num_samples = len(train_loader.dataset)
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Only report on every 100th batch.
        if step % 100 != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), num_samples,
            100. * step / num_batches, loss.item()))

def evaluator(model):
    """Evaluate `model` on the module-level `test_loader`.

    The model is switched to eval mode and run without gradient tracking.
    Prints the average NLL loss and the accuracy, and returns the accuracy
    as a percentage (0-100).
    """
    model.eval()
    num_samples = len(test_loader.dataset)
    loss_sum = 0
    num_correct = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            log_probs = model(inputs)
            # Sum (not mean) per batch so the total can be divided by the dataset size below.
            loss_sum += F.nll_loss(log_probs, labels, reduction='sum').item()
            predicted = log_probs.argmax(dim=1, keepdim=True)
            num_correct += predicted.eq(labels.view_as(predicted)).sum().item()

    avg_loss = loss_sum / num_samples
    acc = 100 * num_correct / num_samples

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        avg_loss, num_correct, num_samples, acc))

    return acc

In [3]:
# pre-train the model for 3 epochs; the scheduler multiplies the learning rate by 0.7 after each epoch.

scheduler = StepLR(optimizer, gamma=0.7, step_size=1)

for epoch in range(3):
    trainer(model, optimizer, criterion, epoch)
    evaluator(model)
    scheduler.step()


Test set: Average loss: 0.0457, Accuracy: 9857/10000 (99%)


Test set: Average loss: 0.0302, Accuracy: 9902/10000 (99%)


Test set: Average loss: 0.0259, Accuracy: 9920/10000 (99%)



In [4]:
# show all op_name and op_type in the model.

# A plain loop is used instead of a list comprehension: print() returns None,
# so a comprehension would only build (and echo as the cell result) a list of Nones.
for name, module in model.named_modules():
    print('op_name: {}\nop_type: {}\n'.format(name, type(module)))

op_name: 
op_type: <class '__main__.NaiveModel'>

op_name: conv1
op_type: <class 'torch.nn.modules.conv.Conv2d'>

op_name: conv2
op_type: <class 'torch.nn.modules.conv.Conv2d'>

op_name: fc1
op_type: <class 'torch.nn.modules.linear.Linear'>

op_name: fc2
op_type: <class 'torch.nn.modules.linear.Linear'>

op_name: relu1
op_type: <class 'torch.nn.modules.activation.ReLU6'>

op_name: relu2
op_type: <class 'torch.nn.modules.activation.ReLU6'>

op_name: relu3
op_type: <class 'torch.nn.modules.activation.ReLU6'>

op_name: max_pool1
op_type: <class 'torch.nn.modules.pooling.MaxPool2d'>

op_name: max_pool2
op_type: <class 'torch.nn.modules.pooling.MaxPool2d'>



[None, None, None, None, None, None, None, None, None, None]

In [5]:
# show the shape of the `conv1` weight tensor.

print(model.conv1.weight.data.shape)

torch.Size([20, 1, 5, 5])


In [6]:
# show the weight of `conv1` before pruning, for comparison with the sparsified weight printed later.

print(model.conv1.weight.data)

tensor([[[[ 1.5338e-01, -1.1766e-01, -2.6654e-01, -2.9445e-02, -1.4650e-01],
          [-1.8796e-01, -2.9882e-01,  6.9725e-02,  2.1561e-01,  6.5688e-02],
          [ 1.5274e-01, -9.8471e-03,  3.2303e-01,  1.3472e-03,  1.7235e-01],
          [ 1.1804e-01,  2.2535e-01, -8.3370e-02, -3.4553e-02, -1.2529e-01],
          [-6.6012e-02, -2.0272e-02, -1.8797e-01, -4.6882e-02, -8.3206e-02]]],


        [[[-1.2112e-01,  7.0756e-02,  5.0446e-02,  1.5156e-01, -2.7929e-02],
          [-1.9744e-01, -2.1336e-03,  7.2534e-02,  6.2336e-02,  1.6039e-01],
          [-6.7510e-02,  1.4636e-01,  7.1972e-02, -8.9118e-02, -4.0895e-02],
          [ 2.9499e-02,  2.0788e-01, -1.4989e-01,  1.1668e-01, -2.8503e-01],
          [ 8.1894e-02, -1.4489e-01, -4.2038e-02, -1.2794e-01, -5.0379e-02]]],


        [[[ 3.8332e-02, -1.4270e-01, -1.9585e-01,  2.2653e-01,  1.0104e-01],
          [-2.7956e-03, -1.4108e-01, -1.4694e-01, -1.3525e-01,  2.6959e-01],
          [ 1.9522e-01, -1.2281e-01, -1.9173e-01, -1.8910e-02,  3.15

# 2. Prepare config_list for pruning

In [7]:
# we will prune 50% of the filters in `conv1`
# (the L1 filter pruner used below masks whole filters, as the mask output shows).

config_list = [{
    'sparsity': 0.5,
    'op_types': ['Conv2d'],
    'op_names': ['conv1']
}]

# 3. Choose a pruner and prune the model

In [8]:
# use the L1 filter pruner to prune the model

from nni.algorithms.compression.pytorch.pruning import L1FilterPruner

# Note: if the compressor you use requires an optimizer, pass it a newly created
# optimizer rather than the one used above, because NNI might modify the optimizer it is given.
# A modified optimizer cannot be reused for finetuning afterwards.
pruner = L1FilterPruner(model, config_list)

In [9]:
# `conv1` has been wrapped: the original `conv1` is now available as `conv1.module`.
# In `forward()` the wrapper uses `weight * mask` as the weight of `conv1`. The initial mask is a `ones_like(weight)` tensor.

# A plain loop is used instead of a list comprehension: print() returns None,
# so a comprehension would only build (and echo as the cell result) a list of Nones.
for name, module in model.named_modules():
    print('op_name: {}\nop_type: {}\n'.format(name, type(module)))

op_name: 
op_type: <class '__main__.NaiveModel'>

op_name: conv1
op_type: <class 'nni.compression.pytorch.compressor.PrunerModuleWrapper'>

op_name: conv1.module
op_type: <class 'torch.nn.modules.conv.Conv2d'>

op_name: conv2
op_type: <class 'torch.nn.modules.conv.Conv2d'>

op_name: fc1
op_type: <class 'torch.nn.modules.linear.Linear'>

op_name: fc2
op_type: <class 'torch.nn.modules.linear.Linear'>

op_name: relu1
op_type: <class 'torch.nn.modules.activation.ReLU6'>

op_name: relu2
op_type: <class 'torch.nn.modules.activation.ReLU6'>

op_name: relu3
op_type: <class 'torch.nn.modules.activation.ReLU6'>

op_name: max_pool1
op_type: <class 'torch.nn.modules.pooling.MaxPool2d'>

op_name: max_pool2
op_type: <class 'torch.nn.modules.pooling.MaxPool2d'>



[None, None, None, None, None, None, None, None, None, None, None]

In [10]:
# compress the model; the mask will be updated.
# `compress()` is the last expression of the cell, so its return value (the wrapped model) is displayed.

pruner.compress()

NaiveModel(
  (conv1): PrunerModuleWrapper(
    (module): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  )
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
  (relu1): ReLU6()
  (relu2): ReLU6()
  (relu3): ReLU6()
  (max_pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (max_pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

In [11]:
# show the shape of the `conv1` mask (it matches the shape of the `conv1` weight).

print(model.conv1.weight_mask.shape)

torch.Size([20, 1, 5, 5])


In [12]:
# show the mask of `conv1`: each filter is either all ones (kept) or all zeros (pruned).

print(model.conv1.weight_mask)

tensor([[[[1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1.]]],


        [[[0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.]]],


        [[[1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1.],
          [1., 1., 1., 1., 1.]]],


        [[[0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.]]],


        [[[0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.]]],


        [[[0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0.]]],


        [[[0., 0

In [13]:
# run a forward pass with a dummy input so that the mask is applied to the weights.

model(torch.rand(1, 1, 28, 28).to(device))

# the weights of `conv1` have been sparsified: filters whose mask is 0 now print as zeros.

print(model.conv1.module.weight.data)

tensor([[[[ 1.5338e-01, -1.1766e-01, -2.6654e-01, -2.9445e-02, -1.4650e-01],
          [-1.8796e-01, -2.9882e-01,  6.9725e-02,  2.1561e-01,  6.5688e-02],
          [ 1.5274e-01, -9.8471e-03,  3.2303e-01,  1.3472e-03,  1.7235e-01],
          [ 1.1804e-01,  2.2535e-01, -8.3370e-02, -3.4553e-02, -1.2529e-01],
          [-6.6012e-02, -2.0272e-02, -1.8797e-01, -4.6882e-02, -8.3206e-02]]],


        [[[-0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00, -0.0000e+00],
          [-0.0000e+00, -0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
          [-0.0000e+00,  0.0000e+00,  0.0000e+00, -0.0000e+00, -0.0000e+00],
          [ 0.0000e+00,  0.0000e+00, -0.0000e+00,  0.0000e+00, -0.0000e+00],
          [ 0.0000e+00, -0.0000e+00, -0.0000e+00, -0.0000e+00, -0.0000e+00]]],


        [[[ 3.8332e-02, -1.4270e-01, -1.9585e-01,  2.2653e-01,  1.0104e-01],
          [-2.7956e-03, -1.4108e-01, -1.4694e-01, -1.3525e-01,  2.6959e-01],
          [ 1.9522e-01, -1.2281e-01, -1.9173e-01, -1.8910e-02,  3.15

In [14]:
# export the sparsified model state to './pruned_naive_mnist_l1filter.pth'.
# export the mask to './mask_naive_mnist_l1filter.pth'.
# the mask file is read back later by `ModelSpeedup` through its `masks_file` argument.

pruner.export_model(model_path='pruned_naive_mnist_l1filter.pth', mask_path='mask_naive_mnist_l1filter.pth')

[2021-07-26 22:26:05] INFO (nni.compression.pytorch.compressor/MainThread) Model state_dict saved to pruned_naive_mnist_l1filter.pth
[2021-07-26 22:26:05] INFO (nni.compression.pytorch.compressor/MainThread) Mask dict saved to mask_naive_mnist_l1filter.pth


# 4. Speed Up

In [15]:
# If you use a wrapped model, don't forget to unwrap it before speeding it up.
# NOTE(review): `_unwrap_model` is an underscore-prefixed (private) method of the pruner -- confirm there is no public equivalent.

pruner._unwrap_model()

# the model has been unwrapped: `conv1` is a plain `Conv2d` again.

print(model)

NaiveModel(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
  (relu1): ReLU6()
  (relu2): ReLU6()
  (relu3): ReLU6()
  (max_pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (max_pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


In [16]:
from nni.compression.pytorch import ModelSpeedup

# speed up the model using the mask file exported above.
# the dummy input is a random batch of 10 single-channel 28x28 tensors on `device`.
m_speedup = ModelSpeedup(model, dummy_input=torch.rand(10, 1, 28, 28).to(device), masks_file='mask_naive_mnist_l1filter.pth')
m_speedup.speedup_model()

  x = x.view(-1, x.size()[1:].numel())


[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) start to speed up the model
[2021-07-26 22:26:18] INFO (FixMaskConflict/MainThread) {'conv1': 1, 'conv2': 1}


[2021-07-26 22:26:18] INFO (FixMaskConflict/MainThread) dim0 sparsity: 0.500000
[2021-07-26 22:26:18] INFO (FixMaskConflict/MainThread) dim1 sparsity: 0.000000
[2021-07-26 22:26:18] INFO (FixMaskConflict/MainThread) Dectected conv prune dim" 0
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) infer module masks...
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for conv1
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for relu1
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for max_pool1
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for conv2
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for relu2
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update mask for max_pool2
[2021-07-26 2

[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update indirect sparsity for relu3
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the relu3


[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update indirect sparsity for fc1
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the fc1
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update indirect sparsity for .aten::view.9
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the .aten::view.9
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update indirect sparsity for max_pool2
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the max_pool2
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update indirect sparsity for relu2
[2021-07-26 22:26:18] INFO (nni.compression.pytorch.speedup.compressor/MainThread) Update the indirect sparsity for the relu2
[202

In [17]:
# `conv1` has been replaced: `Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))` became `Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))`,
# and the following layer `conv2` has also changed, because its input channels must match the output channels of `conv1`.

print(model)

NaiveModel(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(10, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
  (relu1): ReLU6()
  (relu2): ReLU6()
  (relu3): ReLU6()
  (max_pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (max_pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)


In [18]:
# finetune the model to recover the accuracy.
# a new SGD optimizer is created from the current model parameters; the earlier Adadelta
# optimizer was built before speedup (presumably it no longer tracks the replaced layers -- TODO confirm).

optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# a single finetuning epoch, followed by an evaluation on the test set.
for epoch in range(0, 1):
    trainer(model, optimizer, criterion, epoch)
    evaluator(model)


Test set: Average loss: 0.0257, Accuracy: 9917/10000 (99%)



# 5. Prepare config_list for quantization

In [19]:
# quantize only the weights of `conv1` and `conv2`, using 8 bits.
# note that this rebinds `config_list`, replacing the pruning config defined earlier.
config_list = [{
    'quant_types': ['weight'],
    'quant_bits': {'weight': 8},
    'op_names': ['conv1', 'conv2']
}]

# 6. Choose a quantizer and quantize the model

In [20]:
from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer

# the quantizer receives the SGD optimizer created for finetuning above;
# the same optimizer object is then used for the calibration finetuning that follows.
quantizer = QAT_Quantizer(model, config_list, optimizer)
# `compress()` is the last expression of the cell, so the wrapped model it returns is displayed.
quantizer.compress()

NaiveModel(
  (conv1): QuantizerModuleWrapper(
    (module): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  )
  (conv2): QuantizerModuleWrapper(
    (module): Conv2d(10, 50, kernel_size=(5, 5), stride=(1, 1))
  )
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
  (relu1): ReLU6()
  (relu2): ReLU6()
  (relu3): ReLU6()
  (max_pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (max_pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

In [21]:
# finetune the model for calibration.
# this reuses the `optimizer` that was passed to `QAT_Quantizer` above.

for epoch in range(0, 1):
    trainer(model, optimizer, criterion, epoch)
    evaluator(model)


Test set: Average loss: 0.0247, Accuracy: 9917/10000 (99%)



In [22]:
# export the quantized model state to './quantized_naive_mnist_l1filter.pth'.
# export the calibration config to './calibration_naive_mnist_l1filter.pth'.
# `export_model` is the last expression of the cell, so the calibration config it returns is displayed.

quantizer.export_model(model_path='quantized_naive_mnist_l1filter.pth', calibration_path='calibration_naive_mnist_l1filter.pth')

[2021-07-26 22:34:41] INFO (nni.compression.pytorch.compressor/MainThread) Model state_dict saved to quantized_naive_mnist_l1filter.pth
[2021-07-26 22:34:41] INFO (nni.compression.pytorch.compressor/MainThread) Mask dict saved to calibration_naive_mnist_l1filter.pth


{'conv1': {'weight_bit': 8,
  'tracked_min_input': -0.42417848110198975,
  'tracked_max_input': 2.8212687969207764},
 'conv2': {'weight_bit': 8,
  'tracked_min_input': 0.0,
  'tracked_max_input': 4.246923446655273}}

# 7. Speed up with TensorRT

In [None]:
# speed up with tensorRT

# `ModelSpeedupTensorRT` was never imported in this notebook; import it here,
# following the notebook's convention of importing NNI classes in the cell that uses them.
from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT

# No earlier cell assigns `calibration_config` (the export cell only displays the
# returned dict), so reload it from the calibration file that the export wrote.
calibration_config = torch.load('calibration_naive_mnist_l1filter.pth')

# the input shape (32, 1, 28, 28) uses the same batch size as `batchsize`.
engine = ModelSpeedupTensorRT(model, (32, 1, 28, 28), config=calibration_config, batchsize=32)
engine.compress()