In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

### Simple model for test purposes

In [2]:
class SimpleConvNet(nn.Module):
    """Small two-stage convolutional network with a 10-output linear head.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and 2x2 max
    pooling. The flattened result is fed to one linear layer whose 1568
    (= 32 * 7 * 7) input features correspond to a single-channel 28x28 input:
    the padded convolutions keep the spatial size and the two poolings halve
    it twice (28 -> 14 -> 7).
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 input channel -> 16 feature maps, 3x3 kernel.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)

        # Stage 2: 16 -> 32 feature maps, 3x3 kernel.
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)

        # Head: 32 channels * 7 * 7 positions = 1568 features -> 10 outputs.
        self.fc = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    def forward(self, x):
        """Return a ``(batch, 10)`` tensor of output scores for the batch ``x``."""
        # conv -> ReLU -> 2x2 max pool, applied twice.
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), 2)
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2)

        # Collapse everything but the batch dimension before the linear head.
        flat = stage2.view(stage2.size(0), -1)
        return self.fc(flat)

In [3]:
# Smoke test: build the float model and push one random sample through it.
model = SimpleConvNet()
# Shape is (batch_size, channels, height, width).
input_tensor = torch.randn((1, 1, 28, 28))
output = model(input_tensor)

In [4]:
def quantize(float_model: torch.nn.Module,
             input_shape: tuple,
             quant_dir: str,
             quant_mode: str,
             device: torch.device) -> None:
    """
    Run one quantizer pass over ``float_model`` using a random input tensor.

    A ``torch_quantizer`` is created from the model and a random tensor of
    shape ``input_shape``, the resulting quantized model is run once on that
    tensor, and the artefact for the selected mode is then exported.

    :param float_model: float model with loaded weights
    :param input_shape: full shape handed to ``torch.randn`` (batch dimension
        included), e.g. ``(1, 1, 28, 28)``
    :param quant_dir: directory passed to the quantizer as ``output_dir``
    :param quant_mode: ``'calib'`` exports the quantization config,
        ``'test'`` exports the xmodel; any other value exports nothing here
    :param device: device the model and the forward-pass input are moved to
    :return: ``None``. If creating the quantizer raises, the exception is
        printed and the function returns early without exporting anything.
    """
    # available in docker or after packaging
    # vitis-AI-tools/..../pytorch../pytorch_nndct
    # and installing the package
    print("before import")
    from pytorch_nndct.apis import torch_quantizer
    # model to device
    print("Before device")
    model = float_model.to(device)

    # Random example input; created on the default device and handed to the
    # quantizer unchanged (the quantizer is given `device` separately).
    rand_in = torch.randn(input_shape)
    print("get qunatizer start")
    try:
        quantizer = torch_quantizer(
            quant_mode, model, rand_in, output_dir=quant_dir, device=device)
    except Exception as e:
        # NOTE(review): the failure is only printed and the function returns
        # None, so later notebook cells still run — consider re-raising.
        print("exception:")
        print(e)
        return
    print("get qunatizer end")

    print("get quantized model start")
    quantized_model = quantizer.quant_model
    print("get quantized model end")

    # evaluate: one forward pass through the quantized model. The input is
    # moved to `device` first because the model was moved there above; a CPU
    # tensor would not match a model placed on another device.
    # (presumably the quantizer needs this pass before exporting — see the
    # Vitis AI quantizer documentation)
    quantized_model(rand_in.to(device))

    # export config
    if quant_mode == 'calib':
        print("export config")
        quantizer.export_quant_config()
        print("export config end")
    # export model
    if quant_mode == 'test':
        print("export xmodel")
        quantizer.export_xmodel(deploy_check=False, output_dir=quant_dir)
        print("export xmodel end")


In [None]:
# Calibration pass: with quant_mode='calib', quantize() finishes by calling
# the quantizer's export_quant_config().
quantize(
    quant_mode='calib',
    float_model=model,
    input_shape=(1, 1, 28, 28),
    quant_dir='quant_dir_simple',
    device=torch.device("cpu"),
)

In [None]:
# Export pass: with quant_mode='test', quantize() finishes by calling the
# quantizer's export_xmodel() with quant_dir as the output directory.
quantize(
    quant_mode='test',
    float_model=model,
    input_shape=(1, 1, 28, 28),
    quant_dir='quant_dir_simple',
    device=torch.device("cpu"),
)

In [None]:
# Compile the quantized model by shelling out to the vai_c_xir command-line tool.
#   --xmodel      xmodel file read from quant_dir_simple/ (presumably the file written by
#                 the quant_mode='test' quantize() run — confirm the exported file name)
#   --arch        arch2.json, resolved relative to the notebook's working directory
#                 (presumably the target architecture description — TODO confirm)
#   --net_name    name passed to the compiler for the network (SimpleConvNet_qu)
#   --output_dir  directory passed to the compiler for its output (build)
!vai_c_xir --xmodel 'quant_dir_simple/SimpleConvNet_int.xmodel' --arch arch2.json --net_name SimpleConvNet_qu --output_dir build