In [1]:
import numpy as np
import torch
import local_utils
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from local_utils import ResidualBlock
from torch import nn

### In this part of the exercise, we will load the floating-point MiniResNet model we have trained, quantise it into a fixed-point form and finally compile it into a version suitable for Kria.

# 1. Evaluation data

We start by re-creating the data generator on the MNIST database:

We only need the test part itself. We set `batch_size` to 1.

In [2]:
# Only the test split of MNIST is needed; images are converted to tensors.
test_dataset = datasets.MNIST(
    root='data',
    train=False,
    download=True,
    transform=ToTensor(),
)

# One sample per batch, served in dataset order.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
)

In addition, we will prepare a file in `.npz` format based on the test data. We will use it to evaluate the model on the target Kria platform.

Complete the `quantization_data` and `quantization_labels` vectors with data from the `test_loader`. Use the `for` loop  and `.append` to do this (an example of using the DataLoader with a `for` loop is shown in Part 1 when loading the data).

Then concatenate each vector individually with the `torch.cat` function with the `dim=0` parameter and convert them to `ndarray` format using `.numpy()`. 

Save them with the function np.savez.

In [4]:
# Gather every sample and its label from the loader, one batch at a time.
quantization_data = []
quantization_labels = []
for data, label in test_loader:
    quantization_data += [data]
    quantization_labels += [label]

# Join the per-batch tensors along the batch axis and convert them to ndarrays.
# NOTE: despite the `train_` prefix, these arrays come from `test_loader`.
train_X, train_Y = (
    torch.cat(tensors, dim=0).numpy()
    for tensors in (quantization_data, quantization_labels)
)

# Store both arrays in a single .npz archive under the keys `data` and `targets`.
np.savez('eval_MNIST.npz', data=train_X, targets=train_Y)

print('Done')

Done


# 2. Initialisation of the floating point model

We create the same MiniResNet class as in the first part of the exercise.

In [5]:
class MiniResNet(nn.Module):
    """Small residual CNN classifier.

    The network is a convolutional feature extractor (``self.CNN``) followed by
    a flatten + linear + softmax head (``self.FC``), so ``forward`` returns
    per-class probabilities along dimension 1.

    :param input_shape: shape of a single input sample; element 0 is the number
        of input channels and the last two elements are the spatial size
    :param num_of_classes: number of outputs of the final linear layer
    """

    def __init__(self,
                 input_shape = (1, 28, 28),
                 num_of_classes = 10,
                 ) -> None:
        super().__init__()
        # NOTE: the order of the layers defines the state_dict keys
        # (CNN.0, CNN.2, ...), so it must not change if saved weights are loaded.
        self.CNN = nn.Sequential(
            nn.Conv2d(input_shape[0], 16, 3, padding=1),
            nn.ReLU(),

            ResidualBlock(16, 4, 3),

            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            ResidualBlock(32, 4, 3),
            ResidualBlock(32, 2, 3),

            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

            ResidualBlock(64, 8, 3),
            ResidualBlock(64, 16, 3),

            # The last two convolutions are un-padded, so each one shrinks
            # the feature map by 2 pixels per spatial dimension.
            nn.Conv2d(64, 128, 3),
            nn.ReLU(),
            nn.Conv2d(128, 128, 3),
            nn.ReLU(),
        )

        # Output shape of self.CNN: two 2x2 max-pools halve the spatial size
        # twice, and the two un-padded 3x3 convolutions each remove 3//2*2 = 2.
        # (Assumes ResidualBlock keeps the spatial size unchanged - TODO confirm.)
        CNN_out_shape = [
            128,
            input_shape[-2]//2//2 - 3//2*2 - 3//2*2,
            input_shape[-1]//2//2 - 3//2*2 - 3//2*2,
        ]
        # nn.Linear expects a plain int for in_features; torch.prod returns a
        # 0-d tensor, so convert it explicitly.
        CNN_flatten_len = int(torch.prod(torch.tensor(CNN_out_shape)))

        self.FC = nn.Sequential(
            # Flatten everything except the batch dimension
            nn.Flatten(start_dim=1, end_dim=-1),
            # Linear (in=CNN_flatten_len, out=number of classes)
            nn.Linear(in_features=CNN_flatten_len, out_features=num_of_classes),
            # Softmax over the class dimension
            nn.Softmax(dim=1),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the convolutional part and then the classification head."""
        return self.FC(self.CNN(x))

We create the model and load the weights from the `MNIST.pth` file. We move the model to the device (only the CPU is available in the docker!) and set it to evaluation mode with `.eval()`.

In [7]:
# All computation in this notebook runs on the CPU.
device = torch.device('cpu')

# Take the model dimensions directly from the dataset behind the loader.
input_shape = test_loader.dataset[0][0].shape     # shape of the first image tensor
num_of_classes = len(test_loader.dataset.classes) # number of class names in the dataset
net = MiniResNet(input_shape=input_shape, num_of_classes=num_of_classes)

# Restore the trained weights (stored under the 'model' key of the checkpoint),
# then move the network to the device and switch it to inference mode.
pretrainedModel = torch.load('MNIST.pth', map_location=device)
net.load_state_dict(pretrainedModel['model'])
net.to(device).eval()

print(device)

cpu


# 3. Evaluation of the floating point model

Before proceeding with quantisation, we will perform a quick evaluation of the floating point model. We will check that the data is properly prepared and that the model has been correctly stored and loaded.

In [8]:
def evaluate(model,
             dataloader,
             evaluator
             ):
    """Run ``model`` over ``dataloader`` and report the running mean score.

    Gradients are disabled for the whole loop. Progress is printed in place
    after every batch, followed by the timing summary of
    ``local_utils.TimeMeasurement``.

    :param model: callable applied to the first element of every batch
    :param dataloader: sized iterable of batches; element 0 of a batch is the
        model input, all remaining elements are forwarded to ``evaluator``
    :param evaluator: fcn/obj like: fcn(y_pred, *targets) -> score of the batch
    :return: mean score over all processed samples, each batch weighted by its
        size (``0.0`` when ``dataloader`` yields no batches)
    """
    tm = local_utils.TimeMeasurement("Evaluation", len(dataloader))
    with torch.no_grad(), tm:
        score = 0.0
        cntr = 0
        for i, XY in enumerate(dataloader):
            X = XY[0]
            Y = XY[1:]
            y_pred = model(X)
            # Running mean: undo the previous normalisation, add this batch
            # weighted by its size, then normalise by the new sample count.
            score = score*cntr + X.shape[0]*evaluator(y_pred, *Y)
            cntr += X.shape[0]
            score /= cntr
            # i is 0-based; show the number of batches processed so far.
            print("\rEvaluation {}/{}. Score = {}".format(i + 1, len(dataloader), score), end='')

        print("\rEvaluation {}/{}. Score = {}".format(len(dataloader), len(dataloader), score), end='\n')
    print(tm)
    # Hand the result back so callers can use it instead of parsing the output.
    return score

We will load the Accuracy metric from `local_utils`.

In [9]:
# Accuracy metric provided by local_utils.
metric = local_utils.AccuracyMetric()

# Sanity check: score the floating point model before quantising it.
evaluate(model=net, dataloader=test_loader, evaluator=metric)

Evaluation 10000/10000. Score = 0.9830999970436096
Execution time: 7.0:0.0:24:104, processed 10000 frames, throughput: 22.517220101870432 fps.


#### If everything is working correctly and the accuracy obtained is at a similar level to the training, we can move on to quantisation.

# 4. Quantisation of the floating point model

### Vitis AI Quantizer for Post Training Quantisation has two parts.
The first is calibration (mode='calib') - Vitis AI Quantizer analyses the model and adjusts the quantisation parameters.
 
The second part is evaluation/testing (mode='test') - the accuracy of the model is checked (there should not be much change) and the model is exported to .xmodel format.

### For both parts we will use the quantize function.

The function uses the quantizer for PyTorch from the Vitis AI github: https://github.com/Xilinx/Vitis-AI/tree/1.4/tools/Vitis-AI-Quantizer/vai_q_pytorch

In [8]:
def quantize(float_model:torch.nn.Module,
             input_shape:tuple,
             quant_dir:str,
             quant_mode:str,
             device:torch.device,
             dataloader,
             evaluator):
    """
    Quantize ``float_model`` with the Vitis AI PyTorch quantizer and evaluate it.

    The quantized model is always evaluated on ``dataloader``. Afterwards the
    quantization config is exported in 'calib' mode and the xmodel is exported
    in 'test' mode. If the quantizer cannot be created, the exception is
    printed and the function returns early.

    :param float_model: float model with loaded weights
    :param input_shape: shape of the random dummy input handed to the
        quantizer, including the batch dimension, i.e. (batch, CH, H, W)
    :param quant_dir: path to directory with quantized model components
    :param quant_mode: quant_mode in ['calib', 'test']
    :param device: device the model is moved to and the quantizer runs on
    :param dataloader: data used for the evaluation pass; the original note
        states batch_size must be 1 for 'calib' (TODO confirm against the
        Vitis AI documentation)
    :param evaluator: fcn/obj like: fcn(y_pred, y_ref) -> float
    """
    tm = local_utils.TimeMeasurement("Quantization", len(dataloader))
    with tm:
        # available in docker or after packaging
        # vitis-AI-tools/..../pytorch../pytorch_nndct
        # and installing the package
        from pytorch_nndct.apis import torch_quantizer
        # model to device
        model = float_model.to(device)

        # Dummy input from which the quantizer is built.
        rand_in = torch.randn(input_shape)
        print("get qunatizer start")
        try:
            quantizer = torch_quantizer(
                quant_mode, model, rand_in, output_dir=quant_dir, device=device)
        except Exception as e:
            # Best effort: report the failure and stop instead of raising.
            print("exception:")
            print(e)
            return
        print("get qunatizer end")

        print("get quantized model start")
        quantized_model = quantizer.quant_model
        print("get quantized model end")

        # evaluate
        print("testing st")
        evaluate(quantized_model, dataloader, evaluator)
        print("testing end")

        # export config
        if quant_mode == 'calib':
            print("export config")
            quantizer.export_quant_config()
            print("export config end")
        # export model
        if quant_mode == 'test':
            print("export xmodel")
            quantizer.export_xmodel(deploy_check=False, output_dir=quant_dir)
            print("export xmodel end")
    print(tm)

We start with calibration. We specify as input of the function:
- float_model - the floating point model we obtained during training,
- input_shape - the dimension of the input data in [batch, CH, H, W] format,
- quant_dir - folder to which the quantisation result will be saved,
- quant_mode - to choose 'calib' or 'test',
- device - device on which the calculations will be performed (CPU),
- dataloader - the data on which the calculations will be performed,
- evaluator - metric according to which the accuracy will be checked

### Note that quantisation in the calibration mode is slow. For large models and large input dimensions, keep the amount of calibration data small.

In [9]:
# Quantize model - calib - is slow

quantize(float_model=net,
         # dummy-input shape: the sample shape with a leading batch dimension of 1
         input_shape=(1, *input_shape),
         quant_dir='quant_dir',
         quant_mode='calib',
         device=device,
         dataloader=test_loader,
         evaluator=metric
         )

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'

[0;32m[VAIQ_NOTE]: Loading NNDCT kernels...[0m
get qunatizer start

[0;32m[VAIQ_NOTE]: Quantization calibration process start up...[0m

[0;32m[VAIQ_NOTE]: =>Quant Module is in 'cpu'.[0m

[0;32m[VAIQ_NOTE]: =>Parsing MiniResNet...[0m

[0;32m[VAIQ_NOTE]: =>Doing weights equalization...[0m

[0;32m[VAIQ_NOTE]: =>Quantizable module is generated.(quant_dir/MiniResNet.py)[0m
get qunatizer end
get quantized model start

[0;32m[VAIQ_NOTE]: =>Get module with quantization.[0m
get quantized model end
testing st
Evaluation 10000/10000. Score = 0.9817000031471252
Execution time: 5.0:0.0:30:416, processed 10000 frames, throughput: 30.264823388501018 fps.
testing end
export config

[0;32m[VAIQ_NOTE]: =>Exporting quant config.(quant_dir/quant_info.json)[0m
export config end
Execution time: 5.0:0.0:31:397, processed 10000 frames, throughput: 30.175226697334523 fps.


After successful calibration, it is time to test and save the model. We start the function with the mode parameter changed to 'test'.

This process is faster than calibration.

In [10]:
# Quantize model - test - is faster

quantize(float_model=net,
         # dummy-input shape: the sample shape with a leading batch dimension of 1
         input_shape=(1, *input_shape),
         quant_dir='quant_dir', # directory for quantizer results
         quant_mode='test',
         device=device,
         dataloader=test_loader,
         evaluator=metric
         )

get qunatizer start

[0;32m[VAIQ_NOTE]: Quantization test process start up...[0m

[0;32m[VAIQ_NOTE]: =>Quant Module is in 'cpu'.[0m

[0;32m[VAIQ_NOTE]: =>Parsing MiniResNet...[0m

[0;32m[VAIQ_NOTE]: =>Doing weights equalization...[0m

[0;32m[VAIQ_NOTE]: =>Quantizable module is generated.(quant_dir/MiniResNet.py)[0m
get qunatizer end
get quantized model start

[0;32m[VAIQ_NOTE]: =>Get module with quantization.[0m
get quantized model end
testing st
Evaluation 10000/10000. Score = 0.9824000000953674
Execution time: 0.0:0.0:57:169, processed 10000 frames, throughput: 174.9184548731919 fps.
testing end
export xmodel

[0;32m[VAIQ_NOTE]: =>Converting to xmodel ...[0m

[0;32m[VAIQ_NOTE]: =>Successfully convert 'MiniResNet' to xmodel.(quant_dir/MiniResNet_int.xmodel)[0m
export xmodel end
Execution time: 0.0:0.0:57:426, processed 10000 frames, throughput: 174.1343355669612 fps.


After testing, the model should be compiled. Specify the parameters accordingly:

- `--xmodel` - path to the quantised model (it is saved in the folder given as the `quant_dir` parameter during quantisation)
- `--arch` - the provided `arch.json` file; it contains the fingerprint that identifies the DPU type of the target hardware
- `--net_name` - the name of our model after compilation - any name
- `--output_dir` - the folder where the compiled model will be saved - any name

In [11]:
# Compile the quantised xmodel for the DPU described by arch.json.
# With these options the result is written to build/MiniResNet_qu.xmodel.
!vai_c_xir --xmodel 'quant_dir/MiniResNet_int.xmodel' --arch arch.json --net_name MiniResNet_qu --output_dir build

**************************************************
* VITIS_AI Compilation - Xilinx Inc.
**************************************************
[UNILOG][INFO] Compile mode: dpu
[UNILOG][INFO] Debug mode: function
[UNILOG][INFO] Target architecture: DPUCZDX8G_ISA0_B4096_MAX_BG2
[UNILOG][INFO] Graph name: MiniResNet, with op num: 130
[UNILOG][INFO] Begin to compile...
[UNILOG][INFO] Total device subgraph number 3, DPU subgraph number 1
[UNILOG][INFO] Compile done.
[UNILOG][INFO] The meta json is saved to "/workspace/build/meta.json"
[UNILOG][INFO] The compiled xmodel is saved to "/workspace/build/MiniResNet_qu.xmodel"
[UNILOG][INFO] The compiled xmodel's md5sum is 6f353093d80acd35dd41b979bfe4e8e1, and has been saved to "/workspace/build/md5sum.txt"


We will now move on to testing the model on the target hardware.

#### Version 1: Connecting to the network
Before connecting power to the Kria, plug the USB cable into the computer and the Ethernet cable into the network where the computer is located.

Connect the Kria to the power supply and wait one minute for the system to boot up.

Start `cutecom` with `sudo`. Switch on the port corresponding to the Kria. If `kria login:` appears, log in:

`login: ubuntu`.

`password: Xilinx123`.

Once logged in, system information should appear. We are interested in the `IPv4` address for `eth0`. Copy it and add `:9090` to it - example value `192.168.1.26:9090`. Paste this in your browser. Another Jupyter should appear. Log into it with the password:

`xilinx`.

#### Version 2: Connecting to your computer
Before connecting the power to the Kria, connect the USB cable to the PC and the Ethernet cable between the Kria and the PC. On the PC, change the network settings `Wired Setting` -> `IPv4` -> `Shared to other computers`. Turn on power to the board.
Start `cutecom` with `sudo`. Enable the port corresponding to Kria. If `kria login:` appears, log in:

`login: ubuntu`.

`password: Xilinx123`.

Once logged in, system information should appear. We are interested in the `inet` address for `eth0`. Use `ifconfig` if there is no information about ethernet connection. Copy it and add `:9090` to it - example value `10.42.0.47:9090`. Paste this in your browser. Another Jupyter should appear. Log into it with the password:

`xilinx`.

#### Uploading files
Create a new folder and name it `ES_Lab`. Transfer the files to it accordingly:
- dpu.bit, 
- dpu.hwh, 
- dpu.xclbin, 
- eval_MNIST.npz or as you named your evaluation data file
- the compiled model, i.e. `build/MiniResNet_qu.xmodel` if you used the compile command above, or whatever you named your compiled file

This can be done with the `scp` command, but it is easier to drag the files from the folder into Jupyter Notebook.

Translated with www.DeepL.com/Translator (free version)

# ATTENTION! 

If there is a problem with Kria's IPv4 addresses (when several boards are connected to the same network and everyone has the same address), first check that the command `ifconfig` in the `cutecom` console works:

`ifconfig`.

If not, install via:

 `sudo apt install net-tools`.  
 
Password = Xilinx123

After that, unplug the Ethernet cable from the Kria, wait a few seconds and type in the `cutecom` console:

`hostname -I`.

If the console does not return any error and no IP address then type into the `cutecom` console:

`sudo ifconfig eth0 192.168.1.x netmask 255.255.255.0`.

Use the same network prefix as in the example address above, and replace `x` with a value different from the current one, e.g. 123, to avoid address conflicts with other boards and computers. Then run again:

`hostname -I`.

The address you set should appear. Plug in the Ethernet cable and start Jupyter in the browser with the set IP address. After unplugging the power supply the settings will be reset.