In [10]:
import torchvision
import torch
from PIL import Image
import os
import numpy as np
import onnxruntime
# load the calibrated model
# state_dict = torch.load("quant_resnet50-entropy-1024.pth", map_location="cpu")
# model.load_state_dict(state_dict)

In [11]:
from onnxruntime.quantization import CalibrationDataReader, create_calibrator, write_calibration_table


In [12]:
class ImageNetDataReader(CalibrationDataReader):
    def __init__(self,
                 image_folder,
                 width=1024,
                 height=1024,
                 start_index=0,
                 end_index=0,
                 stride=1,
                 batch_size=1,
                 model_path='augmented_model.onnx',
                 input_name='data'):
        '''
        :param image_folder: image dataset folder
        :param width: image width
        :param height: image height 
        :param start_index: start index of images
        :param end_index: end index of images
        :param stride: image size of each data get 
        :param batch_size: batch size of inference
        :param model_path: model name and path
        :param input_name: model input name
        '''

        self.image_folder = image_folder
        self.model_path = model_path
        self.preprocess_flag = True
        self.enum_data_dicts = iter([])
        self.datasize = 0
        self.width = width
        self.height = height
        self.start_index = start_index
        self.end_index = len(os.listdir(self.image_folder)) if end_index == 0 else end_index
        self.stride = stride if stride >= 1 else 1
        self.batch_size = batch_size
        self.input_name = input_name
        self.sess_options = onnxruntime.SessionOptions()
        self.sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
        self.sess_options.enable_mem_pattern = False
        self.sess_options.use_deterministic_compute = True
        self.sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
        self.sess_options.enable_cpu_mem_arena = False
    def get_dataset_size(self):
        return len(os.listdir(self.image_folder))

    def get_input_name(self):
        if self.input_name:
            return
        session = onnxruntime.InferenceSession(self.model_path, self.sess_options,providers=['CPUExecutionProvider'])
        self.input_name = session.get_inputs()[0].name

    def get_next(self):
        iter_data = next(self.enum_data_dicts, None)
        if iter_data:
            return iter_data

        self.enum_data_dicts = None
        if self.start_index < self.end_index:
            if self.batch_size == 1:
                data = self.load_serial()
            else:
                data = self.load_batches()

            self.start_index += self.stride
            self.enum_data_dicts = iter(data)

            return next(self.enum_data_dicts, None)
        else:
            return None

    def load_serial(self):
        width = self.width
        height = self.width
        nchw_data_list, filename_list, image_size_list = self.preprocess_imagenet(self.image_folder, height, width,
                                                                                  self.start_index, self.stride)
        input_name = self.input_name

        data = []
        for i in range(len(nchw_data_list)):
            nhwc_data = nchw_data_list[i]
            file_name = filename_list[i]
            data.append({input_name: nhwc_data})
        return data

    def load_batches(self):
        width = self.width
        height = self.height
        batch_size = self.batch_size
        stride = self.stride
        input_name = self.input_name

        batches = []
        for index in range(0, stride, batch_size):
            start_index = self.start_index + index
            nchw_data_list, filename_list, image_size_list = self.preprocess_imagenet(
                self.image_folder, height, width, start_index, batch_size)

            if nchw_data_list.size == 0:
                break

            nchw_data_batch = []
            for i in range(len(nchw_data_list)):
                nhwc_data = np.squeeze(nchw_data_list[i], 0)
                nchw_data_batch.append(nhwc_data)
            batch_data = np.concatenate(np.expand_dims(nchw_data_batch, axis=0), axis=0)
            data = {input_name: batch_data}

            batches.append(data)

        return batches

    def preprocess_imagenet(self, images_folder, height, width, start_index=0, size_limit=0):
        '''
        Loads a batch of images and preprocess them
        parameter images_folder: path to folder storing images
        parameter height: image height in pixels
        parameter width: image width in pixels
        parameter start_index: image index to start with   
        parameter size_limit: number of images to load. Default is 0 which means all images are picked.
        return: list of matrices characterizing multiple images
        '''
        def preprocess_images(input, channels=3, height=1024, width=1024):
            image = input.resize((width, height), Image.Resampling.LANCZOS)
            input_data = np.asarray(image).astype(np.float32)
            if len(input_data.shape) != 2:
                input_data = input_data.transpose([2, 0, 1])
            else:
                input_data = np.stack([input_data] * 3)
            mean = np.array([0.079, 0.05, 0]) + 0.406
            std = np.array([0.005, 0, 0.001]) + 0.224
            for channel in range(input_data.shape[0]):
                input_data[channel, :, :] = (input_data[channel, :, :] / 255 - mean[channel]) / std[channel]
            return input_data

        image_names = os.listdir(images_folder)
        image_names.sort()
        if size_limit > 0 and len(image_names) >= size_limit:
            end_index = start_index + size_limit
            if end_index > len(image_names):
                end_index = len(image_names)
            batch_filenames = [image_names[i] for i in range(start_index, end_index)]
        else:
            batch_filenames = image_names

        unconcatenated_batch_data = []
        image_size_list = []

        for image_name in batch_filenames:
            image_filepath = images_folder + '/' + image_name
            img = Image.open(image_filepath)
            
            image_data = preprocess_images(img)
            image_data = np.expand_dims(image_data, 0)
            print(f'img.shape = {image_data.shape}')
            unconcatenated_batch_data.append(image_data)
            image_size_list.append(np.array([img.size[1], img.size[0]], dtype=np.float32).reshape(1, 2))

        batch_data = np.concatenate(np.expand_dims(unconcatenated_batch_data, axis=0), axis=0)
        return batch_data, batch_filenames, image_size_list

In [13]:
from pathlib import Path


In [14]:
checkpoint_dir = Path('')
trt_cache_dir = checkpoint_dir/'trt_engine_cache'
trt_cache_dir.mkdir(parents=True, exist_ok=True)
trt_cache_dir

PosixPath('trt_engine_cache')

In [15]:
from onnxruntime.quantization import CalibrationDataReader, CalibrationMethod, create_calibrator, write_calibration_table

In [16]:
datareader = ImageNetDataReader()

TypeError: ImageNetDataReader.__init__() missing 1 required positional argument: 'image_folder'

In [25]:
class CalibrationDataReaderCV(CalibrationDataReader):
    """
    A subclass of CalibrationDataReader specifically designed for handling
    image data for calibration in computer vision tasks. This reader loads,
    preprocesses, and provides images for model calibration.
    """
    
    def __init__(self, img_file_paths, target_sz, input_name='input'):
        """
        Initializes a new instance of the CalibrationDataReaderCV class.
        
        Args:
            img_file_paths (list): A list of image file paths.
            target_sz (tuple): The target size (width, height) to resize images to.
            input_name (str, optional): The name of the input node in the ONNX model. Default is 'input'.
        """
        super().__init__()  # Initialize the base class
        
        # Initialization of instance variables
        self._img_file_paths = img_file_paths
        self.input_name = input_name
        self.enum = iter(img_file_paths)  # Create an iterator over the image paths
        self.target_sz = target_sz
        
    def get_next(self):
        """
        Retrieves, processes, and returns the next image in the sequence as a NumPy array suitable for model input.
        
        Returns:
            dict: A dictionary with a single key-value pair where the key is `input_name` and the value is the
                  preprocessed image as a NumPy array, or None if there are no more images.
        """
        
        img_path = next(self.enum, None)  # Get the next image path
        if not img_path:
            return None  # If there are no more paths, return None

        # Load the image from the filepath and convert to RGB
        image = Image.open(img_path).convert('RGB')

        # Resize the image to the target size
        input_img = resize_img(image, target_sz=self.target_sz, divisor=1)
        
        # Convert the image to a NumPy array, normalize, and add a batch dimension
        input_tensor_np = np.array(input_img, dtype=np.float32).transpose((2, 0, 1))[None] / 255

        # Return the image in a dictionary under the specified input name
        return {self.input_name: input_tensor_np}

In [21]:
onnx_file_path = '/home/ubuntu/transformer-distillation/ps1-self-no-window.onnx'

In [24]:
# Save path for temporary ONNX model used during calibration process
augmented_model_path = "augmented.onnx"

try:
    # Create a calibrator object for the ONNX model.
    calibrator = create_calibrator(
        model=onnx_file_path, 
        op_types_to_calibrate=None, 
        augmented_model_path=augmented_model_path, 
        calibrate_method=CalibrationMethod.MinMax
    )

    # Set the execution providers for the calibrator.
    calibrator.set_execution_providers(["CUDAExecutionProvider", "CPUExecutionProvider"])

    # Initialize the custom CalibrationDataReader object
    calibration_data_reader = CalibrationDataReaderCV(img_file_paths=sample_img_paths, 
                                                      target_sz=target_sz, 
                                                      input_name=calibrator.model.graph.input[0].name)

    # Collect calibration data using the specified data reader.
    calibrator.collect_data(data_reader=calibration_data_reader)

    # Initialize an empty dictionary to hold the new compute range values.
    new_compute_range = {}

    # Compute data and update the compute range for each key in the calibrator's data.
    for k, v in calibrator.compute_data().data.items():
        # Extract the min and max values from the range_value.
        v1, v2 = v.range_value
        # Convert the min and max values to float and store them in the new_compute_range dictionary.
        new_compute_range[k] = (float(v1.item()), float(v2.item()))
        
    # Write the computed calibration table to the specified directory.
    write_calibration_table(new_compute_range, dir=str(trt_cache_dir))

    
except Exception as e:
    # Catch any exceptions that occur during the calibration process.
    print("An error occurred:", e)

# finally:
    # Remove temporary ONNX file created during the calibration process
    # if augmented_model_path.exists():
    #     augmented_model_path.unlink()

An error occurred: name 'CalibrationDataReaderCV' is not defined


In [18]:
loader = ImageNetDataReader('../input_rotation/testA',width=1024,
                            height=1024,start_index=0,end_index=0,
                            stride=1,batch_size=1,
                            model_path='/home/ubuntu/transformer-distillation/width1.onnx',
                            input_name='input')

In [5]:
import os

In [6]:
!python -m onnxruntime.quantization.preprocess --input /home/ubuntu/transformer-distillation/ps1.onnx --output /home/ubuntu/transformer-distillation/width2-pre2.onnx --skip_optimization True --verbose 3

In [7]:
# !python -m onnxruntime.quantization.preprocess --help

# Checking that onnxruntime works on the model

In [8]:
import onnxruntime

In [9]:
input = torch.randn(1,3,1024,1024)
providers = [
    ('CPUExecutionProvider')]

sess_options = onnxruntime.SessionOptions()
sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
sess_options.enable_mem_pattern = False
sess_options.use_deterministic_compute = True
sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
sess_options.enable_cpu_mem_arena = False


session = onnxruntime.InferenceSession("/home/ubuntu/transformer-distillation/width2-pre.onnx",sess_options,providers=providers)

input_data = input.cpu().numpy().astype(np.float32)
ort_inputs = {session.get_inputs()[0].name: input_data}
onnx_output = session.run(None, ort_inputs)

# Attempting to quantize the model

In [10]:
onnxruntime.quantization.shape_inference.quant_pre_process('/home/ubuntu/transformer-distillation/ps1.onnx', '/home/ubuntu/transformer-distillation/ps2.onnx', skip_symbolic_shape=False, skip_optimization=True)

In [11]:
def quantize_onnx_model(onnx_model_path, quantized_model_path):
    from onnxruntime.quantization import quantize_static,quantize_dynamic, QuantType
    import onnx
    onnx_opt_model = onnx.load(onnx_model_path)
    quantize_static(onnx_model_path,
                     quantized_model_path,
                     calibration_data_reader=loader,
                     weight_type=QuantType.QInt8,extra_options={'MatMulConstBOnly':True})

    print(f"quantized model saved to:{quantized_model_path}")
    print('ONNX full precision model size (MB):', os.path.getsize(onnx_model_path)/(1024*1024))
    print('ONNX quantized model size (MB):', os.path.getsize(quantized_model_path)/(1024*1024))

quantize_onnx_model('/home/ubuntu/transformer-distillation/ps2.onnx', '/home/ubuntu/transformer-distillation/width2-q.onnx')

img.shape = (1, 3, 1024, 1024)


[1;31m2024-07-23 15:54:42.119576678 [E:onnxruntime:, sequential_executor.cc:516 ExecuteKernel] Non-zero status code returned while running ReduceMax node. Name:'/model/up_levels.2/up_levels.2.0/self_attn/Slice_18_output_0_ReduceMax' Status Message: [m


RuntimeException: [ONNXRuntimeError] : 6 : RUNTIME_EXCEPTION : Non-zero status code returned while running ReduceMax node. Name:'/model/up_levels.2/up_levels.2.0/self_attn/Slice_18_output_0_ReduceMax' Status Message: 

In [None]:
onnxruntime.__version__

'1.16.3'