In [1]:
# Import Python Standard Library dependencies
import json
import os
from pathlib import Path
import random

# Import utility functions
# from cjm_psl_utils.core import download_file, file_extract
# from cjm_pil_utils.core import resize_img, get_img_files
from PIL import Image
# Import numpy
import numpy as np

# Import the pandas package
import pandas as pd

# Show DataFrames in full: never truncate cell contents, and never elide rows or columns
pd.set_option('max_colwidth', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Import PIL for image manipulation
from PIL import Image

# Import ONNX dependencies
import onnxruntime as ort # Import the ONNX Runtime
from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference
from onnxruntime.quantization import CalibrationDataReader, CalibrationMethod, create_calibrator, write_calibration_table


In [2]:
# ONNX model that the calibration step reads
onnx_file_path = "/home/ubuntu/transformer-distillation/ps1-self-no-window.onnx"

# Images handed to the calibration data reader
sample_img_paths = [
    "img_5001.png",
    "img_5002.png",
]

# Directory that receives the calibration table; also given to TensorRT as its cache path
trt_cache_dir = "cache"

In [3]:
class CalibrationDataReaderCV(CalibrationDataReader):
    """
    A subclass of CalibrationDataReader that supplies image data for calibrating
    computer vision models.

    Each call to `get_next` loads one image from disk, converts it to a
    float32 array scaled by 1/255 with a leading batch dimension and
    channels-first layout, and returns it keyed by the model's input name.
    Image paths are consumed once, in the order given.
    """

    def __init__(self, img_file_paths, target_sz, input_name='input'):
        """
        Initializes a new instance of the CalibrationDataReaderCV class.

        Args:
            img_file_paths (list): A list of image file paths.
            target_sz: The intended resize target. It is stored on the instance but
                       NOT applied: resizing is currently disabled, so every image is
                       fed to the model at its native resolution.
            input_name (str, optional): The name of the input node in the ONNX model. Default is 'input'.
        """
        super().__init__()  # Initialize the base class

        # Initialization of instance variables
        self._img_file_paths = img_file_paths
        self.input_name = input_name
        self.enum = iter(img_file_paths)  # Single-pass iterator over the image paths
        self.target_sz = target_sz  # Kept for reference only; see the note in the docstring

    def get_next(self):
        """
        Retrieves, processes, and returns the next image in the sequence as a NumPy array suitable for model input.

        Returns:
            dict: A dictionary with a single key-value pair where the key is `input_name` and the value is the
                  preprocessed image as a float32 NumPy array of shape (1, 3, H, W), or None if there are
                  no more images.
        """

        img_path = next(self.enum, None)  # Get the next image path
        if not img_path:
            return None  # If there are no more paths, return None

        # Load the image and convert it to RGB. `convert` returns a fully loaded copy,
        # so the underlying file can be closed immediately instead of leaking the handle
        # until garbage collection.
        with Image.open(img_path) as img_file:
            input_img = img_file.convert('RGB')

        # NOTE: no resizing happens here; `self.target_sz` is intentionally left unused.

        # (H, W, C) -> (C, H, W), add a batch dimension, and scale pixel values by 1/255
        input_tensor_np = np.array(input_img, dtype=np.float32).transpose((2, 0, 1))[None] / 255

        # Return the image in a dictionary under the specified input name
        return {self.input_name: input_tensor_np}

In [4]:
%%time

# Save path for temporary ONNX model used during calibration process
augmented_model_path = onnx_file_path.replace('.onnx', '') + '-augmented.onnx'

try:
    # Create a calibrator object for the ONNX model.
    calibrator = create_calibrator(
        model=onnx_file_path,
        op_types_to_calibrate=None,
        augmented_model_path=augmented_model_path,
        calibrate_method=CalibrationMethod.MinMax
    )

    # Set the execution providers for the calibrator.
    calibrator.set_execution_providers(["CUDAExecutionProvider", "CPUExecutionProvider"])

    # Initialize the custom CalibrationDataReader object, reading the input name from the model itself
    calibration_data_reader = CalibrationDataReaderCV(img_file_paths=sample_img_paths,
                                                      target_sz=1024,
                                                      input_name=calibrator.model.graph.input[0].name)

    # Collect calibration data using the specified data reader.
    calibrator.collect_data(data_reader=calibration_data_reader)

    # Initialize an empty dictionary to hold the new compute range values.
    new_compute_range = {}

    # Compute data and update the compute range for each key in the calibrator's data.
    for k, v in calibrator.compute_data().data.items():
        # Extract the min and max values from the range_value.
        v1, v2 = v.range_value
        # Convert the min and max values to float and store them in the new_compute_range dictionary.
        new_compute_range[k] = (float(v1.item()), float(v2.item()))

    # Make sure the output directory exists so writing the table cannot fail on a fresh checkout.
    os.makedirs(trt_cache_dir, exist_ok=True)

    # Write the computed calibration table to the specified directory.
    write_calibration_table(new_compute_range, dir=str(trt_cache_dir))

except Exception as e:
    # Report the failure, then re-raise: later cells depend on the calibration table,
    # so continuing silently would run them against a missing or stale table.
    print("An error occurred:", e)
    raise

finally:
    # Remove the temporary ONNX file created during the calibration process.
    # `augmented_model_path` is a plain string, so wrap it in a Path for the filesystem calls.
    augmented_model_file = Path(augmented_model_path)
    if augmented_model_file.exists():
        augmented_model_file.unlink()

[0;93m2024-07-24 13:36:48.805163757 [W:onnxruntime:, transformer_memcpy.cc:74 ApplyImpl] 650 Memcpy nodes are added to the graph main_graph for CUDAExecutionProvider. It might have negative impact on performance (including unable to run CUDA graph). Set session_options.log_severity_level=1 to see the detail logs before this message.[m


CPU times: user 44.9 s, sys: 3.6 s, total: 48.5 s
Wall time: 47.1 s


In [5]:
# Display the hard-coded calibration file paths as a one-column table
pd.DataFrame(
    [
        '/home/ubuntu/transformer-distillation/cache/calibration.flatbuffers',
        '/home/ubuntu/transformer-distillation/cache/calibration.cache',
        '/home/ubuntu/transformer-distillation/cache/calibration.json',
    ]
)

Unnamed: 0,0
0,/home/ubuntu/transformer-distillation/cache/calibration.flatbuffers
1,/home/ubuntu/transformer-distillation/cache/calibration.cache
2,/home/ubuntu/transformer-distillation/cache/calibration.json


In [6]:
# Ask ONNX Runtime which execution providers are available in this environment
ort.get_available_providers()

['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']

In [7]:
import subprocess

In [8]:
import onnx

In [9]:
from onnx import shape_inference

# From this cell on, the model is addressed by a path relative to the working directory
onnx_file_path = 'ps1-self-no-window.onnx'
model_path = 'ps1-self-no-window.onnx'

# Read the model from disk
model = onnx.load(model_path)

# Run ONNX shape inference over the loaded graph
inferred_model = shape_inference.infer_shapes(model)

# Write the shape-inferred model out under a new file name
onnx.save(inferred_model, 'ps1-self-no-window-nothing.onnx')

In [10]:
# Switch to the shape-inferred copy of the model for the inference session created later
onnx_file_path = "ps1-self-no-window-nothing.onnx"

In [11]:
# command2 = ["python", "-m", "onnxruntime.quantization.preprocess","--input", onnx_file_path,"--output", onnx_file_path]
# # Run the command
# _ = subprocess.run(command2)


In [12]:
# import onnx
# from onnx import numpy_helper

# # Load the ONNX model
# model_path = onnx_file_path
# model = onnx.load(model_path)

# # Print the model's input and output types
# def print_value_info(value_info):
#     for value in value_info:
#         print(f"Name: {value.name}, Type: {onnx.helper.printable_type(value.type)}")

# # Print the data types of all inputs
# print("Model Inputs:")
# print_value_info(model.graph.input)

# # Print the data types of all outputs
# print("Model Outputs:")
# print_value_info(model.graph.output)

# # Print the data types of all intermediate tensors
# print("Intermediate Tensors:")
# print_value_info(model.graph.value_info)

: 

In [13]:
# Execution provider configuration: TensorRT running in INT8 mode
providers = [
    (
        'TensorrtExecutionProvider',
        dict(
            device_id=0,  # The device ID
            trt_max_workspace_size=24e9,  # Maximum workspace size for TensorRT engine (1e9 ≈ 1GB)
            trt_engine_cache_enable=False,  # TensorRT engine caching is switched off here
            trt_engine_cache_path=str(trt_cache_dir),  # Path for TensorRT engine, profile files, and INT8 calibration table
            trt_int8_enable=True,  # Enable INT8 mode in TensorRT
            trt_int8_calibration_table_name='calibration.flatbuffers',  # INT8 calibration table file for non-QDQ models in INT8 mode
        ),
    )
]

# Session options: sequential execution, no ONNX Runtime graph optimizations,
# deterministic compute, and no CPU memory arena or memory-pattern planning
sess_opt = ort.SessionOptions()
sess_opt.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
sess_opt.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
sess_opt.use_deterministic_compute = True
sess_opt.enable_cpu_mem_arena = False
sess_opt.enable_mem_pattern = False

# Load the model and create an InferenceSession
session = ort.InferenceSession(onnx_file_path, sess_options=sess_opt, providers=providers)