In [9]:
import tensorrt as trt 
import os 
import numpy as np 
import engine
import pycuda.driver as cuda
import torch 
import pycuda.autoinit

In [4]:
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [1]:
def convert_bytes(num):
    """
    Convert a byte count into a human-readable string (bytes, KB, MB, GB, TB).

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, then formatted with one decimal place.

    Args:
        num: Size in bytes (int or float).

    Returns:
        str: The scaled value followed by its unit, e.g. "14.2 MB". Values of
        1024 TB or more are reported in TB rather than falling off the end
        of the loop and returning None.
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB']
    for unit in units[:-1]:
        if num < 1024.0:
            return "%3.1f %s" % (num, unit)
        num /= 1024.0
    # Largest unit reached: always return a string, even for values >= 1024 TB.
    return "%3.1f %s" % (num, units[-1])


def file_size(file_path):
    """
    Report the size of a file as a human-readable string.

    Args:
        file_path: Path to the file to measure.

    Returns:
        The formatted size produced by convert_bytes, or None when
        file_path does not refer to an existing file.
    """
    if not os.path.isfile(file_path):
        return None
    size_in_bytes = os.path.getsize(file_path)
    return convert_bytes(size_in_bytes)

In [24]:
# Report the on-disk size of the yolov8n.engine file.
# Any other file path can be substituted here.
file_path = r"/home/airi/yolo/ONNX-TensorRT-Pytorch-Tensorflow-Face-Detection-Models-Quantization/models/yolov8n.engine"
print(file_size(file_path))

14.2 MB


### Is it possible to see TensorRT model weights?
- No, it is not possible to directly view the weights of a TensorRT model. TensorRT is an optimization and inference acceleration library provided by NVIDIA, and it performs various optimizations on the model to improve its runtime performance on NVIDIA GPUs. During the optimization process, TensorRT quantizes and compresses the model, prunes unnecessary operations, and applies other optimizations that can significantly reduce the model size and improve inference speed.
- As a result of these optimizations, the model weights are transformed into a format that is specific to TensorRT and not directly human-readable. The optimized model is typically saved in a binary format as a serialized engine file (commonly with a .engine or .plan extension), which contains the compressed and optimized representation of the model. (A .uff file, by contrast, is a legacy input format that TensorRT could import models from, not an optimized output.)
- If you need to inspect the weights or parameters of a model, you would typically need to access the original, unoptimized model format, such as a TensorFlow SavedModel or a PyTorch model, before it has been converted and optimized using TensorRT. Once the model has been optimized with TensorRT, it is not straightforward to retrieve the original weights in their original form.
- However, if you have access to the original model and want to explore its weights, you can examine the model using the corresponding deep learning framework's tools and functions. For example, in TensorFlow, you can use the model's APIs to access and inspect individual layer weights. Similarly, in PyTorch, you can inspect the model parameters using the model's state_dict() method.


In [6]:

# Build a TensorRT engine from the yolov8n ONNX export using the project-local
# `engine` module. In the recorded run below, the TRT log timestamps show the
# build taking roughly 2.5 minutes (13:33:24 -> 13:35:53).
engine_build = engine.EngineBuilder('/home/airi/yolo/ONNX-TensorRT-Pytorch-Tensorflow-Face-Detection-Models-Quantization/models/yolov8n.onnx' , device)
# NOTE(review): presumably selects a segmentation-specific build path; the
# outputs logged below (e.g. "output" with shape (1, 5, 8400)) look like
# detection heads, so confirm this flag is intended.
engine_build.seg = True
# fp16=True presumably requests half-precision kernels; the weight-conversion
# warnings in the log are consistent with that -- TODO confirm in engine.py.
# input_shape matches the (1, 3, 640, 640) input reported in the log.
# NOTE(review): iou_thres / conf_thres / topk look like NMS post-processing
# settings; verify how EngineBuilder.build uses them.
engine_build.build(fp16=True,
              input_shape=[1, 3, 640, 640],
              iou_thres=0.65,
              conf_thres=0.25,
              topk=100)

[09/30/2023-13:33:24] [TRT] [W] onnx2trt_utils.cpp:374: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[09/30/2023-13:33:24] [TRT] [W] input "input" with shape: (1, 3, 640, 640) dtype: DataType.FLOAT
[09/30/2023-13:33:24] [TRT] [W] output "output" with shape: (1, 5, 8400) dtype: DataType.FLOAT
[09/30/2023-13:33:24] [TRT] [W] output "onnx::Reshape_699" with shape: (1, 65, 80, 80) dtype: DataType.FLOAT
[09/30/2023-13:33:24] [TRT] [W] output "onnx::Reshape_718" with shape: (1, 65, 40, 40) dtype: DataType.FLOAT
[09/30/2023-13:33:24] [TRT] [W] output "onnx::Reshape_737" with shape: (1, 65, 20, 20) dtype: DataType.FLOAT
[09/30/2023-13:35:53] [TRT] [W] TensorRT encountered issues when converting weights between types and that could affect accuracy.
[09/30/2023-13:35:53] [TRT] [W] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the w

In [19]:
# Report the on-disk size of the quantized engine file, for comparison with
# the yolov8n.engine size printed earlier in the notebook.
file_path = r"/home/airi/yolo/ONNX-TensorRT-Pytorch-Tensorflow-Face-Detection-Models-Quantization/quant_models/quantized_yolov8n.engine"
print(file_size(file_path))

8.0 MB
