In [1]:
import torch

def check_cuda():
    if torch.cuda.is_available():
        print(f"CUDA is available. GPU: {torch.cuda.get_device_name(0)}")
        print("CUDA version:", torch.version.cuda)
    else:
        print("CUDA is not available. Please activate GPY")

check_cuda()

CUDA is available. GPU: Tesla T4
CUDA version: 12.1


In [2]:
!pip install roboflow
!pip install ultralytics
!pip install torch torchvision torchaudio

Collecting roboflow
  Downloading roboflow-1.1.29-py3-none-any.whl.metadata (9.3 kB)
Collecting certifi==2023.7.22 (from roboflow)
  Downloading certifi-2023.7.22-py3-none-any.whl.metadata (2.2 kB)
Collecting chardet==4.0.0 (from roboflow)
  Downloading chardet-4.0.0-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting cycler==0.10.0 (from roboflow)
  Downloading cycler-0.10.0-py2.py3-none-any.whl.metadata (722 bytes)
Collecting idna==2.10 (from roboflow)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting opencv-python-headless==4.8.0.74 (from roboflow)
  Downloading opencv_python_headless-4.8.0.74-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting python-magic (from roboflow)
  Downloading python_magic-0.4.27-py2.py3-none-any.whl.metadata (5.8 kB)
Downloading roboflow-1.1.29-py3-none-any.whl (74 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.9/74.9 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0

# Data preparing

In [3]:
import os
import shutil
import numpy as np

# Paths to VOC directories
base_dir = '/kaggle/input/pascal-voc-2012-dataset/VOC2012_train_val/VOC2012_train_val'
images_dir = os.path.join(base_dir, 'JPEGImages')
annotations_dir = os.path.join(base_dir, 'Annotations')
sets_dir = os.path.join(base_dir, 'ImageSets/Main')

# Setup new directory structure
split_base = '/kaggle/working/yolo_dataset'
train_images_dir = os.path.join(split_base, 'images/train')
val_images_dir = os.path.join(split_base, 'images/val')
train_labels_dir = os.path.join(split_base, 'labels/train')
val_labels_dir = os.path.join(split_base, 'labels/val')

os.makedirs(train_images_dir, exist_ok=True)
os.makedirs(val_images_dir, exist_ok=True)
os.makedirs(train_labels_dir, exist_ok=True)
os.makedirs(val_labels_dir, exist_ok=True)

# Function to read predefined splits
def get_files_from_split(file_path):
    with open(file_path, 'r') as f:
        files = [x.strip().split()[0] for x in f.readlines() if not x.startswith('#')]
    return files

# Get all files from trainval.txt
trainval_files = get_files_from_split(os.path.join(sets_dir, 'trainval.txt'))
np.random.shuffle(trainval_files)  # Shuffle to randomize the split

split_index = int(len(trainval_files) * 0.9)
train_files = trainval_files[:split_index]
val_files = trainval_files[split_index:]

# Function for copying files remains the same
def copy_files(files, source_dir, target_dir, file_ext):
    for f in files:
        shutil.copy(os.path.join(source_dir, f + file_ext), os.path.join(target_dir, f + file_ext))

# Proceed with copying images and annotations as before
copy_files(train_files, images_dir, train_images_dir, '.jpg')
copy_files(val_files, images_dir, val_images_dir, '.jpg')
copy_files(train_files, annotations_dir, train_labels_dir, '.xml')
copy_files(val_files, annotations_dir, val_labels_dir, '.xml')

print(f"Total images (trainval): {len(trainval_files)}")
print(f"Training images: {len(train_files)}")
print(f"Validation images: {len(val_files)}")


Total images (trainval): 11540
Training images: 10386
Validation images: 1154


In [4]:
import xml.etree.ElementTree as ET
import os
import shutil
import numpy as np
# Add the convert_annotation function here (assuming you've defined class_mapping)
class_mapping = {
    'aeroplane': 0, 'bicycle': 1, 'bird': 2, 'boat': 3,
    'bottle': 4, 'bus': 5, 'car': 6, 'cat': 7,
    'chair': 8, 'cow': 9, 'diningtable': 10, 'dog': 11,
    'horse': 12, 'motorbike': 13, 'person': 14, 'pottedplant': 15,
    'sheep': 16, 'sofa': 17, 'train': 18, 'tvmonitor': 19
}

def convert_annotation(annotation_path, output_path):
    tree = ET.parse(annotation_path)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    with open(output_path, 'w') as out_file:
        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls_name = obj.find('name').text
            if int(difficult) == 1 or cls_name not in class_mapping:
                continue
            cls_id = class_mapping[cls_name]
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('ymin').text),
                 float(xmlbox.find('xmax').text), float(xmlbox.find('ymax').text))
            x_center = ((b[0] + b[2]) / 2) / w
            y_center = ((b[1] + b[3]) / 2) / h
            bbox_width = (b[2] - b[0]) / w
            bbox_height = (b[3] - b[1]) / h
            out_file.write(f"{cls_id} {x_center} {y_center} {bbox_width} {bbox_height}\n")

# Adjusted function to handle XML to TXT conversion
def process_and_convert_files(files, img_source_dir, img_target_dir, ann_source_dir, ann_target_dir):
    for f in files:
        # Copy images
        img_path = os.path.join(img_source_dir, f + '.jpg')
        shutil.copy(img_path, img_target_dir)
        
        # Convert annotations
        xml_path = os.path.join(ann_source_dir, f + '.xml')
        txt_path = os.path.join(ann_target_dir, f + '.txt')
        convert_annotation(xml_path, txt_path)

# Process training files
process_and_convert_files(train_files, images_dir, train_images_dir, annotations_dir, train_labels_dir)

# Process validation files
process_and_convert_files(val_files, images_dir, val_images_dir, annotations_dir, val_labels_dir)

print(f"Total images (trainval): {len(trainval_files)}")
print(f"Training images: {len(train_files)}")
print(f"Validation images: {len(val_files)}")

Total images (trainval): 11540
Training images: 10386
Validation images: 1154


In [5]:
import wandb
from ultralytics import YOLO

API_KEY="7830bc715eef67f16fd7b2db8966fdf7b7b53c9d"
wandb.login(key=API_KEY)
# Initialize wandb
run = wandb.init()

# Replace 'your_project_name/model_name:version' with the actual artifact name
artifact = run.use_artifact('uncategorized/run_rzzkherx_model:v0', type='model')
artifact_dir = artifact.download()

# Assuming the model is saved as 'best.pt' in the artifact
model_path = artifact_dir + '/best.pt'
model = YOLO(model_path)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mvenkatasatish4499[0m ([33mvenkatasatish[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m:   1 of 1 files downloaded.  


In [6]:
from ultralytics import YOLO

# Load the model
#model_path='/kaggle/working/artifacts/run_wivfpt02_model:v0/best.pt'
model = YOLO(model_path)

In [7]:
path = model.export(format="onnx")

Ultralytics YOLOv8.2.19 🚀 Python-3.10.13 torch-2.1.2 CPU (Intel Xeon 2.00GHz)
Model summary (fused): 168 layers, 3009548 parameters, 0 gradients, 8.1 GFLOPs

[34m[1mPyTorch:[0m starting from '/kaggle/working/artifacts/run_rzzkherx_model:v0/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 24, 8400) (6.0 MB)

[34m[1mONNX:[0m starting export with onnx 1.16.0 opset 17...
[34m[1mONNX:[0m export success ✅ 3.6s, saved as '/kaggle/working/artifacts/run_rzzkherx_model:v0/best.onnx' (11.7 MB)

Export complete (7.3s)
Results saved to [1m/kaggle/working/artifacts/run_rzzkherx_model:v0[0m
Predict:         yolo predict task=detect model=/kaggle/working/artifacts/run_rzzkherx_model:v0/best.onnx imgsz=640  
Validate:        yolo val task=detect model=/kaggle/working/artifacts/run_rzzkherx_model:v0/best.onnx imgsz=640 data=/kaggle/input/dataset-yaml/data.yaml  
Visualize:       https://netron.app


In [8]:
onnx_model = YOLO(path,task='detect')

In [9]:
metrics = onnx_model.val(data='/kaggle/input/data-yaml/data.yaml')  # no arguments needed, dataset and settings remembered
metrics.box.map  # map50-95
metrics.box.map50  # map50
metrics.box.map75  # map75
metrics.box.maps  # a list contains map50-95 of each category

Ultralytics YOLOv8.2.19 🚀 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla T4, 15102MiB)
Loading /kaggle/working/artifacts/run_rzzkherx_model:v0/best.onnx for ONNX Runtime inference...
[31m[1mrequirements:[0m Ultralytics requirement ['onnxruntime-gpu'] not found, attempting AutoUpdate...
Collecting onnxruntime-gpu
  Downloading onnxruntime_gpu-1.18.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.3 kB)
Collecting coloredlogs (from onnxruntime-gpu)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime-gpu)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime_gpu-1.18.0-cp310-cp310-manylinux_2_28_x86_64.whl (199.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 MB[0m [31m118.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

[1;31m2024-05-23 01:23:52.714617364 [E:onnxruntime:Default, provider_bridge_ort.cc:1744 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1426 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.11: cannot open shared object file: No such file or directory
[m
[0;93m2024-05-23 01:23:52.714646559 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:870 CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Please reference https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirementsto ensure all dependencies are met.[m


Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...


100%|██████████| 755k/755k [00:00<00:00, 13.8MB/s]
[34m[1mval: [0mScanning /kaggle/working/yolo_dataset/labels/val... 1154 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1154/1154 [00:00<00:00, 1204.25it/s]

[34m[1mval: [0mNew cache created: /kaggle/working/yolo_dataset/labels/val.cache



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1154/1154 [01:46<00:00, 10.79it/s]


                   all       1154       2663      0.934      0.879      0.948      0.805
             aeroplane       1154         85          1      0.965      0.983       0.88
               bicycle       1154         85      0.962      0.905      0.961      0.844
                  bird       1154        108      0.925       0.91      0.965      0.829
                  boat       1154         86      0.941      0.748      0.885      0.657
                bottle       1154        123      0.936      0.714      0.867      0.674
                   bus       1154         55          1      0.874      0.969      0.865
                   car       1154        187      0.962      0.814      0.937      0.771
                   cat       1154        136      0.989      0.956      0.993      0.911
                 chair       1154        198      0.915      0.818       0.93      0.779
                   cow       1154         52      0.856      0.962      0.977      0.881
           diningtabl

array([    0.88018,     0.84438,     0.82915,     0.65743,     0.67448,     0.86504,     0.77115,      0.9113,     0.77879,     0.88135,     0.79849,     0.88468,     0.91242,       0.781,     0.79642,     0.63738,     0.71707,     0.85589,     0.85876,       0.774])

In [10]:
import cv2
from ultralytics import YOLO

# Load the YOLOv8 model

# Open the video file
video_path = "/kaggle/input/people-video/people.mp4"
output_video_path = "/kaggle/working/output_ONNX.mp4"
cap = cv2.VideoCapture(video_path)

# Initialize the video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        results = onnx_model.track(frame, persist=True)

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Write the annotated frame to the output video
        out.write(annotated_frame)
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture and writer objects
cap.release()
out.release()

print(f"Output video saved to {output_video_path}")


[31m[1mrequirements:[0m Ultralytics requirement ['lapx>=0.5.2'] not found, attempting AutoUpdate...
Collecting lapx>=0.5.2
  Downloading lapx-0.5.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.9 kB)
Downloading lapx-0.5.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: lapx
Successfully installed lapx-0.5.9

[31m[1mrequirements:[0m AutoUpdate success ✅ 11.8s, installed 1 package: ['lapx>=0.5.2']
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m

Loading /kaggle/working/artifacts/run_rzzkherx_model:v0/best.onnx for ONNX Runtime inference...



[1;31m2024-05-23 01:26:02.374386347 [E:onnxruntime:Default, provider_bridge_ort.cc:1744 TryGetProviderInfo_CUDA] /onnxruntime_src/onnxruntime/core/session/provider_bridge_ort.cc:1426 onnxruntime::Provider& onnxruntime::ProviderLibrary::Get() [ONNXRuntimeError] : 1 : FAIL : Failed to load library libonnxruntime_providers_cuda.so with error: libcublasLt.so.11: cannot open shared object file: No such file or directory
[m
[0;93m2024-05-23 01:26:02.374433836 [W:onnxruntime:Default, onnxruntime_pybind_state.cc:870 CreateExecutionProviderInstance] Failed to create CUDAExecutionProvider. Please reference https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirementsto ensure all dependencies are met.[m


0: 640x640 1 bird, 2 persons, 87.3ms
Speed: 3.4ms preprocess, 87.3ms inference, 5.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 bird, 2 persons, 76.2ms
Speed: 2.1ms preprocess, 76.2ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 bird, 2 persons, 119.6ms
Speed: 2.6ms preprocess, 119.6ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 3 birds, 2 persons, 69.1ms
Speed: 2.3ms preprocess, 69.1ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 3 birds, 2 persons, 115.3ms
Speed: 2.4ms preprocess, 115.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 3 birds, 2 persons, 113.8ms
Speed: 2.2ms preprocess, 113.8ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 3 birds, 2 persons, 113.8ms
Speed: 2.2ms preprocess, 113.8ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 3 birds, 2 persons, 72.3ms
Speed

# TensorRt Evaluation

In [11]:
tensor = model.export(format="engine")

Ultralytics YOLOv8.2.19 🚀 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla T4, 15102MiB)
Model summary (fused): 168 layers, 3009548 parameters, 0 gradients, 8.1 GFLOPs

[34m[1mPyTorch:[0m starting from '/kaggle/working/artifacts/run_rzzkherx_model:v0/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 24, 8400) (6.0 MB)
[31m[1mrequirements:[0m Ultralytics requirement ['onnxsim>=0.4.33'] not found, attempting AutoUpdate...
Collecting onnxsim>=0.4.33
  Downloading onnxsim-0.4.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Downloading onnxsim-0.4.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m0m
[?25hInstalling collected packages: onnxsim
Successfully installed onnxsim-0.4.36

[31m[1mrequirements:[0m AutoUpdate success ✅ 12.0s, installed 1 package: ['onnxsim>=0.4.33']
[31m[1mreq

In [12]:
tensor

'/kaggle/working/artifacts/run_rzzkherx_model:v0/best.engine'

In [13]:
# Load the exported TensorRT model
tensorrt_model = YOLO(tensor,task='detect')

In [15]:
import numpy as np

if not hasattr(np, 'bool'):
    np.bool = np.bool_

  if not hasattr(np, 'bool'):


In [16]:
metrics = tensorrt_model.val(data='/kaggle/input/data-yaml/data.yaml')  # no arguments needed, dataset and settings remembered
metrics.box.map  # map50-95
metrics.box.map50  # map50
metrics.box.map75  # map75
metrics.box.maps  # a list contains map50-95 of each category

Ultralytics YOLOv8.2.19 🚀 Python-3.10.13 torch-2.1.2 CUDA:0 (Tesla T4, 15102MiB)
Loading /kaggle/working/artifacts/run_rzzkherx_model:v0/best.engine for TensorRT inference...
[05/23/2024-01:39:12] [TRT] [I] The logger passed into createInferRuntime differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.

[05/23/2024-01:39:12] [TRT] [I] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 2772, GPU 833 (MiB)
[05/23/2024-01:39:12] [TRT] [I] Loaded engine size: 14 MiB
[05/23/2024-01:39:12] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +0, GPU +32, now: CPU 2798, GPU 879 (MiB)
[05/23/2024-01:39:12] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +0, now: CPU 0, GPU 0 (MiB)
[05/23/2024-01:39:12] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +0, GPU +32, now: CPU 2783, GPU 879 (MiB)
[05/23/2024-01:39:12] [TRT] [I] [MemUsageChange] Tens

[34m[1mval: [0mScanning /kaggle/working/yolo_dataset/labels/val.cache... 1154 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1154/1154 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1154/1154 [00:11<00:00, 101.68it/s]


                   all       1154       2663      0.934      0.879      0.948      0.805
             aeroplane       1154         85          1      0.965      0.983       0.88
               bicycle       1154         85      0.962      0.905      0.961      0.844
                  bird       1154        108      0.925       0.91      0.965      0.829
                  boat       1154         86      0.941      0.748      0.885      0.657
                bottle       1154        123      0.936      0.714      0.867      0.674
                   bus       1154         55          1      0.874      0.969      0.865
                   car       1154        187      0.962      0.814      0.937      0.771
                   cat       1154        136      0.989      0.956      0.993      0.911
                 chair       1154        198      0.915      0.818       0.93      0.779
                   cow       1154         52      0.856      0.962      0.977      0.881
           diningtabl

array([    0.88018,     0.84438,     0.82915,     0.65743,     0.67448,     0.86504,     0.77115,      0.9113,     0.77879,     0.88135,     0.79849,     0.88468,     0.91242,       0.781,     0.79642,     0.63738,     0.71707,     0.85589,     0.85876,       0.774])

In [17]:
import cv2
from ultralytics import YOLO

# Load the YOLOv8 model

# Open the video file
video_path = "/kaggle/input/people-video/people.mp4"
output_video_path = "/kaggle/working/output_tensorRt.mp4"
cap = cv2.VideoCapture(video_path)

# Initialize the video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for the output video
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# Loop through the video frames
while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run YOLOv8 tracking on the frame, persisting tracks between frames
        results = tensorrt_model.track(frame, persist=True)

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Write the annotated frame to the output video
        out.write(annotated_frame)
    else:
        # Break the loop if the end of the video is reached
        break

# Release the video capture and writer objects
cap.release()
out.release()

print(f"Output video saved to {output_video_path}")


Loading /kaggle/working/artifacts/run_rzzkherx_model:v0/best.engine for TensorRT inference...
[05/23/2024-01:39:29] [TRT] [I] The logger passed into createInferRuntime differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.

[05/23/2024-01:39:29] [TRT] [I] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 2931, GPU 1041 (MiB)
[05/23/2024-01:39:29] [TRT] [I] Loaded engine size: 14 MiB
[05/23/2024-01:39:29] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +1, GPU +36, now: CPU 2958, GPU 1093 (MiB)
[05/23/2024-01:39:29] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +0, now: CPU 0, GPU 0 (MiB)
[05/23/2024-01:39:29] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +0, GPU +32, now: CPU 2943, GPU 1095 (MiB)
[05/23/2024-01:39:29] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +0, now: CP