## 0. Setup

Install ultralytics to easily get access to YOLO tools.


In [1]:
!pip install ultralytics onnx onnx-simplifier onnxruntime

Collecting ultralytics
  Downloading ultralytics-8.3.58-py3-none-any.whl.metadata (35 kB)
Collecting onnx
  Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting onnx-simplifier
  Downloading onnx_simplifier-0.4.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting onnxruntime
  Downloading onnxruntime-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading ultralytics-8.3.58-py3-none-any.whl (905 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m905.3/905.3 kB[0m [31m

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 1. Convert model
❗If you don't have a Nvidia GPU, set `optimize_cpu=True` to optimize for CPU inference.

In [20]:
from ultralytics import YOLO

model_name = '/content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov11x.pt'
input_width = 640
input_height = 640

model = YOLO(model_name)
model.export(format="onnx", imgsz=[input_height,input_width], optimize=True)

Ultralytics 8.3.58 🚀 Python-3.10.12 torch-2.5.1+cu121 CPU (Intel Xeon 2.00GHz)
YOLO11x-seg summary (fused): 491 layers, 62,004,438 parameters, 0 gradients, 318.5 GFLOPs

[34m[1mPyTorch:[0m starting from '/content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov11x.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 38, 8400), (1, 32, 160, 160)) (119.0 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.47...
[34m[1mONNX:[0m export success ✅ 12.2s, saved as '/content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov11x.onnx' (236.9 MB)

Export complete (22.6s)
Results saved to [1m/content/drive/MyDrive/Aerial_River_Plastic_Wastes[0m
Predict:         yolo predict task=segment model=/content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov11x.onnx imgsz=640  
Validate:        yolo val task=segment model=/content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov11x.onnx imgsz=640 data=/home/anokitv/anoubhav/ac

'/content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov11x.onnx'

In [22]:
!onnxsim /content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov11x.onnx /content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov11x.onnx


Simplifying[33m...[0m
Finish! Here is the difference:
┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
┃[1m [0m[1m             [0m[1m [0m┃[1m [0m[1mOriginal Model[0m[1m [0m┃[1m [0m[1mSimplified Model[0m[1m [0m┃
┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
│ Add           │ 40             │ 40               │
│ Concat        │ 37             │ 37               │
│ Constant      │ 394            │ [1;32m393             [0m │
│ Conv          │ 186            │ 186              │
│ ConvTranspose │ 1              │ 1                │
│ Div           │ 1              │ 1                │
│ MatMul        │ 4              │ 4                │
│ MaxPool       │ 3              │ 3                │
│ Mul           │ 171            │ 171              │
│ Reshape       │ 14             │ 14               │
│ Resize        │ 2              │ 2                │
│ Sigmoid       │ 169            │ 169              │
│ Slice         │ 2              │ 2               

## 2. Sanity Check

In [23]:
# Loading model using ONNX-Runtime
import onnxruntime as ort

model_path = "/content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov11L.onnx"
session = ort.InferenceSession(model_path)

In [24]:
import cv2

def pad_and_resize(image, target_size, pad_color=(0, 0, 0)):
    """
    Resizes the image while preserving the aspect ratio, padding the shorter side.

    Parameters:
    - image: input image (numpy array)
    - target_size: tuple (width, height) of the target size
    - pad_color: color to use for padding, default is black (0, 0, 0)

    Returns:
    - resized image with padding
    """
    original_height, original_width = image.shape[:2]
    target_width, target_height = target_size

    # Calculate the aspect ratio of the image and the target size
    aspect_ratio_image = original_width / original_height
    aspect_ratio_target = target_width / target_height

    if aspect_ratio_image > aspect_ratio_target:
        # Wider than target, resize based on width
        new_width = target_width
        new_height = int(new_width / aspect_ratio_image)
    else:
        # Taller than target, resize based on height
        new_height = target_height
        new_width = int(new_height * aspect_ratio_image)

    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)

    # Calculate padding
    pad_top = (target_height - new_height) // 2
    pad_bottom = target_height - new_height - pad_top
    pad_left = (target_width - new_width) // 2
    pad_right = target_width - new_width - pad_left

    # Pad the image
    padded_image = cv2.copyMakeBorder(resized_image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=pad_color)

    return padded_image

In [7]:
import numpy as np

def image_loader(image_path, target_size=(640, 640)):
  img = cv2.imread(image_path) #BGR
  img = pad_and_resize(img, target_size)
  img = img[::-1] #RGB
  img = img[np.newaxis, ...].astype(np.float32)
  img = img.transpose(0, 3, 1, 2)
  return img

In [9]:
input_img_path = "/content/Aerial_Location_1_14.jpg"
img = image_loader(input_img_path)
print(img.shape)

(1, 3, 640, 640)


In [25]:
outputs = session.run(None, {"images": img})

In [26]:
outputs

[array([[[     3.8478,      21.678,      32.963, ...,       472.9,      490.11,       553.1],
         [     13.247,      13.032,      12.548, ...,      635.88,      619.06,      614.56],
         [     8.1469,      44.121,      127.95, ...,      420.77,      318.26,       182.7],
         ...,
         [  0.0073061,    0.035067,     0.11438, ...,     0.53436,     0.36118,     0.11159],
         [   -0.75675,     -1.1063,     -1.3782, ...,     -0.1679,    -0.12727,   -0.057011],
         [    -0.3512,    -0.37302,    -0.52974, ...,    -0.66083,    -0.43083,    -0.13249]]], dtype=float32),
 array([[[[   0.092001,    0.058503,     0.01511, ...,     0.15329,     0.17032,     0.22558],
          [    0.16392,     0.13497,     0.15448, ...,     0.21817,     0.19956,     0.15816],
          [    0.16572,     0.15756,     0.21512, ...,     0.21896,     0.24167,     0.20227],
          ...,
          [    0.35349,     0.27386,     0.48055, ...,     0.27963,     0.40457,     0.37733],
         