## 0. Setup

Install ultralytics to easily get access to YOLO tools.


In [1]:
!pip install ultralytics onnx onnx-simplifier onnxruntime

Collecting ultralytics
  Downloading ultralytics-8.3.58-py3-none-any.whl.metadata (35 kB)
Collecting onnx
  Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting onnx-simplifier
  Downloading onnx_simplifier-0.4.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting onnxruntime
  Downloading onnxruntime-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading ultralytics-8.3.58-py3-none-any.whl (905 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m905.3/905.3 kB[0m [31m

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [20]:
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
%pip install -qr requirements.txt  # install

Cloning into 'yolov5'...
remote: Enumerating objects: 17093, done.[K
remote: Counting objects: 100% (36/36), done.[K
remote: Compressing objects: 100% (29/29), done.[K
remote: Total 17093 (delta 23), reused 8 (delta 7), pack-reused 17057 (from 5)[K
Receiving objects: 100% (17093/17093), 15.70 MiB | 16.63 MiB/s, done.
Resolving deltas: 100% (11728/11728), done.
/content/yolov5


In [21]:
!ls

benchmarks.py	 data	     LICENSE	     README.zh-CN.md   tutorial.ipynb
CITATION.cff	 detect.py   models	     requirements.txt  utils
classify	 export.py   pyproject.toml  segment	       val.py
CONTRIBUTING.md  hubconf.py  README.md	     train.py


## 1. Convert model
❗If you don't have a Nvidia GPU, set `optimize_cpu=True` to optimize for CPU inference.

In [27]:
!python export.py --weights /content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov5x.pt --include onnx

[34m[1mexport: [0mdata=data/coco128.yaml, weights=['/content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov5x.pt'], imgsz=[640, 640], batch_size=1, device=cpu, half=False, inplace=False, keras=False, optimize=False, int8=False, per_tensor=False, dynamic=False, cache=, simplify=False, mlmodel=False, opset=17, verbose=False, workspace=4, nms=False, agnostic_nms=False, topk_per_class=100, topk_all=100, iou_thres=0.45, conf_thres=0.25, include=['onnx']
YOLOv5 🚀 v7.0-393-g6981c274 Python-3.10.12 torch-2.5.1+cu121 CPU

Fusing layers... 
Model summary: 330 layers, 88249583 parameters, 0 gradients, 264.0 GFLOPs

[34m[1mPyTorch:[0m starting from /content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov5x.pt with output shape (1, 25200, 39) (169.1 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0...
[34m[1mONNX:[0m export success ✅ 16.0s, saved as /content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov5x.onnx (337.1 MB)

Export complete (26.0s)
Results saved to [1m/content/driv

In [28]:
!onnxsim /content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov5x.onnx /content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov5x.onnx

Simplifying[33m...[0m
Finish! Here is the difference:
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
┃[1m [0m[1m          [0m[1m [0m┃[1m [0m[1mOriginal Model[0m[1m [0m┃[1m [0m[1mSimplified Model[0m[1m [0m┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
│ Add        │ 31             │ 31               │
│ Concat     │ 17             │ 17               │
│ Constant   │ 286            │ [1;32m276             [0m │
│ Conv       │ 129            │ 129              │
│ MaxPool    │ 3              │ 3                │
│ Mul        │ 138            │ 138              │
│ Pow        │ 3              │ 3                │
│ Reshape    │ 6              │ 6                │
│ Resize     │ 3              │ 3                │
│ Sigmoid    │ 135            │ 135              │
│ Split      │ 3              │ 3                │
│ Transpose  │ 3              │ 3                │
│ Model Size │ 337.1MiB       │ 337.1MiB         │
└────────────┴────────────────┴──────────────

## 2. Sanity Check

In [29]:
# Loading model using ONNX-Runtime
import onnxruntime as ort

model_path = "/content/drive/MyDrive/Aerial_River_Plastic_Wastes/yolov5x.onnx"
session = ort.InferenceSession(model_path)

In [30]:
import cv2

def pad_and_resize(image, target_size, pad_color=(0, 0, 0)):
    """
    Resizes the image while preserving the aspect ratio, padding the shorter side.

    Parameters:
    - image: input image (numpy array)
    - target_size: tuple (width, height) of the target size
    - pad_color: color to use for padding, default is black (0, 0, 0)

    Returns:
    - resized image with padding
    """
    original_height, original_width = image.shape[:2]
    target_width, target_height = target_size

    # Calculate the aspect ratio of the image and the target size
    aspect_ratio_image = original_width / original_height
    aspect_ratio_target = target_width / target_height

    if aspect_ratio_image > aspect_ratio_target:
        # Wider than target, resize based on width
        new_width = target_width
        new_height = int(new_width / aspect_ratio_image)
    else:
        # Taller than target, resize based on height
        new_height = target_height
        new_width = int(new_height * aspect_ratio_image)

    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)

    # Calculate padding
    pad_top = (target_height - new_height) // 2
    pad_bottom = target_height - new_height - pad_top
    pad_left = (target_width - new_width) // 2
    pad_right = target_width - new_width - pad_left

    # Pad the image
    padded_image = cv2.copyMakeBorder(resized_image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=pad_color)

    return padded_image

In [31]:
import numpy as np

def image_loader(image_path, target_size=(640, 640)):
  img = cv2.imread(image_path) #BGR
  img = pad_and_resize(img, target_size)
  img = img[::-1] #RGB
  img = img[np.newaxis, ...].astype(np.float32)
  img = img.transpose(0, 3, 1, 2)
  return img

In [32]:
input_img_path = "/content/Aerial_Location_1_14.jpg"
img = image_loader(input_img_path)
print(img.shape)

(1, 3, 640, 640)


In [33]:
outputs = session.run(None, {"images": img})

In [34]:
outputs

[array([[[     1.1087,      4.4789,      2.7878, ...,     -1.5955,     0.60007,     0.45965],
         [     11.364,      3.9968,      4.8552, ...,     -1.2081,     0.37581,     0.26707],
         [     18.861,      3.6331,      5.1038, ...,     -1.1313,     0.64896,     0.29261],
         ...,
         [        558,      619.54,         455, ...,     0.42701,     -1.8159,     0.75374],
         [     587.31,      617.47,      456.54, ...,     0.45112,     -1.7365,     0.71934],
         [     618.18,       624.7,      466.73, ...,     0.47588,     -1.6322,     0.73714]]], dtype=float32),
 array([[[[  -0.089977,    -0.12742,    -0.13962, ...,    -0.10317,    -0.13129,    -0.11135],
          [   -0.11563,    -0.15957,    -0.17358, ...,    -0.09857,    -0.14241,       -0.14],
          [     -0.106,    -0.16191,    -0.18623, ...,    -0.11213,     -0.1564,      -0.159],
          ...,
          [   -0.10583,    -0.15212,    -0.20448, ...,    -0.19162,    -0.22708,     -0.2087],
         