[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rosamarco/plastic_in_river_detector/blob/main/explore_dataset.ipynb)

<div class="markdown-google-sans">
  <h1>Plastic in River Detector</h1>
</div>


# Mount Google Drive and navigate to the /mydrive/yolov8 folder

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

# This creates a symbolic link so that now the path /content/gdrive/My\ Drive/ is equal to /mydrive
!ln -s /content/gdrive/My\ Drive/Plastic\ in\ River\ Detector /mydrive

# Navigate to /mydrive/yolov8
%cd /mydrive/yolov8

Mounted at /content/gdrive


# Install YOLOv8 (Ultralytics) and Supervision

In [3]:
!pip install ultralytics
!pip install supervision

from IPython import display
display.clear_output()

import ultralytics
ultralytics.checks()

Ultralytics YOLOv8.0.203 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla T4, 15102MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 27.1/78.2 GB disk)


# Train the model

In [None]:
from ultralytics import YOLO

# Build a fresh YOLOv8x model from its architecture definition (training from scratch).
model = YOLO("yolov8x.yaml")

# To resume a previous run instead, load its last checkpoint:
# model = YOLO("/mydrive/yolov8/runs/detect/train/weights/last.pt")

# Training settings, gathered in one place so they are easy to tune.
train_settings = dict(
    data='/mydrive/dataset/yolov8/config.yaml',
    epochs=350,
    imgsz=1024,
    save_period=50,
)

# Launch the training run.
results = model.train(**train_settings)

Ultralytics YOLOv8.0.200 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla V100-SXM2-16GB, 16151MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/mydrive/yolov8/runs/detect/train/weights/last.pt, data=/mydrive/yolov8/config.yaml, epochs=200, patience=50, batch=16, imgsz=960, save=True, save_period=50, cache=False, device=None, workers=8, project=None, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, re

In [None]:
# Synchronization between the Colab VM and the Drive backend happens asynchronously.
# Flushing and unmounting here ensures that the model weights are persisted on the
# mounted drive. `drive` is the google.colab module imported in the mount cell.
drive.flush_and_unmount()

# Inference

In [12]:
import pathlib
import os

import supervision as sv
import numpy as np
from ultralytics import YOLO

# Collect the input videos. Sorting makes the processing order deterministic,
# since glob yields entries in an arbitrary, filesystem-dependent order.
video_paths = [str(path) for path in sorted(pathlib.Path('/mydrive/video').glob('*.mp4'))]
print(f"{video_paths=}")

# Load the best.pt weights of the "train4" run.
model = YOLO("/mydrive/yolov8/runs/detect/train4/weights/best.pt")

video_paths=['/mydrive/video/IMG_2917.mp4', '/mydrive/video/IMG_2920.mp4', '/mydrive/video/IMG_2921.mp4', '/mydrive/video/IMG_2925.mp4', '/mydrive/video/IMG_2927.mp4']


In [None]:
# Created once and reused for every frame instead of being rebuilt on each call.
box_annotator = sv.BoxAnnotator(thickness=2, text_thickness=2, text_scale=1)


def process_frame(frame: np.ndarray, _) -> np.ndarray:
    """Run the detector on one video frame and draw the detections on it.

    Args:
        frame: The frame to process, as passed by the video callback.
        _: Second callback argument; unused here.

    Returns:
        The frame with bounding boxes and "<class name> <confidence>" labels drawn.
    """
    results = model.predict(frame, conf=0.35, imgsz=1024)[0]
    detections = sv.Detections.from_ultralytics(results)

    # Read the per-detection fields directly rather than unpacking the tuples
    # produced by iterating over `detections`: this does not depend on how many
    # fields that tuple has, and it no longer shadows the `_` parameter.
    labels = [
        f"{model.names[class_id]} {confidence:0.2f}"
        for class_id, confidence in zip(detections.class_id, detections.confidence)
    ]

    return box_annotator.annotate(scene=frame, detections=detections, labels=labels)

# Annotate every input video and store the result in the output folder.
output_dir = "/mydrive/yolov8/video_result"
os.makedirs(output_dir, exist_ok=True)  # make sure the output folder exists

for video in video_paths:
    # Strip the extension: basename() keeps the ".mp4" suffix, which previously
    # produced output files named like "IMG_2917.mp4.mp4".
    name = os.path.splitext(os.path.basename(video))[0]
    sv.process_video(source_path=video, target_path=f"{output_dir}/{name}.mp4", callback=process_frame)

# Metrics

In [None]:
from ultralytics import YOLO

# Weights to evaluate: best.pt of the "train" run.
model = YOLO('/mydrive/yolov8/runs/detect/train/weights/best.pt')

# Evaluation settings: score the model on the dataset's test split.
val_settings = dict(
    data='/mydrive/dataset/yolov8/config.yaml',
    split='test',
    conf=0.35,
    iou=0.85,
)

# Run validation and keep the resulting metrics object.
metrics = model.val(**val_settings)

Ultralytics YOLOv8.0.203 🚀 Python-3.10.12 torch-2.1.0+cu118 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8x summary (fused): 268 layers, 68125494 parameters, 0 gradients, 257.4 GFLOPs
[34m[1mval: [0mScanning /content/gdrive/My Drive/Unibas/Visione e Percezione/yolov8/WCB5G.v17-rocky-river-processed.yolov8/test/labels.cache... 86 images, 3 backgrounds, 0 corrupt: 100%|██████████| 86/86 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:11<00:00,  1.84s/it]
                   all         86        308      0.425      0.426      0.414      0.203
                DEBRIS         86         94      0.218      0.202      0.138     0.0447
                 WASTE         86        214      0.632       0.65      0.691      0.361
Speed: 3.1ms preprocess, 83.4ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1mruns/detect/val5[0m


# Inference with SAHI

In [None]:
!pip install -U torch sahi ultralytics

In [None]:
import cv2
from pathlib import Path
from sahi import AutoDetectionModel
from sahi.predict import get_prediction, get_sliced_prediction, predict

VIDEO = "IMG_2921"

source = f'/mydrive/video/{VIDEO}.mp4'
view_img = False
save_img = True

# Drawing settings. Colors are BGR tuples, as used by OpenCV drawing functions.
color_dict = {
    "WASTE":  (56, 56, 255),  # red
    "DEBRIS": (0, 204, 255),  # yellow
}
DEFAULT_COLOR = (128, 128, 128)  # fallback for class names missing from color_dict
FONT = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.6
FONT_THICKNESS = 1

detection_model = AutoDetectionModel.from_pretrained(
    model_type='yolov8',
    model_path='/mydrive/yolov8/runs/detect/train/weights/best.pt',
    confidence_threshold=0.5,
    device="cuda:0",
)

# Video setup
videocapture = cv2.VideoCapture(source)
if not videocapture.isOpened():
    # Fail loudly instead of silently writing an empty output video.
    raise IOError(f"Could not open video: {source}")
frame_width = int(videocapture.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes the
# playback speed of the written video.
fps = videocapture.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

# Output setup
output_path = Path(f'/mydrive/yolov8/video_result_sahi/{VIDEO}_sliced.mp4')
output_path.parent.mkdir(parents=True, exist_ok=True)  # make sure the output folder exists
video_writer = cv2.VideoWriter(str(output_path), fourcc, fps, (frame_width, frame_height))

try:
    while videocapture.isOpened():
        success, frame = videocapture.read()
        if not success:
            break

        # Sliced inference: the frame is processed in overlapping 512x512 tiles.
        results = get_sliced_prediction(frame,
                                        detection_model,
                                        slice_height=512,
                                        slice_width=512,
                                        overlap_height_ratio=0.1,
                                        overlap_width_ratio=0.1)

        for prediction in results.object_prediction_list:
            bbox = prediction.bbox
            x1, y1 = int(bbox.minx), int(bbox.miny)
            x2, y2 = int(bbox.maxx), int(bbox.maxy)
            cls = prediction.category.name
            score = prediction.score.value
            print(cls, score)

            color = color_dict.get(str(cls), DEFAULT_COLOR)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

            # Measure the label with the same font settings used to draw it, so
            # the filled background matches the rendered text.
            label = f"{cls} {score:0.2f}"
            t_size = cv2.getTextSize(label, FONT, fontScale=FONT_SCALE, thickness=FONT_THICKNESS)[0]
            cv2.rectangle(frame, (x1, y1 - t_size[1] - 3), (x1 + t_size[0], y1 + 3), color, -1)
            cv2.putText(frame,
                        label, (x1, y1 - 2),
                        FONT,
                        FONT_SCALE, [255, 255, 255],
                        thickness=FONT_THICKNESS,
                        lineType=cv2.LINE_AA)

        if save_img:
            video_writer.write(frame)

        if view_img:
            cv2.imshow(Path(source).stem, frame)
            # Keyboard polling only makes sense while a preview window is shown.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always release the video handles, even if inference raises midway.
    video_writer.release()
    videocapture.release()
    cv2.destroyAllWindows()