# Jupyter notebook for debugging

In [4]:
%load_ext autoreload
%autoreload 2

# Copied from `train` function in train_simple.py:L78
import yaml

device = 'cpu'
hyp = 'data/hyps/hyp.scratch-low.yaml'

with open(hyp, errors="ignore") as f:
    hyp = yaml.safe_load(f)  # load hyps dict

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
from models.yolo import Model
from utils.general import check_dataset

cfg = 'models/yolov5n_nuscenes.yaml'
data = 'data/nuscenes.yaml'
data_dict = check_dataset(data)

nc = int(data_dict["nc"])  # number of classes
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)  # create

Overriding model.yaml nc=4 with nc=15

                 from  n    params  module                                  arguments                     
  0                -1  1      1760  models.common.Conv                      [3, 16, 6, 2, 2]              
  1                -1  1      4672  models.common.Conv                      [16, 32, 3, 2]                
  2                -1  1      4800  models.common.C3                        [32, 32, 1]                   
  3                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
  4                -1  2     29184  models.common.C3                        [64, 64, 2]                   
  5                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
  6                -1  3    156928  models.common.C3                        [128, 128, 3]                 
  7                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]             

In [6]:
anchors = model.model[-1].anchors
nl = model.model[-1].nl


# [TODO] Draw anchors
# Code below data loader cell

In [7]:
from utils.dataloaders import create_dataloader
from utils.general import check_img_size, colorstr

imgsz = 416
batch_size = 1
single_cls = False
seed = 0

train_path = data_dict["train"]
gs = max(int(model.stride.max()), 32)  # grid size (max stride)
imgsz = check_img_size(imgsz, gs, floor=gs * 2)  # verify imgsz is gs-multiple

train_loader, dataset = create_dataloader(
    train_path,
    imgsz,
    batch_size,
    gs,
    single_cls,
    hyp=hyp,
    augment=True,
    cache=None,
    rect=False,
    rank=-1,
    workers=8,
    image_weights=False,
    quad=False,
    prefix=colorstr("train: "),
    shuffle=True,
    seed=seed,
)

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))
[34m[1mtrain: [0mScanning /home/sung/nuscenes_det2d/train.cache... 28130 images, 1425 backgrounds, 0 corrupt: 100%|██████████| 28130/28130 [00:00<?, ?it/s]


In [8]:
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

object_sizes = []

for imgs, targets, paths, _ in train_loader:
    imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0
    break

#accumulating width and height of targets to analyze distribution
batch_size = 16
train_loader, _ = create_dataloader(
    train_path,
    imgsz,
    batch_size,
    gs,
    single_cls,
    hyp=hyp,
    augment=True,
    cache=None,
    rect=False,
    rank=-1,
    workers=8,
    image_weights=False,
    quad=False,
    prefix=colorstr("train: "),
    shuffle=True,
    seed=seed,
)

#Applying KMeans clustering to find centroids of all object sizes

# for _, targets, _, _ in train_loader:
#     object_sizes.extend(targets[:, 4:6].numpy())

# object_sizes = np.array(object_sizes)

# kmeans = KMeans(n_clusters=6)
# kmeans.fit(object_sizes)

# centroids = kmeans.cluster_centers_

# plt.figure(figsize=(10, 6))

# plt.scatter(object_sizes[:, 0], object_sizes[:, 1], s=10, c='blue', label='Object Sizes')

# plt.scatter(centroids[:, 0], centroids[:, 1], s=100, c='red', marker='x', label='Centroids')

# plt.xlabel('Width (w)')
# plt.ylabel('Height (h)')
# plt.title('Object Sizes')
# plt.legend()
# plt.grid(True)

# plt.savefig('object_sizes_and_centroids.png')

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))
[34m[1mtrain: [0mScanning /home/sung/nuscenes_det2d/train.cache... 28130 images, 1425 backgrounds, 0 corrupt: 100%|██████████| 28130/28130 [00:00<?, ?it/s]


In [9]:
#Applying KMeans clustering to find centroids of small, mid and big object sizes

small_objects = []
mid_objects = []
big_objects = []

for _, targets, _, _ in train_loader:
    for target in targets.numpy():
        width, height = target[4:6]
        if width <= 0.2 and height <= 0.2:
            small_objects.append(target[4:6])
        elif width <= 0.4 and height <= 0.4:
            mid_objects.append(target[4:6])
        else:
            big_objects.append(target[4:6])

def process_and_plot(objects, category):
    if len(objects) > 0:
        objects_array = np.array(objects)
        kmeans = KMeans(n_clusters=2)
        kmeans.fit(objects_array)
        centroids = kmeans.cluster_centers_

        plt.figure(figsize=(10, 6))
        plt.scatter(objects_array[:, 0], objects_array[:, 1], s=10, c='blue', label=f'{category} Object Sizes')
        plt.scatter(centroids[:, 0], centroids[:, 1], s=100, c='red', marker='x', label='Centroids')
        plt.xlabel('Width (w)')
        plt.ylabel('Height (h)')
        plt.title(f'{category} Object Sizes and Centroids')
        plt.legend()
        plt.grid(True)
        plt.savefig(f'{category.lower()}_sizes_and_centroids.png')
        plt.show()

        return centroids
    else:
        print(f"No data available for {category}")
        return []

centroids_small = process_and_plot(small_objects, "Small Objects")
centroids_mid = process_and_plot(mid_objects, "Mid Objects")
centroids_big_1 = process_and_plot(big_objects, "Big Objects")


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


In [12]:
print(centroids_small,'\n', centroids_mid, '\n', centroids_big_1)

[[    0.11045     0.08159]
 [   0.032083    0.032089]] 
 [[    0.25947     0.13957]
 [    0.12632      0.2757]] 
 [[    0.47564     0.26189]
 [    0.17616     0.51576]]


In [None]:
import torch
from models.common import DetectMultiBackend
from utils.torch_utils import select_device

weights = 'yolov5n.pt'
# data = 'data/nuscenes.yaml'
data = 'data/coco128.yaml'
half = False  # use FP16 half-precision inference
dnn = False  # use OpenCV DNN for ONNX inference
device = select_device('cpu')

model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)

# inference
model.eval()
with torch.no_grad():
    pred = model(imgs)  # forward


YOLOv5 🚀 ddf4e91 Python-3.8.19 torch-2.3.1+cu121 CPU

Fusing layers... 
YOLOv5n summary: 213 layers, 1867405 parameters, 0 gradients, 4.5 GFLOPs


In [40]:
import numpy as np
import matplotlib.pyplot as plt

#Drawing anchor box and grid cells on the image
#divide the image into grid cells
for head_idx in range(3):
    grid_ratio = pred[1][head_idx].shape[2]
    grid_size = imgsz // grid_ratio
    xy_pos = []
    for j in range(grid_ratio):
        for k in range(grid_ratio):
            xy_pos.append([j * grid_size, k * grid_size])

    # plt visualize image with grid cells
    fig, ax = plt.subplots()
    ax.imshow(imgs[0].permute(1,2,0).cpu().numpy())
    for idx, (x, y) in enumerate(xy_pos):
        ax.add_patch(plt.Rectangle((x, y), grid_size, grid_size, fill=False, edgecolor='red', lw=0.5))
        if idx == 135:
            center_x, center_y = x + grid_size // 2, y + grid_size // 2
            for i in range(len(anchors[head_idx])):
                anchor_width = anchors[head_idx][i][0] * grid_size
                anchor_height = anchors[head_idx][i][1] * grid_size
                ax.add_patch(plt.Rectangle((center_x - anchor_width // 2, center_y - anchor_height // 2), anchor_width, anchor_height, fill=False, edgecolor='blue', lw=0.5))
        
    plt.show()
    #save image with grid cells
    fig.savefig('grid_cells_{}.png'.format(grid_ratio), dpi=300)

In [41]:
from utils.general import non_max_suppression

conf_thres = 0.25  # confidence threshold
iou_thres = 0.45  # NMS IOU threshold
max_det = 1000  # maximum detections per image
classes = None
agnostic_nms = False  # class-agnostic NMS

pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

# [TODO] draw predictions (see detect.py:L178)

import matplotlib.patches as patches

class_names = [
    'pedestrian', 'animal', 'car'
]
#omitted other classes for simplicity

for img, output in zip(imgs, pred):  # Loop through images and corresponding outputs
    fig, ax = plt.subplots()
    ax.imshow(img.permute(1, 2, 0).cpu().numpy())  # Convert tensor image to numpy and display
    
    if output is not None and len(output) > 0:
        for x1, y1, x2, y2, conf, cls_pred in output:
            box = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=1, edgecolor='r', facecolor='none')
            ax.add_patch(box)
            label = f'{class_names[int(cls_pred)]} {conf:.2f}'
            ax.text(x1, y1-13, label, color='white', ha='left', va='top', bbox=dict(facecolor='red', alpha=0.5), fontsize=8)
    
    plt.axis('off')
    plt.savefig('detection.png')  # Save the figure to file
    plt.close()

In [29]:
#save video of detected image results
import cv2
import os

def images_to_video(image_folder, video_name, fps):
    images = [img for img in os.listdir(image_folder) if img.endswith(".png") or img.endswith(".jpg")]
    images.sort()

    frame = cv2.imread(os.path.join(image_folder, images[0]))
    height, width, layers = frame.shape

    video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'DIVX'), fps, (width, height))

    for image in images:
        video.write(cv2.imread(os.path.join(image_folder, image)))

    video.release()

image_folder = 'runs/detect/exp3'
video_name = 'output_video.avi'
fps = 7

images_to_video(image_folder, video_name, fps)