# OBJECT DETECTION

### What we have

In [3]:
import json

# Location of the COCO train2017 instance-annotation file
coco_json_path = 'annotations/instances_train2017.json'

# Parse the whole annotation file into a dictionary
with open(coco_json_path, 'r') as f:
    coco_data = json.load(f)

# Show one record from each top-level section of the file.
# The image and annotation samples are simply the entries at list index 10000;
# they are picked independently and do not refer to the same image.
for header, section, index in (
    ("Sample Image Data:\n", 'images', 10000),
    ("\nSample Annotation Data:\n", 'annotations', 10000),
    ("\nSample Category Data:\n", 'categories', 0),
):
    print(header, coco_data[section][index])

Sample Image Data:
 {'license': 4, 'file_name': '000000471488.jpg', 'coco_url': 'http://images.cocodataset.org/train2017/000000471488.jpg', 'height': 480, 'width': 640, 'date_captured': '2013-11-15 21:47:34', 'flickr_url': 'http://farm7.staticflickr.com/6233/6286490251_4bbcaf234f_z.jpg', 'id': 471488}

Sample Annotation Data:
 {'segmentation': [[144.81, 209.47, 144.81, 195.12, 146.95, 193.31, 148.76, 193.47, 150.25, 195.45, 149.75, 221.18, 150.08, 236.02, 149.92, 236.84, 152.65, 242.43, 156.03, 245.82, 156.46, 249.63, 144.6, 250.05, 144.81, 247.09, 146.72, 244.34, 148.2, 242.01, 144.6, 209.84]], 'area': 276.2696000000003, 'iscrowd': 0, 'image_id': 552563, 'bbox': [144.6, 193.31, 11.86, 56.74], 'category_id': 50, 'id': 705139}

Sample Category Data:
 {'supercategory': 'person', 'id': 1, 'name': 'person'}


# Preprocessing: converting the annotations to YOLO format

In [4]:
import json
import os
from pycocotools.coco import COCO

### For training annotations

In [16]:
# Paths to the COCO dataset and where you want to save the YOLO-formatted annotations
#   coco_annotations_path - COCO instance annotations (JSON) for the training split
#   images_dir            - presumably the training image folder; it is not read by
#                           any cell visible in this notebook (TODO confirm it is needed)
#   output_dir            - directory that receives one YOLO .txt label file per image
coco_annotations_path = 'annotations/instances_train2017.json'
images_dir = 'train/images'
output_dir = 'yolo/train'


In [6]:
# Load COCO annotations through pycocotools; constructing COCO reads the JSON
# at coco_annotations_path and builds its lookup index (see the log output below)
coco = COCO(coco_annotations_path)

loading annotations into memory...
Done (t=6.57s)
creating index...
index created!


In [18]:
# Get the class IDs and their corresponding names.
# The ids are sorted explicitly so that the id -> index mapping below does not
# depend on the order in which categories happen to be listed in the JSON file;
# this keeps the label indices identical between the train and val conversions.
categories = coco.loadCats(sorted(coco.getCatIds()))
# Human-readable class names, in YOLO class-index order
class_names = [cat['name'] for cat in categories]
# COCO category ids are not contiguous (e.g. 11 is followed by 13), whereas the
# YOLO label files use consecutive indices starting at 0, so map each COCO id
# to its position in the sorted category list.
class_id_map = {cat['id']: i for i, cat in enumerate(categories)}

In [19]:
# Inspect the raw category records (each has 'supercategory', 'id' and 'name')
print(categories)

[{'supercategory': 'person', 'id': 1, 'name': 'person'}, {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'}, {'supercategory': 'vehicle', 'id': 3, 'name': 'car'}, {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'}, {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'}, {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'}, {'supercategory': 'vehicle', 'id': 7, 'name': 'train'}, {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'}, {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'}, {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'}, {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'}, {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'}, {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'}, {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'}, {'supercategory': 'animal', 'id': 16, 'name': 'bird'}, {'supercategory': 'animal', 'id': 17, 'name': 'cat'}, {'supercategory': 'animal', 'id': 18, 'name': 'dog'}, {'supercategory

In [29]:
# Number of object classes
# (presumably the `nc` entry of the YOLOv5 dataset YAML — confirm)
nc = len(categories)
print(nc)

80


In [30]:
# Class names in the same order as `categories`
names = [category['name'] for category in categories]

In [31]:
# Show the ordered class-name list
print(names)

['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [21]:
# Convert annotations for each image.
# Every image gets one text file in output_dir containing one line per box:
#   "<class_index> <x_center> <y_center> <width> <height>"
# with all four coordinates divided by the image size.

# open(..., 'w') does not create missing directories, so make sure the
# output directory exists before writing the first label file.
os.makedirs(output_dir, exist_ok=True)

for image_id in coco.getImgIds():
    image_info = coco.loadImgs(image_id)[0]
    annotations = coco.loadAnns(coco.getAnnIds(imgIds=image_id))

    image_width = image_info['width']
    image_height = image_info['height']

    # Prepare YOLO annotation lines
    yolo_annotations = []

    for annotation in annotations:
        bbox = annotation.get('bbox')
        if not bbox:
            continue

        # COCO format: [x_min, y_min, width, height] in pixels
        x_min, y_min, box_width, box_height = bbox
        if box_width <= 0 or box_height <= 0:
            # A zero-area box is not a usable training target; skip it
            continue

        class_id = class_id_map[annotation['category_id']]
        # Convert to YOLO format [x_center, y_center, width, height] normalized
        x_center = (x_min + box_width / 2) / image_width
        y_center = (y_min + box_height / 2) / image_height
        norm_width = box_width / image_width
        norm_height = box_height / image_height

        yolo_annotations.append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {norm_width:.6f} {norm_height:.6f}"
        )

    # Save YOLO annotations to a .txt file named after the image.
    # Images without boxes still get an (empty) label file.
    label_name = f"{os.path.splitext(image_info['file_name'])[0]}.txt"
    output_path = os.path.join(output_dir, label_name)
    with open(output_path, 'w') as f:
        f.write("\n".join(yolo_annotations))

print("Conversion complete!")

Conversion complete!


### For validation annotations

In [22]:
# Paths to the COCO dataset and where you want to save the YOLO-formatted annotations
#   coco_annotations_path - COCO instance annotations (JSON) for the validation split
#   images_dir            - presumably the validation image folder; it is not read by
#                           any cell visible in this notebook (TODO confirm it is needed)
#   output_dir            - directory that receives one YOLO .txt label file per image
# These assignments overwrite the training-split values set earlier.
coco_annotations_path = 'annotations/instances_val2017.json'
images_dir = 'val/images'
output_dir = 'yolo/val'

In [23]:
# Load the validation annotations; this rebinds `coco` to the val2017 index
coco = COCO(coco_annotations_path)

loading annotations into memory...
Done (t=2.20s)
creating index...
index created!


In [24]:
# Get the class IDs and their corresponding names.
# The ids are sorted explicitly so that the id -> index mapping below does not
# depend on the order in which categories happen to be listed in the JSON file;
# this keeps the label indices identical between the train and val conversions.
categories = coco.loadCats(sorted(coco.getCatIds()))
# Human-readable class names, in YOLO class-index order
class_names = [cat['name'] for cat in categories]
# COCO category ids are not contiguous, whereas the YOLO label files use
# consecutive indices starting at 0, so map each COCO id to its position in
# the sorted category list.
class_id_map = {cat['id']: i for i, cat in enumerate(categories)}

In [None]:
# Inspect the raw category records loaded from the validation annotations
print(categories)

In [27]:
# Convert annotations for each image.
# Every image gets one text file in output_dir containing one line per box:
#   "<class_index> <x_center> <y_center> <width> <height>"
# with all four coordinates divided by the image size.

# open(..., 'w') does not create missing directories, so make sure the
# output directory exists before writing the first label file.
os.makedirs(output_dir, exist_ok=True)

for image_id in coco.getImgIds():
    image_info = coco.loadImgs(image_id)[0]
    annotations = coco.loadAnns(coco.getAnnIds(imgIds=image_id))

    image_width = image_info['width']
    image_height = image_info['height']

    # Prepare YOLO annotation lines
    yolo_annotations = []

    for annotation in annotations:
        bbox = annotation.get('bbox')
        if not bbox:
            continue

        # COCO format: [x_min, y_min, width, height] in pixels
        x_min, y_min, box_width, box_height = bbox
        if box_width <= 0 or box_height <= 0:
            # A zero-area box is not a usable training target; skip it
            continue

        class_id = class_id_map[annotation['category_id']]
        # Convert to YOLO format [x_center, y_center, width, height] normalized
        x_center = (x_min + box_width / 2) / image_width
        y_center = (y_min + box_height / 2) / image_height
        norm_width = box_width / image_width
        norm_height = box_height / image_height

        yolo_annotations.append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {norm_width:.6f} {norm_height:.6f}"
        )

    # Save YOLO annotations to a .txt file named after the image.
    # Images without boxes still get an (empty) label file.
    label_name = f"{os.path.splitext(image_info['file_name'])[0]}.txt"
    output_path = os.path.join(output_dir, label_name)
    with open(output_path, 'w') as f:
        f.write("\n".join(yolo_annotations))

print("Conversion complete!")

Conversion complete!


### Creating train and val data folders

In [2]:
import os
import shutil
import random

In [3]:
# Paths to the source directories containing images and annotations
# NOTE(review): the conversion cells earlier set images_dir = 'train/images',
# while images are listed from 'train/' here — confirm which directory
# actually holds the .jpg files.
source_image_dir = 'train/'  
# Label files written by the training-split conversion (output_dir = 'yolo/train')
source_annotation_dir = 'yolo/train/' 

In [4]:
# Paths to the new directories.
# Layout: yolov5/yolo_data/{images,labels}/{train,val}/ — the images tree and
# the labels tree mirror each other so every image has its label file at the
# same relative location.
destination_image_train_dir = 'yolov5/yolo_data/images/train/'
destination_annotation_train_dir = 'yolov5/yolo_data/labels/train/'  # was 'labels//train/' (stray double slash)
destination_image_val_dir = 'yolov5/yolo_data/images/val/'
destination_annotation_val_dir = 'yolov5/yolo_data/labels/val/'

In [5]:
# Number of images to select for training and validation.
# Both subsets are sliced from the same shuffled listing of source_image_dir,
# so the validation images here come from the training image folder.
num_train = 5000
num_val = 1000

In [7]:
# Get list of all image files: bare file names (no directory part) whose name
# ends with '.jpg' (case-sensitive match)
image_files = [f for f in os.listdir(source_image_dir) if f.endswith('.jpg')]  

In [8]:
# Shuffle and split the list of image files.
# os.listdir() returns names in arbitrary order and an unseeded shuffle gives a
# different split on every run, so sort first and shuffle with a fixed seed to
# make the train/val subsets reproducible. A sorted copy is shuffled so that
# image_files itself is left untouched and this cell can be re-run safely.
shuffled_files = sorted(image_files)
random.Random(42).shuffle(shuffled_files)
train_files = shuffled_files[:num_train]
# Validation takes the next num_val names, so the two subsets never overlap
val_files = shuffled_files[num_train:num_train + num_val]

In [9]:
# Function to copy images and their label files into the dataset folders
def move_files(file_list, src_image_dir, src_annotation_dir, dest_image_dir, dest_annotation_dir):
    """Copy each image in ``file_list`` together with its YOLO label file.

    Despite the name (kept so existing calls keep working), files are copied
    with ``shutil.copy``; the source files are left in place.

    Args:
        file_list: Image file names (not paths), relative to ``src_image_dir``.
        src_image_dir: Directory containing the source images.
        src_annotation_dir: Directory containing the ``.txt`` label files.
        dest_image_dir: Directory receiving the images; created if missing.
        dest_annotation_dir: Directory receiving the labels; created if missing.

    Returns:
        None. An image whose label file is missing is still copied and a
        warning is printed for it.
    """
    # shutil.copy does not create missing parent directories
    os.makedirs(dest_image_dir, exist_ok=True)
    os.makedirs(dest_annotation_dir, exist_ok=True)

    for file_name in file_list:
        # Copy image
        shutil.copy(
            os.path.join(src_image_dir, file_name),
            os.path.join(dest_image_dir, file_name),
        )

        # Copy annotation. Only the final extension is swapped for '.txt';
        # str.replace('.jpg', '.txt') would also rewrite a '.jpg' that occurs
        # earlier in the file name.
        annotation_file = os.path.splitext(file_name)[0] + '.txt'
        src_annotation_path = os.path.join(src_annotation_dir, annotation_file)
        if os.path.exists(src_annotation_path):
            shutil.copy(src_annotation_path, os.path.join(dest_annotation_dir, annotation_file))
        else:
            print(f"Warning: Annotation file {annotation_file} not found.")
            

In [10]:
# Copy the training subset (images + labels) into the YOLOv5 dataset folders.
# move_files uses shutil.copy, so the source files stay in place.
move_files(train_files, source_image_dir, source_annotation_dir, destination_image_train_dir, destination_annotation_train_dir)

In [11]:
# Copy the validation subset (images + labels) into the YOLOv5 dataset folders.
# Labels are read from the same source_annotation_dir as the training subset,
# because val_files was sliced from the same image listing.
move_files(val_files, source_image_dir, source_annotation_dir, destination_image_val_dir, destination_annotation_val_dir)

### Testing against some images

In [1]:
import torch
from pathlib import Path
import cv2
from PIL import Image, ImageDraw, ImageFont

In [2]:
# Load model: fetch the 'custom' entry point of the ultralytics/yolov5 hub repo
# and initialise it with the locally trained checkpoint given by `path`
model = torch.hub.load('ultralytics/yolov5', 'custom', path='yolov5/runs/train/exp3/weights/best.pt')

Using cache found in /Users/ojas/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-8-7 Python-3.10.14 torch-2.4.0 CPU

Fusing layers... 
Model summary: 157 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [3]:
# Define the folder with images to run detection on
# (the detection loop picks up its *.jpg files)
image_folder = 'test_img/'

In [4]:
# Load class names from the model; indexed by the integer class id of each
# detection in the loop that follows
class_names = model.names 

In [5]:
# Use the default font provided by PIL
font = ImageFont.load_default()

# Run detection on every .jpg in image_folder, draw the detections, show the
# result and save an annotated copy as 'detected_<original name>' in the
# current working directory. Paths are sorted for a deterministic order.
for img_path in sorted(Path(image_folder).glob("*.jpg")):
    # Close the file handle as soon as the pixels are decoded;
    # convert() returns an independent in-memory copy.
    with Image.open(img_path) as source_image:
        img = source_image.convert("RGB")

    results = model(img)
    draw = ImageDraw.Draw(img)

    # Plot boxes and labels on the image.
    # Each row of results.xyxy[0] is [x1, y1, x2, y2, confidence, class];
    # tolist() yields plain Python floats regardless of the tensor's device.
    for *box, conf, cls in results.xyxy[0].tolist():
        # Same "<name> <confidence>" label format as the live-camera cell
        label = f'{class_names[int(cls)]} {conf:.2f}'
        x1, y1, x2, y2 = [int(x) for x in box]
        draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
        # Keep the label inside the image when the box touches the top edge
        draw.text((x1, max(y1 - 10, 0)), label, fill="white", font=font)

    # Display the image
    img.show()

    # Save the image with detections
    img.save(f'detected_{img_path.name}')

print("Detection, labeling, and display complete.")

  with amp.autocast(autocast):
  with amp.autocast(autocast):


Detection, labeling, and display complete.


### Live camera

In [1]:
import torch
import cv2
import numpy as np


In [None]:
# Live detection on the webcam feed.
# NOTE: this cell uses `model`, which is created by the torch.hub.load cell in
# the "Testing against some images" section; run that cell first.

# Initialize the webcam
cap = cv2.VideoCapture(0)  # Use 0 for the default camera

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the frame to RGB (YOLOv5 expects RGB images)
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Perform object detection
        results = model(img)

        # Parse the results: rows of x1, y1, x2, y2, confidence, class.
        # .cpu() makes this work when the model runs on a GPU as well.
        detections = results.xyxy[0].cpu().numpy()

        # Draw bounding boxes and labels on the original (BGR) frame
        for x1, y1, x2, y2, conf, cls in detections:
            label = f"{model.names[int(cls)]} {conf:.2f}"
            top_left = (int(x1), int(y1))
            cv2.rectangle(frame, top_left, (int(x2), int(y2)), (0, 255, 0), 2)
            # putText anchors the text at its bottom-left corner; clamp the
            # baseline so labels of boxes at the top edge remain visible.
            text_origin = (int(x1), max(int(y1) - 10, 15))
            cv2.putText(frame, label, text_origin, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the frame with detections
        cv2.imshow('YOLOv5 Object Detection', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the webcam and close windows even when the loop is interrupted
    # (e.g. kernel interrupt) or detection raises, so the camera is not left locked.
    cap.release()
    cv2.destroyAllWindows()


  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with amp.autocast(autocast):
  with a