In [1]:
!pip install torchinfo -q

In [1]:
# Import PyTorch for deep learning computations
import torch
# Import neural network module from PyTorch
import torch.nn as nn
# Import optimizers for training models
import torch.optim as optim
# Import time module for measuring execution time
import time
# Import NumPy for numerical operations
import numpy as np
# Import Matplotlib for visualizations
import matplotlib.pyplot as plt
# Import os for file system operations
import os
# Import zipfile for extracting compressed datasets
import zipfile
from zipfile import ZipFile

# Import pandas for data manipulation and analysis
import pandas as pd
# Import PIL for image processing
from PIL import Image
# Import torchinfo for displaying model summaries
from torchinfo import summary
# Import DataLoader for handling batch data loading
from torch.utils.data import DataLoader

# Set Matplotlib style to 'ggplot' for better visualization
plt.style.use('ggplot')

In [4]:
# Extract the dataset archive into the current working directory
file = 'CMU_KO8.zip'
# The archive handle is named `archive`, not `zip`: a `with ... as zip` target
# stays bound after the block and would shadow the built-in zip() for every
# later cell in the notebook.
with ZipFile(file, 'r') as archive:
    print("Extraction...")
    # Extract all files
    archive.extractall()
    print("Done!")

Extraction...
Done!


In [5]:
# The dataset folder sits next to the .ipynb file, so a relative path is enough
dataset_path = "./CMU_KO8"

# Walk the dataset tree: print every directory, followed by at most the first
# five file names found directly inside it
for current_dir, _subdirs, file_names in os.walk(dataset_path):
    print(current_dir)
    preview = file_names[:5]
    for name in preview:
        print(name)

./CMU_KO8
README.txt
./CMU_KO8\calibration
calibration_000001.jpg
calibration_000002.jpg
calibration_000003.jpg
calibration_000004.jpg
calibration_000005.jpg
./CMU_KO8\multiple
./CMU_KO8\multiple\test
./CMU_KO8\multiple\test\bakingpan
bakingpan_001.bbox
bakingpan_001.jpg
bakingpan_002.bbox
bakingpan_002.jpg
bakingpan_003.bbox
./CMU_KO8\multiple\test\colander
colander_001.bbox
colander_001.jpg
colander_002.bbox
colander_002.jpg
colander_003.bbox
./CMU_KO8\multiple\test\cup
cup_001.bbox
cup_001.jpg
cup_002.bbox
cup_002.jpg
cup_003.bbox
./CMU_KO8\multiple\test\pitcher
pitcher_001.bbox
pitcher_001.jpg
pitcher_002.bbox
pitcher_002.jpg
pitcher_003.bbox
./CMU_KO8\multiple\test\saucepan
saucepan_001.bbox
saucepan_001.jpg
saucepan_002.bbox
saucepan_002.jpg
saucepan_003.bbox
./CMU_KO8\multiple\test\scissors
scissors_001.bbox
scissors_001.jpg
scissors_002.bbox
scissors_002.jpg
scissors_003.bbox
./CMU_KO8\multiple\test\shaker
shaker_001.bbox
shaker_001.jpg
shaker_002.bbox
shaker_002.jpg
shaker_003

In [11]:
# Object classes listed in label order: the position of a name in this list
# is the numeric class ID written into the YOLO label files
class_names = [
    'bakingpan',
    'colander',
    'cup',
    'pitcher',
    'saucepan',
    'scissors',
    'shaker',
    'thermos',
]
class_map = {name: class_id for class_id, name in enumerate(class_names)}

# Input folder holding one sub-folder per object class
base_path = './CMU_KO8/multiple/test/'
# Output folder for the generated YOLO label files (created if it is missing)
yolo_labels_path = './CMU_KO8/yolo_labels/'
os.makedirs(yolo_labels_path, exist_ok=True)

# Helper that turns a pixel-space bounding box into YOLO's normalized form
def convert_bbox_to_yolo(img_width, img_height, x, y, w, h):
    """Convert an (x, y, w, h) pixel box to normalized YOLO coordinates.

    Args:
        img_width: Width of the image the box belongs to, in pixels.
        img_height: Height of that image, in pixels.
        x: Position of the box edge with the smallest x value; the center is
            taken as ``x + w / 2``.
        y: Position of the box edge with the smallest y value; the center is
            taken as ``y + h / 2``.
        w: Box width in pixels.
        h: Box height in pixels.

    Returns:
        A tuple ``(x_center, y_center, width, height)`` where the horizontal
        values are divided by ``img_width`` and the vertical values by
        ``img_height``.
    """
    # Box center in pixels: starting edge plus half the extent
    center_x_px = x + w / 2
    center_y_px = y + h / 2
    return (
        center_x_px / img_width,
        center_y_px / img_height,
        w / img_width,
        h / img_height,
    )

# Convert every .bbox annotation found under base_path into a YOLO label file
# inside yolo_labels_path (one .txt per .bbox, same base name)
# os.listdir(base_path) <-- lists all folders and files inside base_path
for object_name in os.listdir(base_path):
    # object_folder <-- full path to the candidate class folder
    object_folder = os.path.join(base_path, object_name)

    # Only real directories (like bakingpan/) are processed; stray files are ignored
    if not os.path.isdir(object_folder):
        continue

    # Directories that are not dataset classes (for example a hidden
    # .ipynb_checkpoints folder) are reported and skipped instead of
    # aborting the whole conversion with a KeyError
    if object_name not in class_map:
        print(f"Skipping '{object_name}': not a known class in class_map")
        continue

    # Numeric class ID for this folder; identical for every file inside it
    class_id = class_map[object_name]

    # os.listdir(object_folder) <-- lists all the files inside the class folder
    for bbox_name in os.listdir(object_folder):
        # Keep only the annotation files
        if not bbox_name.endswith(".bbox"):
            continue

        # Swap the extension only. A plain str.replace(".bbox", ...) on the
        # full path would also rewrite any folder or file-name part that
        # happens to contain ".bbox".
        stem = os.path.splitext(bbox_name)[0]
        bbox_path = os.path.join(object_folder, bbox_name)
        # The matching image lives next to the annotation
        img_path = os.path.join(object_folder, stem + ".jpg")
        # The YOLO label goes into the shared yolo_labels/ folder
        label_path = os.path.join(yolo_labels_path, stem + ".txt")

        # YOLO needs bounding boxes normalized by the image size,
        # so read the image dimensions with PIL (Pillow)
        with Image.open(img_path) as img:
            img_width, img_height = img.size

        # Read the four bounding box values (x, y, w, h) as floats.
        # Only the first line of the .bbox file is used.
        with open(bbox_path, 'r') as f:
            x, y, w, h = map(float, f.readline().split())

        # Convert the box from absolute pixel values to YOLO format
        x_center, y_center, w_norm, h_norm = convert_bbox_to_yolo(img_width, img_height, x, y, w, h)

        # Write the YOLO label: "<class_id> <x_center> <y_center> <width> <height>"
        with open(label_path, 'w') as f:
            f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}\n")

# The message names the folder that is actually written to (yolo_labels, with an underscore)
print("Conversion complete! Check the 'yolo_labels' folder for YOLO-formatted labels.")

FileNotFoundError: [WinError 3] The system cannot find the path specified: './CMU_KO8/multiple/test/'

In [9]:
import shutil
import random

# Move a random 20% of the test images (and their labels) into the validation split.
# NOTE: this cell moves files on disk, so running it a second time moves a
# further 20% of whatever is still left in the test folders.

# Paths to datasets folder
dataset_path = './CMU_Kitchen_YOLO'
images_test_path = os.path.join(dataset_path, 'images/test')
labels_test_path = os.path.join(dataset_path, 'labels/test')

images_val_path = os.path.join(dataset_path, 'images/val')
labels_val_path = os.path.join(dataset_path, 'labels/val')

# shutil.move does not create missing parent folders, so make sure both
# destination folders exist before anything is moved
os.makedirs(images_val_path, exist_ok=True)
os.makedirs(labels_val_path, exist_ok=True)

# Define the proportion of test data to move to validation
val_ratio = 0.2
# Fixed seed so a fresh run on the same files selects the same validation images
split_seed = 42

# Get all image files in the test folder. os.listdir returns names in
# arbitrary order, so sort first to make the seeded shuffle reproducible.
image_files = sorted(f for f in os.listdir(images_test_path) if f.endswith('.jpg'))

# Shuffle with a private, seeded generator (leaves the global random state untouched)
random.Random(split_seed).shuffle(image_files)

# Calculate how many images to move
num_val_samples = int(len(image_files) * val_ratio)

# Pair every selected image with its label file (same base name, .txt extension)
selected_pairs = [
    (image_file, os.path.splitext(image_file)[0] + '.txt')
    for image_file in image_files[:num_val_samples]
]

# Check all labels up front so a missing one stops the cell before any file
# has been moved, instead of leaving an image in val/ without its label
for image_file, label_file in selected_pairs:
    label_src = os.path.join(labels_test_path, label_file)
    if not os.path.isfile(label_src):
        raise FileNotFoundError(f"Label file not found for image '{image_file}': {label_src}")

# Move images and corresponding labels to validation
for image_file, label_file in selected_pairs:
    shutil.move(os.path.join(images_test_path, image_file), os.path.join(images_val_path, image_file))
    shutil.move(os.path.join(labels_test_path, label_file), os.path.join(labels_val_path, label_file))

print(f"Moved {num_val_samples} images and labels to validation folder.")

Moved 160 images and labels to validation folder.


In [12]:
!pip install ultralytics -q

In [2]:
from ultralytics import YOLO

# Load the 'yolov8n.pt' checkpoint and print the resulting model's layer structure
pretrained_model = YOLO('yolov8n.pt')
print(pretrained_model)

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_s

In [1]:
# Path to the dataset config file. It has to point at dataset.yaml itself:
# a path to the dataset folder can never pass os.path.isfile() or be opened
# for reading. Requires the imports cell at the top of the notebook (uses os).
# NOTE(review): assumes the notebook runs from the folder that contains
# CMU_Kitchen_YOLO, like the relative dataset_path used elsewhere in this
# notebook -- confirm it resolves to the same dataset.yaml the training
# commands are given.
yaml_path = os.path.join(".", "CMU_Kitchen_YOLO", "dataset.yaml")

# Check if the file exists
yaml_exists = os.path.isfile(yaml_path)
print("File exists:", yaml_exists)

# Display the content to verify correctness; when the file is missing, report
# where it was looked for instead of failing inside open()
if yaml_exists:
    with open(yaml_path, 'r', encoding='utf-8') as f:
        print(f.read())
else:
    print(f"Cannot display '{yaml_path}': file not found (resolved to {os.path.abspath(yaml_path)})")


NameError: name 'os' is not defined

In [16]:
# Quick end-to-end check of the training setup: run the yolo CLI in detection
# training mode, starting from yolov8n.pt, for a single epoch at image size 640,
# using the dataset described by dataset.yaml
!yolo task=detect mode=train model=yolov8n.pt data="C:/Users/kelka/OneDrive/Documents/CV-DL-projects/CMU_Kitchen_YOLO/dataset.yaml" epochs=1 imgsz=640

Ultralytics 8.3.96 🚀 Python-3.11.11 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce GTX 1650, 4096MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=C:/Users/kelka/OneDrive/Documents/CV-DL-projects/CMU_Kitchen_YOLO/dataset.yaml, epochs=1, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train6, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=Fal


  0%|          | 0.00/755k [00:00<?, ?B/s]
 51%|#####     | 384k/755k [00:00<00:00, 3.67MB/s]
100%|##########| 755k/755k [00:00<00:00, 3.30MB/s]
100%|##########| 755k/755k [00:00<00:00, 3.35MB/s]

[34m[1mtrain: [0mScanning C:\Users\kelka\OneDrive\Documents\CV-DL-projects\CMU_Kitchen_YOLO\labels\train...:   0%|          | 0/200 [00:00<?, ?it/s]
[34m[1mtrain: [0mScanning C:\Users\kelka\OneDrive\Documents\CV-DL-projects\CMU_Kitchen_YOLO\labels\train... 58 images, 0 backgrounds, 0 corrupt:  29%|##9       | 58/200 [00:00<00:00, 557.22it/s]
[34m[1mtrain: [0mScanning C:\Users\kelka\OneDrive\Documents\CV-DL-projects\CMU_Kitchen_YOLO\labels\train... 138 images, 0 backgrounds, 0 corrupt:  69%|######9   | 138/200 [00:00<00:00, 682.69it/s]
[34m[1mtrain: [0mScanning C:\Users\kelka\OneDrive\Documents\CV-DL-projects\CMU_Kitchen_YOLO\labels\train... 200 images, 0 backgrounds, 0 corrupt: 100%|##########| 200/200 [00:00<00:00, 704.00it/s]

[34m[1mval: [0mScanning C:\Users\kelka\OneDrive\

In [4]:
!yolo task = detect mode = train model = yolov8n.pt data = "C:/Users/kelka/OneDrive/Documents/CV-DL-projects/CMU_Kitchen_YOLO/dataset.yaml" epochs = 10 imgsz = 640

Ultralytics 8.3.96 🚀 Python-3.11.11 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce GTX 1650, 4096MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=C:/Users/kelka/OneDrive/Documents/CV-DL-projects/CMU_Kitchen_YOLO/dataset.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train9, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=Fa


[34m[1mtrain: [0mScanning C:\Users\kelka\OneDrive\Documents\CV-DL-projects\CMU_Kitchen_YOLO\labels\train.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|##########| 200/200 [00:00<?, ?it/s]
[34m[1mtrain: [0mScanning C:\Users\kelka\OneDrive\Documents\CV-DL-projects\CMU_Kitchen_YOLO\labels\train.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|##########| 200/200 [00:00<?, ?it/s]

[34m[1mval: [0mScanning C:\Users\kelka\OneDrive\Documents\CV-DL-projects\CMU_Kitchen_YOLO\labels\val.cache... 160 images, 0 backgrounds, 0 corrupt: 100%|##########| 160/160 [00:00<?, ?it/s]
[34m[1mval: [0mScanning C:\Users\kelka\OneDrive\Documents\CV-DL-projects\CMU_Kitchen_YOLO\labels\val.cache... 160 images, 0 backgrounds, 0 corrupt: 100%|##########| 160/160 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]
       1/10      3.99G      2.127      5.125       1.89         16        640:   0%|          | 0/13 [00:03<?, ?it/s]
       1/10      3.99G      2.127      5.125       1.89     