# YOLO Multi-Ingredient Detection — Cleaned Version

This notebook is a **cleaned-up version** of my YOLO training workflow.  

I ran several experiments with different datasets, methods, and hyperparameter settings while developing the project. To keep things clear and easy to follow, I’ve removed all the trial runs, intermediate results, and extra code.


This notebook contains only the **essential code** needed to reproduce the YOLO training for multi-ingredient detection in the project.



In [None]:
# Mount Google Drive; later cells read from and write to paths under /content/drive
from google.colab import drive
drive.mount('/content/drive')
# Install the third-party packages used by this notebook (versions are not pinned here)
!pip install fiftyone
!pip install torch torchvision
!pip install ultralytics

In [None]:
import fiftyone as fo
import fiftyone.zoo as foz
import os
import random
import yaml
from shutil import copy2

# Target classes; the position of a name in this list is its YOLO class id
classes = [
    "Apple", "Banana", "Grape", "Milk", "Bell pepper",
    "Bread", "Broccoli", "Cucumber", "Egg (Food)", "Zucchini",
    "Lemon", "Orange", "Tomato", "Cabbage", "Carrot",
]
sample_limit_per_class = 500

# Fixed seed so the shuffle (and therefore the train/val/test split) is
# reproducible after "Restart & Run All"
SEED = 42
random.seed(SEED)

# Load Open Images v7 detections restricted to the target classes.
# NOTE: max_samples caps the TOTAL number of images loaded; it does not
# guarantee `sample_limit_per_class` images for each individual class.
view = foz.load_zoo_dataset(
    "open-images-v7",
    split="train",
    label_types=["detections"],
    classes=classes,
    max_samples=sample_limit_per_class * len(classes),
)

# Print the field schema to verify which field holds the detections
# (get_field_schema() is the documented accessor on FiftyOne collections)
print(view.get_field_schema())

label_field = "ground_truth"

# export directories
export_dir = "/content/open_images_data"
train_dir = os.path.join(export_dir, 'train')
val_dir = os.path.join(export_dir, 'val')
test_dir = os.path.join(export_dir, 'test')

# Create images/ and labels/ folders for each of the three splits
for subset in [train_dir, val_dir, test_dir]:
    os.makedirs(os.path.join(subset, 'images'), exist_ok=True)
    os.makedirs(os.path.join(subset, 'labels'), exist_ok=True)

# Shuffle once, then slice into train (80%), validation (10%), and test (10%)
samples = list(view)
random.shuffle(samples)
num_samples = len(samples)
train_end = int(0.8 * num_samples)
val_end = int(0.9 * num_samples)
train_samples = samples[:train_end]
val_samples = samples[train_end:val_end]
test_samples = samples[val_end:]

# Function to copy images and generate YOLO-format label files
def copy_files(samples, subset_dir):
    """Copy each sample's image into a split folder and write its YOLO label file.

    Reads the module-level ``label_field`` (name of the detections field) and
    ``classes`` (list of target class names; list position is the class id).

    Args:
        samples: Iterable of samples exposing ``filepath``, ``in`` membership
            and ``sample[label_field].detections``; each detection exposes
            ``label`` and ``bounding_box`` as ``[x_min, y_min, width, height]``.
        subset_dir: Split directory that already contains ``images/`` and
            ``labels/`` sub-folders.

    For every sample with at least one detection, a ``<image stem>.txt`` file
    is written with one ``class_id x_center y_center width height`` line per
    detection whose label is in ``classes``. Samples without detections only
    get their image copied and a warning printed.
    """
    for sample in samples:
        # Copy image file to target directory
        image_file = sample.filepath
        image_filename = os.path.basename(image_file)
        copy2(image_file, os.path.join(subset_dir, 'images', image_filename))

        # Ensure sample contains valid detections
        has_detections = (
            label_field in sample
            and sample[label_field] is not None
            and len(sample[label_field].detections) > 0
        )
        if not has_detections:
            print(f"Warning: No labels found for image: {image_filename}")
            continue

        label_filename = os.path.splitext(image_filename)[0] + '.txt'
        label_path = os.path.join(subset_dir, 'labels', label_filename)

        with open(label_path, 'w') as label_file:
            for detection in sample[label_field].detections:
                # Only include detections for target classes
                if detection.label not in classes:
                    continue
                class_id = classes.index(detection.label)
                x_min, y_min, box_w, box_h = detection.bounding_box
                # YOLO wants the box centre: top-left corner plus half the
                # size on BOTH axes (the y centre must include the height).
                x_center = max(0, min(1, x_min + box_w / 2))
                y_center = max(0, min(1, y_min + box_h / 2))
                width = max(0, min(1, box_w))
                height = max(0, min(1, box_h))

                # Write normalized bounding box coordinates
                label_file.write(f"{class_id} {x_center} {y_center} {width} {height}\n")

# Fill the three split folders with images and their YOLO label files
for split_samples, split_dir in (
    (train_samples, train_dir),
    (val_samples, val_dir),
    (test_samples, test_dir),
):
    copy_files(split_samples, split_dir)

# YOLO data configuration: image folder per split plus class metadata
data_yaml = dict(
    train=os.path.join(export_dir, 'train/images'),
    val=os.path.join(export_dir, 'val/images'),
    test=os.path.join(export_dir, 'test/images'),
    nc=len(classes),   # number of classes
    names=classes,     # class names, indexed by class id
)

# Write the configuration next to the exported splits
yaml_path = os.path.join(export_dir, 'data.yaml')
with open(yaml_path, 'w') as yaml_file:
    yaml.dump(data_yaml, yaml_file)

print(f"Dataset exported to: {export_dir}")
print(f"data.yaml file created at: {yaml_path}")


In [None]:
# Install augmentation libraries, pinned to exact versions; albumentations and
# OpenCV (cv2) are imported by the augmentation cells further down
!pip install --quiet albumentations==1.3.0 opencv-python-headless==4.7.0.72

This cell rebuilds the YOLO label files from the FiftyOne samples. For each image, it copies the file into the split folder (if it is not already there) and writes the matching `.txt` label file that YOLO expects. It converts each bounding box from FiftyOne’s format, `[x_min, y_min, width, height]` measured from the top-left corner, into YOLO’s format, `x_center y_center width height`, with all values normalized between 0 and 1. It also clamps the coordinates to that range and skips any detections whose label is not in your class list.

In [None]:
#Basically cleanup step to make sure correct formatting
import os
from shutil import copy2

def regenerate_labels_from_fiftyone(samples_list, dest_dir, label_field, classes_list):
    """Rewrite the YOLO label files of one split from its samples.

    Args:
        samples_list: Samples exposing ``filepath``, ``in`` membership and
            ``sample[label_field].detections``.
        dest_dir: Split directory; ``images/`` and ``labels/`` are created
            inside it when missing.
        label_field: Name of the field holding the detections.
        classes_list: Target class names; list position is the class id.

    Images are copied only when not already present. Any existing label file
    for a sample is deleted first, then rewritten when the sample has
    detections. Prints the number of boxes written; returns nothing.
    """
    def _unit(value):
        # Clamp a normalized coordinate into [0, 1]
        return max(0.0, min(1.0, value))

    images_dir = os.path.join(dest_dir, "images")
    labels_dir = os.path.join(dest_dir, "labels")
    for folder in (images_dir, labels_dir):
        os.makedirs(folder, exist_ok=True)

    box_total = 0
    for sample in samples_list:
        source_path = sample.filepath
        file_name = os.path.basename(source_path)
        target_path = os.path.join(images_dir, file_name)

        # Copy the image only when the split does not have it yet
        if not os.path.exists(target_path):
            try:
                copy2(source_path, target_path)
            except Exception as e:
                print(f"Could not copy {source_path}: {e}")
                continue

        # Drop any stale label file so old rows never survive
        stem = os.path.splitext(file_name)[0]
        label_path = os.path.join(labels_dir, stem + ".txt")
        if os.path.exists(label_path):
            os.remove(label_path)

        # Nothing to write for samples without detections
        if label_field not in sample:
            continue
        annotation = sample[label_field]
        if annotation is None or len(annotation.detections) == 0:
            continue

        with open(label_path, "w") as lf:
            for det in annotation.detections:
                try:
                    class_idx = classes_list.index(det.label)
                except ValueError:
                    # Label is not one of the target classes
                    continue
                left, top, box_w, box_h = det.bounding_box
                # Top-left corner + half size -> centre, then clamp everything
                center_x = _unit(left + box_w / 2.0)
                center_y = _unit(top + box_h / 2.0)
                box_w = _unit(box_w)
                box_h = _unit(box_h)
                lf.write(f"{class_idx} {center_x:.6f} {center_y:.6f} {box_w:.6f} {box_h:.6f}\n")
                box_total += 1
    print(f"Regenerated {box_total} bounding boxes in {dest_dir}")

# Rebuild the label files of every split, in train / val / test order
for split_samples, split_dir in (
    (train_samples, train_dir),
    (val_samples, val_dir),
    (test_samples, test_dir),
):
    regenerate_labels_from_fiftyone(split_samples, split_dir, label_field, classes)


In [None]:
# Count number of bounding boxes per class (train/val/test)
import glob
from collections import Counter

def count_bboxes_in_labels(labels_folder):
    """Count YOLO label rows per class id across every ``*.txt`` file in a folder.

    Args:
        labels_folder: Directory containing YOLO label files.

    Returns:
        collections.Counter mapping integer class id -> number of rows.
        Blank lines and rows whose first token is not an integer are skipped.
        An empty or missing folder yields an empty Counter.
    """
    counts = Counter()
    # Sorted so Counter insertion order (and therefore most_common() tie
    # breaking) does not depend on the filesystem's listing order.
    for txt_path in sorted(glob.glob(os.path.join(labels_folder, "*.txt"))):
        with open(txt_path, "r") as f:
            for line in f:
                parts = line.split()
                if not parts:
                    continue
                try:
                    cid = int(parts[0])
                except ValueError:
                    # Malformed class id: skip the row, but let unrelated
                    # errors (e.g. KeyboardInterrupt) propagate.
                    continue
                counts[cid] += 1
    return counts

# Per-class box counts for each split
train_counts = count_bboxes_in_labels(os.path.join(train_dir, "labels"))
val_counts = count_bboxes_in_labels(os.path.join(val_dir, "labels"))
test_counts = count_bboxes_in_labels(os.path.join(test_dir, "labels"))

# Legend: class id -> class name
print("Index -> Class name")
for i, name in enumerate(classes):
    print(f"{i:02d} -> {name}")

# One table per split, rows in class-id order
for heading, split_counts in (
    ("Train", train_counts),
    ("Validation", val_counts),
    ("Test", test_counts),
):
    print(f"\n{heading} counts (per-class):")
    for i, name in enumerate(classes):
        print(f"{i:02d} {name:15s}: {split_counts.get(i,0)}")

# Most and least frequent classes in the training split; classes with no
# boxes at all are included in the "bottom" ranking with a count of 0
most_common = train_counts.most_common(3)
per_class_totals = [(k, train_counts.get(k,0)) for k in range(len(classes))]
least_common = sorted(per_class_totals, key=lambda pair: pair[1])[:3]
print("\nTop 3 classes (train):", [(classes[k], v) for k,v in most_common])
print("Bottom 3 classes (train):", [(classes[k], v) for k,v in least_common])


Targeted augmentation for underrepresented classes.

This code finds underrepresented classes in your training set and automatically creates more images for them by flipping, rotating, and tweaking brightness and color. It keeps all the YOLO labels accurate and normalized, so your dataset becomes more balanced and your model learns better.

In [None]:
import cv2
import uuid
import random
from pathlib import Path
import albumentations as A

# Training split folders holding the images and their YOLO label files
images_dir = Path(train_dir, "images")
labels_dir = Path(train_dir, "labels")

# Every class is brought up to the size of the largest class, but never
# targeted below 500 boxes
largest_class_count = max(train_counts.values(), default=0)
TARGET_PER_CLASS = max(largest_class_count, 500)
AUG_PER_IMAGE = 2     # augmented variants generated per selected image
OUT_IMG_EXT = ".jpg"  # file extension used for the generated images

# Transforms are applied in list order: geometry, then colour/noise, then a
# final resize to 640x640
geometric_ops = [
    A.HorizontalFlip(p=0.5),
    A.RandomRotate90(p=0.3),
    A.ShiftScaleRotate(shift_limit=0.06, scale_limit=0.15, rotate_limit=18, p=0.5, border_mode=cv2.BORDER_CONSTANT),
]
photometric_ops = [
    A.RandomBrightnessContrast(p=0.5),
    A.HueSaturationValue(p=0.35),
    A.GaussNoise(p=0.15),
]
aug = A.Compose(
    geometric_ops + photometric_ops + [A.Resize(640, 640, p=1.0)],
    # Boxes are passed as absolute [x_min, y_min, x_max, y_max]; class ids
    # travel alongside them in the "labels" field
    bbox_params=A.BboxParams(format="pascal_voc", label_fields=["labels"])
)

def yolo_to_pascal(bbox, img_w, img_h):
    """Convert a normalized YOLO box to absolute corner coordinates.

    Args:
        bbox: ``(x_center, y_center, width, height)`` as fractions of the image.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` in pixels.
    """
    center_x, center_y, box_w, box_h = bbox
    half_w = box_w / 2.0
    half_h = box_h / 2.0
    return [
        (center_x - half_w) * img_w,
        (center_y - half_h) * img_h,
        (center_x + half_w) * img_w,
        (center_y + half_h) * img_h,
    ]

def pascal_to_yolo(box, img_w, img_h):
    """Convert absolute corner coordinates to a normalized YOLO box.

    Args:
        box: ``[x_min, y_min, x_max, y_max]`` in pixels.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``[x_center, y_center, width, height]`` as fractions of the image.
    """
    left, top, right, bottom = box
    return [
        (left + right) / 2.0 / img_w,
        (top + bottom) / 2.0 / img_h,
        (right - left) / img_w,
        (bottom - top) / img_h,
    ]

# Build a map: image basename -> list of (class_id, yolo_bbox) read from the
# training label files. Blank or truncated rows are skipped.
image_to_boxes = {}
for label_file in labels_dir.glob("*.txt"):
    boxes = []
    with open(label_file, "r") as f:
        for line in f:
            parts = line.split()
            if len(parts) < 5:
                continue
            cx, cy, w, h = map(float, parts[1:5])
            boxes.append((int(parts[0]), (cx, cy, w, h)))
    if boxes:
        image_to_boxes[label_file.stem] = boxes

# Reverse mapping: class id -> basenames of the images that contain it.
# Each image is listed once per class even when it holds several boxes of
# that class, so removing a candidate really removes it.
class_to_images = {i: [] for i in range(len(classes))}
for base, boxes in image_to_boxes.items():
    for cid in sorted({cid for cid, _ in boxes}):
        class_to_images.setdefault(cid, []).append(base)

# Augment every class whose box count is below TARGET_PER_CLASS.
# running_counts tracks the boxes added so far for ALL classes, because an
# augmented image usually carries boxes of several classes.
augmented_images = 0
running_counts = dict(train_counts)
for cid in range(len(classes)):
    current = running_counts.get(cid, 0)
    if current >= TARGET_PER_CLASS:
        continue
    needed = TARGET_PER_CLASS - current  # measured in boxes, not images
    candidates = class_to_images.get(cid, []).copy()
    if not candidates:
        print(f"Skipping class {cid} ({classes[cid]}): no training images to augment.")
        continue
    print(f"Augmenting class {cid} ({classes[cid]}): current {current}, target {TARGET_PER_CLASS}, needed {needed}")

    # Every pass through this loop either lowers `needed` or removes a
    # candidate, so the loop always terminates.
    while needed > 0 and candidates:
        base = random.choice(candidates)
        matches = list(images_dir.glob(base + ".*"))
        img = cv2.imread(str(matches[0])) if matches else None
        if img is None:
            candidates.remove(base)
            continue
        h, w = img.shape[:2]

        orig_boxes = []
        labels_for_aug = []
        for lab, yolo_box in image_to_boxes[base]:
            x_min, y_min, x_max, y_max = yolo_to_pascal(yolo_box, w, h)
            # Clip to the image: centre and size were clamped independently
            # when the labels were written, so a box may poke past the edge,
            # which the bbox validation of the transform would reject.
            x_min, x_max = max(0.0, x_min), min(float(w), x_max)
            y_min, y_max = max(0.0, y_min), min(float(h), y_max)
            if (x_max - x_min < 3) or (y_max - y_min < 3):
                continue  # too small to be a useful training box
            orig_boxes.append([x_min, y_min, x_max, y_max])
            labels_for_aug.append(lab)
        if cid not in labels_for_aug:
            # No usable box of the class being balanced in this image
            candidates.remove(base)
            continue

        # Create a few augmented variants from this image
        produced = 0
        for _ in range(AUG_PER_IMAGE):
            if needed <= 0:
                break
            try:
                res = aug(image=img, bboxes=orig_boxes, labels=labels_for_aug)
            except Exception as e:
                print(f"Augmentation failed for {base}: {e}")
                break
            out_img = res["image"]
            out_bboxes = res["bboxes"]
            # int() keeps class ids written as integers even if the library
            # hands them back as floats
            out_labels = [int(lab) for lab in res["labels"]]
            gained = out_labels.count(cid)
            if gained == 0:
                # The transform pushed every box of this class out of frame
                continue

            new_base = f"{base}_aug_{uuid.uuid4().hex[:8]}"
            out_img_path = images_dir / (new_base + OUT_IMG_EXT)
            if not cv2.imwrite(str(out_img_path), out_img):
                print(f"Could not write {out_img_path}")
                continue

            ih, iw = out_img.shape[:2]
            out_label_path = labels_dir / (new_base + ".txt")
            with open(out_label_path, "w") as olf:
                for lab, box in zip(out_labels, out_bboxes):
                    yolo_box = pascal_to_yolo(box, iw, ih)
                    cx, cy, bw, bh = [max(0.0, min(1.0, v)) for v in yolo_box]
                    olf.write(f"{lab} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}\n")
                    running_counts[lab] = running_counts.get(lab, 0) + 1
            augmented_images += 1
            produced += 1
            needed -= gained

        if produced == 0:
            # This image cannot yield a usable variant; do not pick it again
            candidates.remove(base)

print(f"Augmentation finished. Added {augmented_images} new augmented images.")


This code randomly picks four training images, stitches them together into a single mosaic, and updates all the bounding boxes so they line up correctly. It’s a clean way to make your dataset more diverse and help your YOLO model generalize better.

This cell synthesizes new images by placing four training images into a single image (mosaic). It adjusts and writes bounding boxes accordingly.

In [None]:
import numpy as np

def create_mosaic_image(basenames, images_dir, labels_dir, out_image_path, out_label_path, target_size=640):
    """Tile up to four labelled images into one square mosaic with merged labels.

    Args:
        basenames: Image stems; only the first four are used, placed in the
            top-left, top-right, bottom-left and bottom-right tiles.
        images_dir: ``pathlib.Path`` of the folder containing the images.
        labels_dir: ``pathlib.Path`` of the folder containing YOLO label files.
        out_image_path: Destination path of the mosaic image.
        out_label_path: Destination path of the mosaic's YOLO label file.
        target_size: Side length of the square mosaic in pixels.

    Returns:
        True when the mosaic image and its label file were written; False
        when no usable box was collected or the image could not be written
        (in both cases no label file is created).

    Entries whose image or label file is missing or unreadable leave their
    tile filled with the grey background.
    """
    # Grey canvas; each source image fills one half x half tile
    canvas = np.full((target_size, target_size, 3), 114, dtype=np.uint8)
    half = target_size // 2
    offsets = [(0, 0), (half, 0), (0, half), (half, half)]
    accumulated_boxes = []
    accumulated_labels = []

    for (x_off, y_off), base in zip(offsets, basenames):
        img_matches = list(images_dir.glob(base + ".*"))
        lbl_path = labels_dir / (base + ".txt")
        if not img_matches or not lbl_path.exists():
            continue
        img = cv2.imread(str(img_matches[0]))
        if img is None:
            continue
        # Resizing to half x half ignoring aspect ratio for simplicity
        canvas[y_off:y_off + half, x_off:x_off + half] = cv2.resize(img, (half, half))

        # Move the boxes of this image into its tile
        with open(lbl_path, "r") as lf:
            for line in lf:
                parts = line.split()
                if len(parts) < 5:
                    continue  # blank or truncated row
                lab = int(parts[0])
                cx, cy, bw, bh = map(float, parts[1:5])
                # Boxes are normalized, so scaling them by the tile size is
                # the same as scaling the pixel box by half / original size
                x_min, y_min, x_max, y_max = yolo_to_pascal((cx, cy, bw, bh), half, half)
                # Shift into the tile and clip so a box never spills into a
                # neighbouring tile
                xmin2 = min(max(x_min, 0.0), half) + x_off
                ymin2 = min(max(y_min, 0.0), half) + y_off
                xmax2 = min(max(x_max, 0.0), half) + x_off
                ymax2 = min(max(y_max, 0.0), half) + y_off
                if xmax2 - xmin2 < 3 or ymax2 - ymin2 < 3:
                    continue  # too small after downscaling
                accumulated_boxes.append([xmin2, ymin2, xmax2, ymax2])
                accumulated_labels.append(lab)

    if not accumulated_boxes:
        return False

    # Save the mosaic first; without an image the labels would be orphaned
    if not cv2.imwrite(str(out_image_path), canvas):
        return False
    ih, iw = canvas.shape[:2]
    with open(out_label_path, "w") as olf:
        for lab, box in zip(accumulated_labels, accumulated_boxes):
            yolo_box = pascal_to_yolo(box, iw, ih)
            cx, cy, bw, bh = [max(0.0, min(1.0, v)) for v in yolo_box]
            olf.write(f"{lab} {cx:.6f} {cy:.6f} {bw:.6f} {bh:.6f}\n")
    return True


# Every training label file is a potential mosaic tile
candidates = [label_path.stem for label_path in labels_dir.glob("*.txt")]
NUM_MOSAICS = 200
mosaics_created = 0

# random.sample needs at least four distinct stems to draw from
if len(candidates) >= 4:
    for i in range(NUM_MOSAICS):
        picks = random.sample(candidates, 4)
        new_base = f"mosaic_{uuid.uuid4().hex[:8]}"
        out_img = images_dir / (new_base + ".jpg")
        out_lbl = labels_dir / (new_base + ".txt")
        # Only mosaics that were actually written are counted
        if create_mosaic_image(picks, images_dir, labels_dir, out_img, out_lbl, target_size=640):
            mosaics_created += 1

print(f"Created {mosaics_created} mosaic images.")


In [None]:
# Recount the training labels now that augmented and mosaic files exist
train_counts_after = count_bboxes_in_labels(os.path.join(train_dir, "labels"))

# Same per-class table twice: original counts, then counts after augmentation
for stage, stage_counts in (("BEFORE", train_counts), ("AFTER", train_counts_after)):
    print(f"\nPer-class counts {stage} augmentation (train):")
    for i, name in enumerate(classes):
        print(f"{i:02d} {name:15s}: {stage_counts.get(i,0)}")

# Overall number of boxes gained
total_before = sum(train_counts.values())
total_after = sum(train_counts_after.values())
added = total_after - total_before
print(f"\nTotal annotated boxes: before={total_before}, after={total_after}, added={added}")


In [None]:
from ultralytics import YOLO

# Start from the pretrained weights file
model = YOLO("yolo11n.pt")

# Training settings gathered in one place so they are easy to tweak
train_settings = {
    "data": "/content/open_images_data/data.yaml",
    "epochs": 30,
    "imgsz": 640,   # image size
    "batch": 16,
    "project": "/content/drive/MyDrive/YOLOv11_training results",  # saved to Google Drive
    "name": "yolo_experiment",
}
results = model.train(**train_settings)


In [None]:
from pathlib import Path

import os
from ultralytics import YOLO
from PIL import Image

# Define paths
model_path = '/content/drive/MyDrive/YOLOv11_training results/yolo_experiment/weights/best.pt'
input_folder = '/content/drive/MyDrive/fruits and vegies'    # Folder containing test images
output_folder = '/content/drive/MyDrive/vegetables(open_images) detection'       # Folder to save detection results

# File extensions treated as images; every other entry in the input folder
# (sub-folders, notes, hidden files) is skipped instead of being sent to the
# model. Extend this set if your folder contains other image formats.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tif", ".tiff"}

# Create output folder if it doesn't exist
Path(output_folder).mkdir(parents=True, exist_ok=True)
model = YOLO(model_path)

# Process each image in the input folder, in a stable (sorted) order
for img_file in sorted(os.listdir(input_folder)):
    img_path = os.path.join(input_folder, img_file)
    stem, ext = os.path.splitext(img_file)
    if not os.path.isfile(img_path) or ext.lower() not in IMAGE_EXTENSIONS:
        print(f"Skipping non-image entry: {img_file}")
        continue

    # Run inference
    results = model(img_path)

    # Save each result individually
    for i, result in enumerate(results):
        # The first result keeps the original "annotated_<file>" name; any
        # further result gets an index suffix so nothing is overwritten
        suffix = "" if i == 0 else f"_{i}"
        output_path = os.path.join(output_folder, f"annotated_{stem}{suffix}{ext}")

        # Save each annotated image to the output folder
        result.save(output_path)

    print(f"Processed {img_file}, results saved to {output_folder}")
