In [7]:
# Core
import os
import shutil
import xml.etree.ElementTree as ET

# YOLO
from ultralytics import YOLO

# Data handling / visualization
import matplotlib.pyplot as plt
import cv2
import yaml
import random
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob



In [8]:
# --------------------------
# 1. Setup device
# --------------------------
device = 'cpu'
print(f"Using device: {device}")

# --------------------------
# 2. Load pre-trained YOLOv8 model
# --------------------------
# Nano YOLOv8 for CPU
model = YOLO("yolov8n.pt")
print("Model loaded:", model)


Using device: cpu
Model loaded: YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.

In [None]:
# --------------------------
# 3. Dataset setup
# --------------------------
# Paths
repo_root = "C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc"
voc_root = os.path.join(repo_root, "VOCdevkit")
yolo_dataset_dir = os.path.join(repo_root, "YOLO_VOC")
os.makedirs(yolo_dataset_dir, exist_ok=True)

# VOC Classes
VOC_CLASSES = [
    'aeroplane','bicycle','bird','boat','bottle','bus','car','cat','chair','cow',
    'diningtable','dog','horse','motorbike','person','pottedplant','sheep','sofa','train','tvmonitor'
]

# Function to convert bounding boxes
def convert_bbox(size, box):
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return x*dw, y*dh, w*dw, h*dh

# Conversion function
def voc_to_yolo(voc_root, year, image_set, output_dir):
    img_dir = os.path.join(voc_root, f"VOC{year}", "JPEGImages")
    ann_dir = os.path.join(voc_root, f"VOC{year}", "Annotations")
    txt_output_dir = os.path.join(output_dir, image_set, "labels")
    os.makedirs(txt_output_dir, exist_ok=True)
    img_output_dir = os.path.join(output_dir, image_set, "images")
    os.makedirs(img_output_dir, exist_ok=True)

    # Copy images
    for img_file in glob.glob(os.path.join(img_dir, "*.jpg")):
        shutil.copy(img_file, img_output_dir)

    # Convert XML annotations
    for xml_file in glob.glob(os.path.join(ann_dir, "*.xml")):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        img_name = root.find("filename").text
        w = int(root.find("size/width").text)
        h = int(root.find("size/height").text)

        yolo_labels = []
        for obj in root.findall("object"):
            cls_name = obj.find("name").text
            if cls_name not in VOC_CLASSES:
                continue
            cls_id = VOC_CLASSES.index(cls_name)
            bbox = obj.find("bndbox")
            xmin = float(bbox.find("xmin").text)
            ymin = float(bbox.find("ymin").text)
            xmax = float(bbox.find("xmax").text)
            ymax = float(bbox.find("ymax").text)
            x_center, y_center, bw, bh = convert_bbox((w,h), (xmin,xmax,ymin,ymax))
            yolo_labels.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {bw:.6f} {bh:.6f}")

        txt_file_path = os.path.join(txt_output_dir, img_name.replace(".jpg",".txt"))
        with open(txt_file_path, "w") as f:
            f.write("\n".join(yolo_labels))

# Convert train and val sets
voc_to_yolo(voc_root, "2012", "train", yolo_dataset_dir)
voc_to_yolo(voc_root, "2012", "val", yolo_dataset_dir)

# Generate YAML config for YOLOv8
voc_yaml = {
    'train': os.path.join(yolo_dataset_dir, 'train', 'images'),
    'val': os.path.join(yolo_dataset_dir, 'val', 'images'),
    'nc': len(VOC_CLASSES),
    'names': VOC_CLASSES
}

yaml_path = os.path.join(repo_root, "voc.yaml")
with open(yaml_path, "w") as f:
    yaml.dump(voc_yaml, f)
print("YOLO dataset prepared and voc.yaml created at:", yaml_path)

TypeError: 'module' object is not callable

In [10]:
# --------------------------
# 3. Subset setup
# --------------------------
# --------------------------
# Paths
# --------------------------
repo_root = "C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc"
voc_root = os.path.join(repo_root, "VOCdevkit")
subset_dir = os.path.join(repo_root, "YOLO_VOC_subset")
os.makedirs(subset_dir, exist_ok=True)

subset_sizes = {
    "train": 500,   # number of images for CPU testing
    "val": 100
}

# --------------------------
# Function to create subset
# --------------------------
def create_yolo_subset(original_dir, subset_dir, split, num_images):
    orig_images_dir = os.path.join(original_dir, split, "images")
    orig_labels_dir = os.path.join(original_dir, split, "labels")
    
    subset_images_dir = os.path.join(subset_dir, split, "images")
    subset_labels_dir = os.path.join(subset_dir, split, "labels")
    os.makedirs(subset_images_dir, exist_ok=True)
    os.makedirs(subset_labels_dir, exist_ok=True)
    
    all_images = os.listdir(orig_images_dir)
    sampled_images = random.sample(all_images, min(num_images, len(all_images)))
    
    for img_file in sampled_images:
        # Copy image
        shutil.copy(os.path.join(orig_images_dir, img_file),
                    os.path.join(subset_images_dir, img_file))
        # Copy corresponding label
        label_file = img_file.replace(".jpg", ".txt")
        shutil.copy(os.path.join(orig_labels_dir, label_file),
                    os.path.join(subset_labels_dir, label_file))
    
    print(f"{split} subset created with {len(sampled_images)} images.")

# --------------------------
# Create subsets
# --------------------------
for split in ["train", "val"]:
    create_yolo_subset(yolo_dataset_dir, subset_dir, split, subset_sizes[split])

# --------------------------
# Create subset YAML
# --------------------------
subset_yaml = {
    'train': os.path.join(subset_dir, 'train'),
    'val': os.path.join(subset_dir, 'val'),
    'nc': len(VOC_CLASSES),
    'names': VOC_CLASSES
}

subset_yaml_path = os.path.join(repo_root, "voc_subset.yaml")
with open(subset_yaml_path, "w") as f:
    yaml.dump(subset_yaml, f)
print("Subset YAML created at:", subset_yaml_path)

train subset created with 500 images.
val subset created with 100 images.
Subset YAML created at: C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc\voc_subset.yaml


In [11]:
# --------------------------
# 4. Fine-tune model (CPU-friendly)
# --------------------------
results = model.train(
    data=subset_yaml_path,      #yaml_path for full dataset; subset_yaml_path for debugging
    epochs=10,            # lower for CPU
    batch=4,             # small batch for CPU
    imgsz=224,           # smaller image size speeds up CPU training
    device=device         # CPU
)

Ultralytics 8.3.191  Python-3.10.18 torch-2.8.0+cpu CPU (12th Gen Intel Core(TM) i7-1260P)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc\voc_subset.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=224, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train9, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_ma

In [12]:
# --------------------------
# Directories and run predictions 
# --------------------------
output_dir = os.path.join(repo_root, "yolo_predictions_demo")
os.makedirs(output_dir, exist_ok=True)

val_images_dir = os.path.join(subset_dir, "val", "images")
val_images = glob.glob(os.path.join(val_images_dir, "*.jpg"))[:5]
print("Validation images for portfolio:", val_images)

predictions = []
results_list = []
for img_path in val_images:
    results = model.predict(
        source=img_path,
        imgsz=320,
        conf=0.1,
        save=True,
        project=output_dir,
        name="demo_portfolio"
    )
    # Determine predicted image path
    pred_folder_base = os.path.join(output_dir, "demo_portfolio", "predict")
    pred_folders = sorted([d for d in glob.glob(pred_folder_base + "*") if os.path.isdir(d)])
    if pred_folders:
        pred_folder = os.path.join(pred_folders[-1], "images")
    else:
        pred_folder = os.path.join(pred_folder_base, "images")
    pred_img_path = os.path.join(pred_folder, os.path.basename(img_path))
    
    predictions.append(pred_img_path)
    results_list.append(results)  # keep results for plotting




Validation images for portfolio: ['C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc\\YOLO_VOC_subset\\val\\images\\2007_000123.jpg', 'C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc\\YOLO_VOC_subset\\val\\images\\2007_000768.jpg', 'C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc\\YOLO_VOC_subset\\val\\images\\2007_002427.jpg', 'C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc\\YOLO_VOC_subset\\val\\images\\2007_003207.jpg', 'C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc\\YOLO_VOC_subset\\val\\images\\2007_003286.jpg']

image 1/1 C:\Users\kamed\Desktop\argonne_K\object_detection_with_pascal_voc\YOLO_VOC_subset\val\images\2007_000123.jpg: 256x320 1 train, 61.4ms
Speed: 1.1ms preprocess, 61.4ms inference, 2.0ms postprocess per image at shape (1, 3, 256, 320)
Results saved to [1mC:\Users\kamed\Desktop\argonne_K\object_detection_with_pascal_voc\yolo_predictions_demo\demo_portfolio16[0m

image 1/1 C:\

In [15]:
# --------------------------
# Predict and generate portfolio
# --------------------------
portfolio_dir = os.path.join(repo_root, "portfolio_predictions")
os.makedirs(portfolio_dir, exist_ok=True)

portfolio_val_images = val_images[:5]  # pick first 5
portfolio_results = []

for img_path in portfolio_val_images:
    # Run YOLO prediction
    res = model.predict(
        source=img_path,
        imgsz=320,
        conf=0.1,
        save=True,       # saves annotated image
        project=output_dir,
        name="demo_portfolio"
    )
    portfolio_results.append(res[0])  # res is a list of Results

# --------------------------
# Display and save side-by-side
# --------------------------
for idx, (orig_path, res) in enumerate(zip(portfolio_val_images, portfolio_results), start=1):
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    
    # Original image
    axes[0].imshow(mpimg.imread(orig_path))
    axes[0].set_title("Original Image")
    axes[0].axis("off")
    
    # YOLO prediction (draw directly from Results object)
    img_with_boxes = res.plot()  # NumPy array with bounding boxes
    axes[1].imshow(img_with_boxes)
    axes[1].set_title("YOLO Prediction")
    axes[1].axis("off")
    
    plt.tight_layout()
    
    # Save figure
    save_path = os.path.join(portfolio_dir, f"portfolio_{idx:02d}.png")
    fig.savefig(save_path, bbox_inches='tight')
    print(f"Saved side-by-side image: {save_path}")
    
    plt.close(fig)


image 1/1 C:\Users\kamed\Desktop\argonne_K\object_detection_with_pascal_voc\YOLO_VOC_subset\val\images\2007_000123.jpg: 256x320 1 train, 46.8ms
Speed: 1.2ms preprocess, 46.8ms inference, 0.9ms postprocess per image at shape (1, 3, 256, 320)
Results saved to [1mC:\Users\kamed\Desktop\argonne_K\object_detection_with_pascal_voc\yolo_predictions_demo\demo_portfolio21[0m

image 1/1 C:\Users\kamed\Desktop\argonne_K\object_detection_with_pascal_voc\YOLO_VOC_subset\val\images\2007_000768.jpg: 256x320 2 buss, 1 car, 45.6ms
Speed: 0.9ms preprocess, 45.6ms inference, 0.8ms postprocess per image at shape (1, 3, 256, 320)
Results saved to [1mC:\Users\kamed\Desktop\argonne_K\object_detection_with_pascal_voc\yolo_predictions_demo\demo_portfolio22[0m

image 1/1 C:\Users\kamed\Desktop\argonne_K\object_detection_with_pascal_voc\YOLO_VOC_subset\val\images\2007_002427.jpg: 256x320 1 chair, 1 sofa, 42.1ms
Speed: 0.6ms preprocess, 42.1ms inference, 0.9ms postprocess per image at shape (1, 3, 256, 320)
