In [1]:
# Core
import os
import shutil
import xml.etree.ElementTree as ET

# YOLO
from ultralytics import YOLO

# Data handling / visualization
import matplotlib.pyplot as plt
import cv2
import yaml
import random
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob



In [2]:
# --------------------------
# 1. Setup device
# --------------------------
device = 'cpu'
print(f"Using device: {device}")

# --------------------------
# 2. Load pre-trained YOLOv8 model
# --------------------------
# Nano YOLOv8 for CPU
model = YOLO("yolov8n.pt")
print("Model loaded:", model)


Using device: cpu
Model loaded: YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(32, eps=0.001, momentum=0.

In [3]:
# --------------------------
# 3. Dataset setup
# --------------------------
# Paths
repo_root = "C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc"
voc_root = os.path.join(repo_root, "VOCdevkit")
yolo_dataset_dir = os.path.join(repo_root, "YOLO_VOC")
os.makedirs(yolo_dataset_dir, exist_ok=True)

# VOC Classes
VOC_CLASSES = [
    'aeroplane','bicycle','bird','boat','bottle','bus','car','cat','chair','cow',
    'diningtable','dog','horse','motorbike','person','pottedplant','sheep','sofa','train','tvmonitor'
]

# Function to convert bounding boxes
def convert_bbox(size, box):
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return x*dw, y*dh, w*dw, h*dh

# Conversion function
def voc_to_yolo(voc_root, year="2012", split="train", output_dir="YOLO_VOC"):
    img_dir = os.path.join(voc_root, f"VOC{year}", "JPEGImages")
    ann_dir = os.path.join(voc_root, f"VOC{year}", "Annotations")
    split_file = os.path.join(voc_root, f"VOC{year}", "ImageSets", "Main", f"{split}.txt")

    # Output dirs
    txt_output_dir = os.path.join(output_dir, split, "labels")
    img_output_dir = os.path.join(output_dir, split, "images")
    os.makedirs(txt_output_dir, exist_ok=True)
    os.makedirs(img_output_dir, exist_ok=True)

    # Read official split list
    with open(split_file, "r") as f:
        img_ids = [line.strip() for line in f.readlines()]

    for img_id in img_ids:
        # Copy image
        src_img = os.path.join(img_dir, f"{img_id}.jpg")
        dst_img = os.path.join(img_output_dir, f"{img_id}.jpg")
        shutil.copy(src_img, dst_img)

        # Convert annotation
        xml_file = os.path.join(ann_dir, f"{img_id}.xml")
        tree = ET.parse(xml_file)
        root = tree.getroot()
        w = int(root.find("size/width").text)
        h = int(root.find("size/height").text)

        yolo_labels = []
        for obj in root.findall("object"):
            cls_name = obj.find("name").text
            if cls_name not in VOC_CLASSES:
                continue
            cls_id = VOC_CLASSES.index(cls_name)
            bbox = obj.find("bndbox")
            xmin = float(bbox.find("xmin").text)
            ymin = float(bbox.find("ymin").text)
            xmax = float(bbox.find("xmax").text)
            ymax = float(bbox.find("ymax").text)
            x_center, y_center, bw, bh = convert_bbox((w, h), (xmin, xmax, ymin, ymax))
            yolo_labels.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {bw:.6f} {bh:.6f}")

        txt_file_path = os.path.join(txt_output_dir, f"{img_id}.txt")
        with open(txt_file_path, "w") as f:
            f.write("\n".join(yolo_labels))


# Convert train and val sets
voc_to_yolo(voc_root, "2012", "train", yolo_dataset_dir)
voc_to_yolo(voc_root, "2012", "val", yolo_dataset_dir)

# Generate YAML config for YOLOv8
voc_yaml = {
    'train': os.path.join(yolo_dataset_dir, 'train', 'images'),
    'val': os.path.join(yolo_dataset_dir, 'val', 'images'),
    'nc': len(VOC_CLASSES),
    'names': VOC_CLASSES
}

yaml_path = os.path.join(repo_root, "voc.yaml")
with open(yaml_path, "w") as f:
    yaml.dump(voc_yaml, f)
print("YOLO dataset prepared and voc.yaml created at:", yaml_path)

YOLO dataset prepared and voc.yaml created at: C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc\voc.yaml


In [7]:
# --------------------------
# 3. Subset setup
# --------------------------
# --------------------------
# Paths
# --------------------------
repo_root = "C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc"
voc_root = os.path.join(repo_root, "VOCdevkit", "VOC2012")
subset_dir = os.path.join(repo_root, "YOLO_VOC_subset")
os.makedirs(subset_dir, exist_ok=True)

subset_sizes = {
    "train": 800,   # number of images for CPU testing
    "val": 200
}

# --------------------------
# Function to create subset
# --------------------------
def create_yolo_subset(voc_root, original_dir, subset_dir, split, num_images):
    split_file = os.path.join(voc_root, "ImageSets", "Main", f"{split}.txt")
    orig_images_dir = os.path.join(original_dir, split, "images")
    orig_labels_dir = os.path.join(original_dir, split, "labels")

    subset_images_dir = os.path.join(subset_dir, split, "images")
    subset_labels_dir = os.path.join(subset_dir, split, "labels")
    os.makedirs(subset_images_dir, exist_ok=True)
    os.makedirs(subset_labels_dir, exist_ok=True)

    # Load official split IDs
    with open(split_file, "r") as f:
        split_ids = [line.strip() + ".jpg" for line in f.readlines()]

    # Keep only those that exist in original_dir
    available_images = [f for f in split_ids if f in os.listdir(orig_images_dir)]

    # Random sample
    sampled_images = random.sample(available_images, min(num_images, len(available_images)))

    for img_file in sampled_images:
        # Copy image
        shutil.copy(os.path.join(orig_images_dir, img_file),
                    os.path.join(subset_images_dir, img_file))
        # Copy corresponding label
        label_file = img_file.replace(".jpg", ".txt")
        shutil.copy(os.path.join(orig_labels_dir, label_file),
                    os.path.join(subset_labels_dir, label_file))

    print(f"{split} subset created with {len(sampled_images)} images.")

# --------------------------
# Create subsets
# --------------------------
for split in ["train", "val"]:
    create_yolo_subset(voc_root, yolo_dataset_dir, subset_dir, split, subset_sizes[split])

# --------------------------
# Create subset YAML
# --------------------------
subset_yaml = {
    'train': os.path.join(subset_dir, 'train'),
    'val': os.path.join(subset_dir, 'val'),
    'nc': len(VOC_CLASSES),
    'names': VOC_CLASSES
}

subset_yaml_path = os.path.join(repo_root, "voc_subset.yaml")
with open(subset_yaml_path, "w") as f:
    yaml.dump(subset_yaml, f)
print("Subset YAML created at:", subset_yaml_path)

train subset created with 800 images.
val subset created with 200 images.
Subset YAML created at: C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc\voc_subset.yaml


In [8]:
# --------------------------
# 4. Fine-tune model (CPU-friendly)
# --------------------------
results = model.train(
    data=subset_yaml_path,      #yaml_path for full dataset; subset_yaml_path for debugging/CPU
    epochs=10,            # lower for CPU
    batch=4,             # small batch for CPU
    imgsz=224,           # smaller image size speeds up CPU training
    device=device,        # CPU
    freeze=10           #freezes first 10 layers (backbone)
)

New https://pypi.org/project/ultralytics/8.3.193 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.191  Python-3.10.18 torch-2.8.0+cpu CPU (12th Gen Intel Core(TM) i7-1260P)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:/Users/kamed/Desktop/argonne_K/object_detection_with_pascal_voc\voc_subset.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=10, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=224, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_

In [14]:
# --------------------------
# 5. Compute full validation metrics
# --------------------------
metrics = model.val(
    data=subset_yaml_path,
    split="val",
    imgsz=320,
    conf=0.1
)

print("Overall mAP@0.5:0.95:", metrics.box.map)
print("Overall Precision:", metrics.box.mp)
print("Overall Recall:", metrics.box.mr)


# --------------------------
# 6. Export per-class metrics into DataFrames
# --------------------------
import pandas as pd

# mAP DataFrame
df_map = pd.DataFrame({
    "class": VOC_CLASSES,
    "mAP@0.5:0.95": metrics.box.maps
})
df_map.loc["Overall"] = ["Overall", metrics.box.map]

# Precision DataFrame
df_precision = pd.DataFrame({
    "class": VOC_CLASSES,
    "Precision": metrics.box.p
})
df_precision.loc["Overall"] = ["Overall", metrics.box.mp]

# Recall DataFrame
df_recall = pd.DataFrame({
    "class": VOC_CLASSES,
    "Recall": metrics.box.r
})
df_recall.loc["Overall"] = ["Overall", metrics.box.mr]

# F1 DataFrame
f1_per_class = 2 * (metrics.box.p * metrics.box.r) / (metrics.box.p + metrics.box.r + 1e-6)
overall_f1 = 2 * (metrics.box.mp * metrics.box.mr) / (metrics.box.mp + metrics.box.mr + 1e-6)
df_f1 = pd.DataFrame({
    "class": VOC_CLASSES,
    "F1": f1_per_class
})
df_f1.loc[len(df_f1)] = ["Overall", overall_f1]

print("\nPer-class mAP:\n", df_map)
print("\nPer-class Precision:\n", df_precision)
print("\nPer-class Recall:\n", df_recall)
print("\nPer-class F1:\n", df_f1)

# Save to CSVs
df_map.to_csv(os.path.join(repo_root, "yolo_val_map.csv"), index=False)
df_precision.to_csv(os.path.join(repo_root, "yolo_val_precision.csv"), index=False)
df_recall.to_csv(os.path.join(repo_root, "yolo_val_recall.csv"), index=False)
df_f1.to_csv(os.path.join(repo_root, "yolo_val_f1.csv"), index=False)

# --------------------------
# 7. Generate per-class bar charts
# --------------------------
import matplotlib.pyplot as plt
import numpy as np

# mAP bar chart
plt.figure(figsize=(12, 6))
plt.bar(df_map["class"][:-1], df_map["mAP@0.5:0.95"][:-1])
plt.xticks(rotation=45, ha="right")
plt.ylabel("mAP@0.5:0.95")
plt.title("YOLOv8 Per-Class mAP (Validation)")
plt.tight_layout()
plt.savefig(os.path.join(repo_root, "yolo_perclass_map.png"), bbox_inches="tight")
plt.close()

# Precision bar chart
plt.figure(figsize=(12, 6))
plt.bar(df_precision["class"][:-1], df_precision["Precision"][:-1])
plt.xticks(rotation=45, ha="right")
plt.ylabel("Precision")
plt.title("YOLOv8 Per-Class Precision (Validation)")
plt.tight_layout()
plt.savefig(os.path.join(repo_root, "yolo_perclass_precision.png"), bbox_inches="tight")
plt.close()

# Recall bar chart
plt.figure(figsize=(12, 6))
plt.bar(df_recall["class"][:-1], df_recall["Recall"][:-1])
plt.xticks(rotation=45, ha="right")
plt.ylabel("Recall")
plt.title("YOLOv8 Per-Class Recall (Validation)")
plt.tight_layout()
plt.savefig(os.path.join(repo_root, "yolo_perclass_recall.png"), bbox_inches="tight")
plt.close()

# F1 bar chart
plt.figure(figsize=(12, 6))
plt.bar(df_f1["class"][:-1], df_f1["F1"][:-1])
plt.xticks(rotation=45, ha="right")
plt.ylabel("F1 Score")
plt.title("YOLOv8 Per-Class F1 (Validation)")
plt.tight_layout()
plt.savefig(os.path.join(repo_root, "yolo_perclass_f1.png"), bbox_inches="tight")
plt.close()

# --------------------------
# 8. Combined grouped bar chart (Precision, Recall, F1)
# --------------------------
classes = df_precision["class"][:-1]  # exclude "Overall"
x = np.arange(len(classes))  # positions
width = 0.25

plt.figure(figsize=(14, 6))
plt.bar(x - width, df_precision["Precision"][:-1], width=width, label="Precision")
plt.bar(x, df_recall["Recall"][:-1], width=width, label="Recall")
plt.bar(x + width, df_f1["F1"][:-1], width=width, label="F1")

plt.xticks(x, classes, rotation=45, ha="right")
plt.ylabel("Score")
plt.title("YOLOv8 Per-Class Precision, Recall, and F1 (Validation)")
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(repo_root, "yolo_perclass_prf1.png"), bbox_inches="tight")
plt.close()

print("All metrics exported: mAP, Precision, Recall, F1 (CSV + individual + grouped bar charts).")

Ultralytics 8.3.191  Python-3.10.18 torch-2.8.0+cpu CPU (12th Gen Intel Core(TM) i7-1260P)
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 649.8437.6 MB/s, size: 94.4 KB)
[K[34m[1mval: [0mScanning C:\Users\kamed\Desktop\argonne_K\object_detection_with_pascal_voc\YOLO_VOC_subset\val\labels.cache... 509 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 509/509 509036.9it/s 0.0s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 128/128 9.2it/s 14.0s0.1s
                   all        509       1425       0.51      0.577      0.528      0.376
             aeroplane         15         37      0.595      0.318      0.353      0.265
               bicycle         22         28      0.473      0.643      0.521      0.403
                  bird         29         79      0.533      0.367      0.358      0.247
                  boat         18         29      0.478      0.759      0.529      0.342
                bott

In [17]:
# --------------------------
# Scatterplot: Per-class mAP vs F1
# --------------------------
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr

# Exclude "Overall" row
perclass_map = df_map["mAP@0.5:0.95"][:-1]
perclass_f1 = df_f1["F1"][:-1]
classes = df_map["class"][:-1]

# Compute correlation
corr, pval = pearsonr(perclass_map, perclass_f1)

plt.figure(figsize=(8, 6))
sns.scatterplot(x=perclass_map, y=perclass_f1)

# Annotate each class
for i, cls in enumerate(classes):
    plt.text(perclass_map.iloc[i] + 0.005,
             perclass_f1.iloc[i] + 0.005,
             cls, fontsize=8)

plt.xlabel("mAP@0.5:0.95")
plt.ylabel("F1 Score")
plt.title(f"YOLOv8 Per-Class mAP vs F1 (Validation)\nPearson r={corr:.2f}, p={pval:.3f}")
plt.grid(True)
plt.tight_layout()

# Save
plt.savefig(os.path.join(repo_root, "yolo_map_vs_f1_scatter.png"), bbox_inches="tight")
plt.close()

print("Generated scatterplot: Per-class mAP vs F1 (with correlation)")


ModuleNotFoundError: No module named 'seaborn'

In [15]:
# --------------------------
# Compare per-class mAP vs F1
# --------------------------
import matplotlib.pyplot as plt
import numpy as np

classes = df_map["class"][:-1]  # exclude "Overall"
x = np.arange(len(classes))
width = 0.35  # bar width

plt.figure(figsize=(14, 6))
plt.bar(x - width/2, df_map["mAP@0.5:0.95"][:-1], width=width, label="mAP@0.5:0.95")
plt.bar(x + width/2, df_f1["F1"][:-1], width=width, label="F1 Score")

plt.xticks(x, classes, rotation=45, ha="right")
plt.ylabel("Score")
plt.title("YOLOv8 Per-Class Comparison: mAP vs F1 (Validation)")
plt.legend()
plt.tight_layout()

# Save
plt.savefig(os.path.join(repo_root, "yolo_perclass_map_vs_f1.png"), bbox_inches="tight")
plt.close()

print("Generated grouped bar chart: Per-class mAP vs F1")

Generated grouped bar chart: Per-class mAP vs F1


In [None]:
# --------------------------
# 8. Side-by-side GT vs Predictions (10 random val images)
# --------------------------
import random
import xml.etree.ElementTree as ET

val_images_dir = os.path.join(subset_dir, "val", "images")
val_labels_dir = os.path.join(subset_dir, "val", "labels")

# Randomly select 10 val images
all_val_images = glob.glob(os.path.join(val_images_dir, "*.jpg"))
sample_val_images = random.sample(all_val_images, min(10, len(all_val_images)))

comparison_dir = os.path.join(repo_root, "comparison_predictions")
os.makedirs(comparison_dir, exist_ok=True)

for idx, img_path in enumerate(sample_val_images, start=1):
    # Load GT labels
    label_path = os.path.join(val_labels_dir, os.path.basename(img_path).replace(".jpg", ".txt"))
    gt_boxes = []
    if os.path.exists(label_path):
        with open(label_path, "r") as f:
            for line in f:
                cls_id, x, y, w, h = line.strip().split()
                cls_id = int(cls_id)
                gt_boxes.append((VOC_CLASSES[cls_id], float(x), float(y), float(w), float(h)))

    # Run YOLO prediction
    results = model.predict(source=img_path, imgsz=320, conf=0.1, verbose=False)
    res = results[0]

    fig, axes = plt.subplots(1, 2, figsize=(14, 7))

    # Left: Ground truth (draw bounding boxes manually)
    img = mpimg.imread(img_path)
    axes[0].imshow(img)
    axes[0].set_title("Ground Truth")
    axes[0].axis("off")
    h, w = img.shape[:2]
    for cls_name, x, y, bw, bh in gt_boxes:
        xmin = int((x - bw/2) * w)
        ymin = int((y - bh/2) * h)
        xmax = int((x + bw/2) * w)
        ymax = int((y + bh/2) * h)
        rect = plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin,
                             linewidth=2, edgecolor="lime", facecolor="none")
        axes[0].add_patch(rect)
        axes[0].text(xmin, ymin-5, cls_name, color="lime", fontsize=10, weight="bold")

    # Right: YOLO predictions (ultralytics res.plot())
    img_pred = res.plot()
    axes[1].imshow(img_pred)
    axes[1].set_title("YOLO Prediction")
    axes[1].axis("off")

    plt.tight_layout()
    save_path = os.path.join(comparison_dir, f"comparison_{idx:02d}.png")
    fig.savefig(save_path, bbox_inches="tight")
    plt.close(fig)

    print(f"Saved GT vs Prediction comparison: {save_path}")