In [1]:
import os
import keras
import keras_hub
import os, shutil, pathlib
import json

In [2]:
os.environ["KERAS_BACKEND"] = "jax"
images_path ="coco_dataset/datasets/coco/"
annotations_path="coco_dataset/datasets/annotations/annotations/"

In [3]:
with open(f"{annotations_path}instances_train2017.json", "r") as f:
    annotations = json.load(f)

images = {image["id"]: image for image in annotations["images"]}

In [4]:
def scale_box(box, width, height):
    scale = 1.0 / max(width, height)
    x, y, w, h = [v * scale for v in box]
    x += (height - width) * scale / 2 if height > width else 0
    y += (width - height) * scale / 2 if width > height else 0
    return [x, y, w, h]

metadata = {}
for annotation in annotations["annotations"]:
    id = annotation["image_id"]
    if id not in metadata:
        metadata[id] = {"boxes": [], "labels": []}
    image = images[id]
    box = scale_box(annotation["bbox"], image["width"], image["height"])
    metadata[id]["boxes"].append(box)
    metadata[id]["labels"].append(annotation["category_id"])
    metadata[id]["path"] = images_path + "train2017/" + image["file_name"]
metadata = list(metadata.values())

In [5]:
# taking count of unique values of label more than 4 for test and <= for train and val
import random
from collections import Counter
metadata_1 = [x for x in metadata if len(set(tuple(box) for box in x["boxes"])) <= 4]
random.shuffle(metadata_1)
metadata_2 = [x for x in metadata if len(set(tuple(box) for box in x["boxes"])) > 4]
random.shuffle(metadata_2)

In [6]:
metadata_1[433]

{'boxes': [[0.06839999999999999, 0.06278, 0.852, 0.8923800000000001]],
 'labels': [86],
 'path': 'coco_dataset/datasets/coco/train2017/000000279909.jpg'}

In [7]:
import os
from pathlib import Path

# Mapping from your annotations (unchanged)
coco_categories = annotations["categories"]
coco_id_to_index = {cat['id']: idx for idx, cat in enumerate(coco_categories)}
print(f"Class map example: 1 -> {coco_id_to_index.get(1, 'N/A')}, 62 -> {coco_id_to_index.get(62, 'N/A')}")

# Create dirs (absolute for safety)
dataset_dir = Path(os.path.abspath("./yolo_dataset"))  # Full path
for split in ['train', 'val']:
    img_split = dataset_dir / 'images' / split
    lbl_split = dataset_dir / 'labels' / split
    img_split.mkdir(parents=True, exist_ok=True)
    lbl_split.mkdir(parents=True, exist_ok=True)
    print(f"Created {img_split} and {lbl_split}")

# Split metadata_1
val_size = int(0.2 * len(metadata_1))
train_metadata = metadata_1[:-val_size]
val_metadata = metadata_1[-val_size:]

def convert_split(metadata_split, split_name):
    count = 0
    link_fail = 0
    invalid_count = 0
    for sample in metadata_split:
        img_name = Path(sample["path"]).name
        src_img = os.path.abspath(sample["path"])  # Absolute source for symlink
        
        # Symlink image (no copy!)
        dst_img = dataset_dir / 'images' / split_name / img_name
        if not dst_img.exists():
            try:
                if os.path.exists(src_img):
                    os.symlink(src_img, dst_img)  # Lightweight link
                    print(f"Symlinked {img_name} -> {src_img}") if count < 5 else None  # Debug first 5
                else:
                    print(f"SKIP: Source {src_img} not found for {img_name}")
                    link_fail += 1
                    continue
            except OSError as e:
                print(f"LINK FAIL for {img_name}: {str(e)} (e.g., cross-device if volumes differ)")
                link_fail += 1
                continue
        
        # Create label (unchanged; writes to disk, but tiny)
        dst_lbl = dataset_dir / 'labels' / split_name / img_name.replace('.jpg', '.txt')
        with open(dst_lbl, 'w') as f:
            for box, label in zip(sample["boxes"], sample["labels"]):
                yolo_cls = coco_id_to_index.get(label, -1)
                if yolo_cls == -1:
                    print(f"WARNING: Invalid class {label} in {img_name}")
                    invalid_count += 1
                    continue
                x_center = max(0.0, min(1.0, box[0] + box[2] / 2))
                y_center = max(0.0, min(1.0, box[1] + box[3] / 2))
                w = max(0.0, min(1.0, box[2]))
                h = max(0.0, min(1.0, box[3]))
                if w > 0 and h > 0:
                    f.write(f"{yolo_cls} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")
                else:
                    invalid_count += 1
        
        count += 1
        #if count % 100 == 0:
         #   print(f"Processed {count} in {split_name} (link fails: {link_fail})")
    
    print(f"{split_name}: {count} files, {link_fail} link fails, {invalid_count} invalid annos")
    return count

train_count = convert_split(train_metadata, 'train')
val_count = convert_split(val_metadata, 'val')

# Validation (check symlinks)
train_img_dir = dataset_dir / 'images' / 'train'
num_links = len(list(train_img_dir.glob('*.jpg')))
print(f"Final check: {num_links} symlinks in train/ (expected ~{len(train_metadata)})")
if num_links > 0:
    # Test a symlink (should resolve to original)
    sample_link = list(train_img_dir.glob('*.jpg'))[0]
    #print(f"Sample symlink {sample_link.name} resolves to: {os.path.realpath(sample_link)}")
    # Sample label as before
    sample_lbl = list((dataset_dir / 'labels' / 'train').glob('*.txt'))[0]
    with open(sample_lbl, 'r') as f:
        lines = f.readlines()
    print(f"Sample label ({sample_lbl.name}): {lines[:3]}")
else:
    print("STILL EMPTY! Check link fails above.")

Class map example: 1 -> 0, 62 -> 56
Created /work/Notebooks/yolo_dataset/images/train and /work/Notebooks/yolo_dataset/labels/train
Created /work/Notebooks/yolo_dataset/images/val and /work/Notebooks/yolo_dataset/labels/val
Symlinked 000000014781.jpg -> /work/Notebooks/coco_dataset/datasets/coco/train2017/000000014781.jpg
Symlinked 000000127997.jpg -> /work/Notebooks/coco_dataset/datasets/coco/train2017/000000127997.jpg
Symlinked 000000533941.jpg -> /work/Notebooks/coco_dataset/datasets/coco/train2017/000000533941.jpg
Symlinked 000000282473.jpg -> /work/Notebooks/coco_dataset/datasets/coco/train2017/000000282473.jpg
Symlinked 000000186034.jpg -> /work/Notebooks/coco_dataset/datasets/coco/train2017/000000186034.jpg
train: 47364 files, 0 link fails, 0 invalid annos
Symlinked 000000312662.jpg -> /work/Notebooks/coco_dataset/datasets/coco/train2017/000000312662.jpg
Symlinked 000000564339.jpg -> /work/Notebooks/coco_dataset/datasets/coco/train2017/000000564339.jpg
Symlinked 000000435988.jpg

In [8]:
# Get all 91 names from your annotations (in order)
coco_names = [cat['name'] for cat in sorted(annotations["categories"], key=lambda x: x['id'])]
print(f"Using {len(coco_names)} classes: {coco_names[:5]}...{coco_names[-3:]}")

yaml_content = f"""path: ./yolo_dataset  # Relative to notebook
train: images/train
val: images/val
nc: {len(coco_names)}  # 91 from your annotations
names: {coco_names}
"""
with open("coco_custom.yaml", 'w') as f:
    f.write(yaml_content)
print("dataset.yaml updated. nc=", len(coco_names))

Using 80 classes: ['person', 'bicycle', 'car', 'motorcycle', 'airplane']...['teddy bear', 'hair drier', 'toothbrush']
dataset.yaml updated. nc= 80


In [9]:
import torch  # For device check
from ultralytics import YOLO
from pathlib import Path

# Clear any old cache
dataset_dir = Path("./yolo_dataset")
cache_files = list(dataset_dir.glob('**/*.cache'))
for cache in cache_files:
    cache.unlink()
print(f"Deleted {len(cache_files)} old cache files")

# Train
model = YOLO('yolov8n.pt')  # Nano for speed
results = model.train(
    data="coco_custom.yaml",
    epochs=4,  # Bump to 50+ for better results later
    imgsz=640,
    batch=16,  # Lower to 8 if OOM
    workers=4,
    device=0 if torch.cuda.is_available() else 'cpu',
    name="yolov8_custom_coco_fixed"
)
model.save("yolov8_retrained.pt")
print("Training complete! Check runs/detect/yolov8_custom_coco_fixed/ for plots.")

Creating new Ultralytics Settings v0.0.6 file ‚úÖ 
View Ultralytics Settings with 'yolo settings' or at '/home/ucloud/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Deleted 0 old cache files
Ultralytics 8.3.225 üöÄ Python-3.12.11 torch-2.9.0+cu128 CUDA:0 (NVIDIA L4, 22574MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=coco_custom.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=4, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, img

In [10]:
target_path = 'coco_dataset/datasets/coco/train2017/000000019489.jpg'

# Find the index
for i, item in enumerate(metadata_2):
    if item['path'] == target_path:
        found_index = i
        print(f"Found at index {i}: {item}")
        break
else:
    print("Not found")

Found at index 39473: {'boxes': [[0.091234375, 0.569546875, 0.9018750000000001, 0.23781249999999998], [0.7050468750000001, 0.5797656250000001, 0.06853125, 0.1213125], [0.3253125, 0.61565625, 0.19846875, 0.21637499999999998], [0.7103281250000001, 0.5082031250000001, 0.27159375, 0.323828125], [0.002984375, 0.24706250000000002, 0.12534375, 0.57303125], [0.37868750000000007, 0.4344375, 0.068734375, 0.08904687500000001], [0.42978125, 0.290328125, 0.285015625, 0.5178281250000001], [0.728234375, 0.33809374999999997, 0.076109375, 0.19846875], [0.8356718750000001, 0.36234375, 0.14028125, 0.34023437500000003], [0.7100312500000001, 0.36928125, 0.048906250000000005, 0.09412500000000001], [0.6675312500000001, 0.40890625, 0.065140625, 0.12565625], [0.16718750000000002, 0.271875, 0.29765625, 0.56015625], [0.114546875, 0.43245312500000005, 0.079015625, 0.233203125], [0.8232187500000001, 0.35121875, 0.066859375, 0.17895312500000002], [0.9603125000000001, 0.39001562500000003, 0.0396875, 0.200984375], [0

In [11]:
metadata_2[found_index]

{'boxes': [[0.091234375, 0.569546875, 0.9018750000000001, 0.23781249999999998],
  [0.7050468750000001, 0.5797656250000001, 0.06853125, 0.1213125],
  [0.3253125, 0.61565625, 0.19846875, 0.21637499999999998],
  [0.7103281250000001, 0.5082031250000001, 0.27159375, 0.323828125],
  [0.002984375, 0.24706250000000002, 0.12534375, 0.57303125],
  [0.37868750000000007, 0.4344375, 0.068734375, 0.08904687500000001],
  [0.42978125, 0.290328125, 0.285015625, 0.5178281250000001],
  [0.728234375, 0.33809374999999997, 0.076109375, 0.19846875],
  [0.8356718750000001, 0.36234375, 0.14028125, 0.34023437500000003],
  [0.7100312500000001, 0.36928125, 0.048906250000000005, 0.09412500000000001],
  [0.6675312500000001, 0.40890625, 0.065140625, 0.12565625],
  [0.16718750000000002, 0.271875, 0.29765625, 0.56015625],
  [0.114546875, 0.43245312500000005, 0.079015625, 0.233203125],
  [0.8232187500000001, 0.35121875, 0.066859375, 0.17895312500000002],
  [0.9603125000000001, 0.39001562500000003, 0.0396875, 0.20098437

In [12]:
from ultralytics import YOLO
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from keras_hub.utils import coco_id_to_name  # For GT names

# Load retrained model
yolov8_retrained = YOLO("yolov8_retrained.pt")

# Predict on sample
sample = metadata_2[found_index]
results = yolov8_retrained(sample["path"], conf=0.25, verbose=True)  # Standard conf

# Extract detections
detections = []
for r in results:
    if r.boxes is not None:
        for box in r.boxes:
            if box.conf[0] >= 0.25:
                detections.append({
                    'class_id': int(box.cls[0]),
                    'class_name': yolov8_retrained.names[int(box.cls[0])],
                    'conf': float(box.conf[0]),
                    'box': box.xywhn[0].tolist()  # [x_center, y_center, w, h]
                })

print(f"Retrained YOLOv8: {len(detections)} detections (conf >=0.25)")
print(f"Pred classes: {[d['class_name'] for d in detections]}")

# GT for comparison
gt_classes = [coco_id_to_name(l) for l in sample["labels"]]
print(f"GT classes (unique): {set(gt_classes)}")

# Visualize YOLOv8 (simple plot)
def draw_yolov8_prediction(image_path, detections, cutoff=0.25):
    fig, ax = plt.subplots(1, 1, figsize=(10, 8), dpi=150)
    img = plt.imread(image_path)
    ax.imshow(img)
    ax.set_title("YOLOv8 Retrained Predictions")
    
    filtered_dets = [d for d in detections if d['conf'] >= cutoff]
    for d in filtered_dets:
        x, y, w, h = d['box']
        rect = Rectangle((x - w/2, y - h/2), w, h, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
        ax.text(x, y - h/2 - 0.01, f"{d['class_name']} {d['conf']:.2f}", 
                color='red', fontsize=8, ha='center', va='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))
    
    plt.axis('off')
    plt.show()

draw_yolov8_prediction(sample["path"], detections, cutoff=0.25)



image 1/1 /work/Notebooks/coco_dataset/datasets/coco/train2017/000000019489.jpg: 448x640 7 persons, 1 teddy bear, 79.2ms
Speed: 1.7ms preprocess, 79.2ms inference, 1.8ms postprocess per image at shape (1, 3, 448, 640)
Retrained YOLOv8: 8 detections (conf >=0.25)
Pred classes: ['person', 'person', 'person', 'person', 'person', 'teddy bear', 'person', 'person']
GT classes (unique): {'teddy_bear', 'tie', 'bottle', 'chair', 'person'}


<Figure size 1500x1200 with 1 Axes>