In [None]:
import os
import shutil
import random
import numpy as np
import cv2
from pathlib import Path
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from ultralytics import YOLO
import logging
from collections import Counter
from sklearn.model_selection import train_test_split

# --- Setup ---
# Timestamped INFO-level logging for the whole notebook.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# One shared seed for Python, NumPy and PyTorch random number generators.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    # Seed every visible CUDA device as well.
    torch.cuda.manual_seed_all(SEED)
logger.info("Random seeds set for reproducibility.")

# --- Paths ---
# Input dataset: <base_dir>/cvat_upload/{images,labels}.
base_dir = Path('../datasets').resolve()
cvat_dir = base_dir / 'cvat_upload'
cvat_images_dir, cvat_labels_dir = cvat_dir / 'images', cvat_dir / 'labels'

# Output tree: <yolo_dir>/{images,labels}/{train,val}.
yolo_dir = Path('../yolo_cvat').resolve()
train_img_dir, val_img_dir = (yolo_dir / 'images' / split for split in ('train', 'val'))
train_lbl_dir, val_lbl_dir = (yolo_dir / 'labels' / split for split in ('train', 'val'))

# Make sure all four output folders exist; already-existing ones are accepted.
for d in (train_img_dir, val_img_dir, train_lbl_dir, val_lbl_dir):
    d.mkdir(parents=True, exist_ok=True)

# --- Collect and Split Data ---
# Sort the listings: directory iteration order depends on the filesystem, so an
# unsorted list would make the seeded split below differ between machines.
images = sorted(cvat_images_dir.glob('*.jpg'))
labels = sorted(cvat_labels_dir.glob('*.txt'))

if not images:
    # Fail here with a clear message instead of inside train_test_split.
    raise FileNotFoundError(f"No .jpg images found in {cvat_images_dir}")

# Identify images without labels
images_without_labels = [
    img for img in images if not (cvat_labels_dir / f"{img.stem}.txt").exists()
]
logger.info(f"Found {len(images_without_labels)} images without labels.")

# Create empty label files for missing
# NOTE(review): this writes into the source dataset, so on later runs these
# images no longer count as "without labels" (the captured run reports 0 missing
# labels but hundreds of background images) - confirm they are truly object-free.
for img in images_without_labels:
    empty_lbl = cvat_labels_dir / f"{img.stem}.txt"
    empty_lbl.touch()
    logger.info(f"Created empty label file: {empty_lbl}")

train_imgs, val_imgs = train_test_split(images, test_size=0.2, random_state=42)
logger.info(f"Collected {len(images)} images. Train: {len(train_imgs)}, Val: {len(val_imgs)}")

# Copy images and labels
for split_imgs, img_dst, lbl_dst in [(train_imgs, train_img_dir, train_lbl_dir),
                                     (val_imgs, val_img_dir, val_lbl_dir)]:
    # img_dst.name is the split ("train"/"val"); its parent is "images" for both
    # splits, which made the two progress bars indistinguishable.
    for img in tqdm(split_imgs, desc=f"Copying to {img_dst.name}"):
        lbl = cvat_labels_dir / f"{img.stem}.txt"
        shutil.copy(img, img_dst / img.name)
        if lbl.exists():
            shutil.copy(lbl, lbl_dst / lbl.name)
        else:
            # Write (and truncate) an empty label so a stale file from an
            # earlier run cannot survive in the destination.
            (lbl_dst / f"{img.stem}.txt").write_text("")

# --- Define classes ---
# The position of each name in this list is its numeric class ID: the list is
# indexed with the class ID read from the label files, its length is written as
# `nc` and the list itself as `names` in data.yaml.
selected_classes = ['hole', 'pole', 'stairs', 'bottle/glass', 'rock']

# --- Check Annotations ---
def check_annotations(label_dir, num_classes):
    """Validate every ``*.txt`` label file in *label_dir* and log each problem.

    Each non-blank line must consist of five whitespace-separated fields: an
    integer class ID in ``[0, num_classes)`` followed by four numbers in
    ``[0, 1]``. Problems are reported through ``logger.error``; bad content
    never raises, so one broken file cannot abort the whole check.

    Args:
        label_dir: ``pathlib.Path`` of the directory holding the label files.
        num_classes: Number of valid classes; class IDs must be below it.

    Returns:
        The number of problems that were logged (0 when everything is valid).
    """
    num_issues = 0
    for lbl_file in label_dir.glob('*.txt'):
        with open(lbl_file, 'r') as f:
            lines = f.readlines()
        for line in lines:
            parts = line.split()
            if not parts:
                continue  # blank line
            if len(parts) != 5:
                logger.error(f"Invalid format in {lbl_file}: {line}")
                num_issues += 1
                continue
            try:
                cls_id = int(parts[0])
                coords = [float(val) for val in parts[1:]]
            except ValueError:
                # A non-numeric field used to crash the validator itself.
                logger.error(f"Non-numeric value in {lbl_file}: {line}")
                num_issues += 1
                continue
            if cls_id >= num_classes or cls_id < 0:
                logger.error(f"Invalid class ID {cls_id} in {lbl_file}")
                num_issues += 1
            # Report the raw text of the field so the message matches the file.
            for raw, val in zip(parts[1:], coords):
                # Written as "not in range" so that NaN is flagged as well.
                if not 0 <= val <= 1:
                    logger.error(f"Out-of-range value in {lbl_file}: {raw}")
                    num_issues += 1
    return num_issues

# Validate the label files of both splits against the configured class count.
for lbl_dir in (train_lbl_dir, val_lbl_dir):
    check_annotations(lbl_dir, len(selected_classes))

# --- Class Distribution ---
def get_class_counts(label_dir):
    """Count annotations per class ID over all ``*.txt`` files in *label_dir*.

    The class ID is the first whitespace-separated field of every non-blank
    line, parsed as an integer.

    Args:
        label_dir: ``pathlib.Path`` of the directory holding the label files.

    Returns:
        A ``Counter`` mapping class ID to its number of annotation lines.
    """
    tally = Counter()
    for path in label_dir.glob('*.txt'):
        with open(path, 'r') as handle:
            rows = handle.readlines()
        # Blank rows carry no annotation and are skipped.
        tally.update(int(row.split()[0]) for row in rows if row.strip())
    return tally

# Count annotated instances per class ID in each split and log them.
train_class_counts, val_class_counts = (
    get_class_counts(lbl_dir) for lbl_dir in (train_lbl_dir, val_lbl_dir)
)
logger.info(f"Train class counts: {dict(train_class_counts)}")
logger.info(f"Val class counts: {dict(val_class_counts)}")

# Overlay one semi-transparent bar series per split; classes without any
# annotation show up with a count of 0.
class_ids = range(len(selected_classes))
plt.figure(figsize=(10, 5))
for split_name, counts in (('Train', train_class_counts), ('Val', val_class_counts)):
    heights = [counts.get(class_id, 0) for class_id in class_ids]
    plt.bar(selected_classes, heights, alpha=0.5, label=split_name)
plt.xlabel('Classes')
plt.ylabel('Number of Instances')
plt.title('Class Distribution')
plt.legend()
plt.savefig('class_distribution.png')
plt.close()
logger.info("Saved class distribution plot.")

# --- Visualize Annotations ---
def visualize_annotations(image_path, label_path, class_names):
    """Draw the boxes listed in *label_path* onto the image at *image_path*.

    Every non-blank label line is read as ``class_id x y width height`` with
    the four box values relative to the image size (centre plus extent).
    Malformed lines are logged and skipped instead of aborting the drawing.

    Args:
        image_path: Path of the image to load.
        label_path: Path of the matching label file.
        class_names: Sequence of class names indexed by class ID.

    Returns:
        The annotated image converted to RGB, or ``None`` when the image
        could not be loaded.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        logger.error(f"Failed to load image: {image_path}")
        return None
    h, w = img.shape[:2]
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue  # blank line
            try:
                # Raises ValueError for a wrong field count or non-numeric text.
                cls_id, x, y, width, height = map(float, parts)
            except ValueError:
                logger.warning(f"Skipping malformed annotation in {label_path}: {line.strip()}")
                continue
            x1 = int((x - width/2) * w)
            y1 = int((y - height/2) * h)
            x2 = int((x + width/2) * w)
            y2 = int((y + height/2) * h)
            cv2.rectangle(img, (x1, y1), (x2, y2), (0,255,0), 2)
            # Fall back to the numeric ID for classes outside class_names; a
            # negative ID would otherwise silently index from the end.
            cls_idx = int(cls_id)
            label = class_names[cls_idx] if 0 <= cls_idx < len(class_names) else str(cls_idx)
            # Keep the caption inside the image for boxes touching the top edge.
            text_y = max(y1 - 10, 25)
            cv2.putText(img, label, (x1, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,255,0), 2)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def plot_samples_per_class(image_dir, label_dir, class_names, num_samples=3):
    """Save annotated example images for every class.

    Groups the label files in *label_dir* by the class IDs they contain, draws
    up to *num_samples* randomly chosen images per class with
    ``visualize_annotations`` and saves each one as
    ``annotation_<class>_<stem>.png`` in the current working directory.

    Args:
        image_dir: Directory holding ``<stem>.jpg`` images.
        label_dir: Directory holding the matching ``<stem>.txt`` label files.
        class_names: Sequence of class names indexed by class ID.
        num_samples: Maximum number of images to plot per class.
    """
    class_images = {i: [] for i in range(len(class_names))}
    # Sorted so the seeded random.sample below picks the same files regardless
    # of the filesystem's directory order.
    for lbl_file in sorted(label_dir.glob('*.txt')):
        classes_in_image = set()
        with open(lbl_file, 'r') as f:
            for line in f:
                parts = line.split()
                if not parts:
                    continue  # blank line
                try:
                    classes_in_image.add(int(parts[0]))
                except ValueError:
                    logger.warning(f"Skipping non-integer class ID in {lbl_file}: {line.strip()}")
        for c_id in sorted(classes_in_image):
            if c_id not in class_images:
                # An ID outside class_names used to raise KeyError here.
                logger.warning(f"Ignoring unknown class ID {c_id} in {lbl_file}")
                continue
            class_images[c_id].append(lbl_file.stem)

    for cls_id, stems in class_images.items():
        class_name = class_names[cls_id]
        if not stems:
            logger.warning(f"No images found for class {class_name}")
            continue
        # "/" in a class name (e.g. "bottle/glass") is not valid in a file name.
        safe_class_name = class_name.replace("/", "_")
        selected_stems = random.sample(stems, min(num_samples, len(stems)))
        for s in selected_stems:
            img_path = image_dir / f"{s}.jpg"
            lbl_path = label_dir / f"{s}.txt"
            annotated = visualize_annotations(img_path, lbl_path, class_names)
            if annotated is None:
                continue  # image could not be loaded; already logged
            plt.figure(figsize=(8,6))
            plt.imshow(annotated)
            plt.title(f"{class_name} - {s}")
            plt.axis('off')
            plt.savefig(f"annotation_{safe_class_name}_{s}.png")
            plt.close()


# Save up to three annotated example images per class from the training split.
plot_samples_per_class(train_img_dir, train_lbl_dir, selected_classes, 3)

# --- YOLO data.yaml ---
# Dataset description handed to the trainer: image folders, class count, names.
data_yaml_path = yolo_dir / 'data.yaml'
yaml_lines = [
    f"train: {train_img_dir.resolve()}\n",
    f"val: {val_img_dir.resolve()}\n",
    f"nc: {len(selected_classes)}\n",
    f"names: {selected_classes}\n",
]
with open(data_yaml_path, 'w') as f:
    f.writelines(yaml_lines)
logger.info(f"Created data.yaml at {data_yaml_path}")

# --- Train YOLO (Single Model) ---
# Start from the pretrained YOLOv8-small checkpoint.
model = YOLO('yolov8s.pt')

results = model.train(
    data=str(data_yaml_path),
    epochs=50,
    imgsz=640,
    batch=8,
    # With the default optimizer='auto' the trainer ignores lr0 and chooses its
    # own optimizer and learning rate (it logged "ignoring 'lr0=0.001'").
    # Naming the optimizer it picked (AdamW) makes the lr0 below take effect.
    optimizer='AdamW',
    lr0=0.001,
    warmup_epochs=3,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    name="cvat_finetune_all_classes",
    project="runs/train",
    patience=10,
    verbose=True,
    plots=False,
    augment=True,
    mosaic=1.0,
    mixup=0.2,
    degrees=15,
    translate=0.2
)

# --- Print Metrics ---
print(f"mAP50: {results.box.map50}")
print(f"mAP50-95: {results.box.map}")
print(f"Precision: {results.box.p}")
print(f"Recall: {results.box.r}")
# `box.maps` holds the per-class mAP50-95, so printing it under the label
# "mAP50" reported the wrong metric (the values matched the mAP50-95 column of
# the validation table). Per-class AP at IoU 0.5 is `box.ap50`, with one entry
# per class ID listed in `box.ap_class_index`.
for ap50, cls_idx in zip(results.box.ap50, results.box.ap_class_index):
    print(f"{selected_classes[cls_idx]} - mAP50: {ap50}")


2025-04-07 20:12:39,536 - INFO - Random seeds set for reproducibility.
2025-04-07 20:12:39,555 - INFO - Found 0 images without labels.
2025-04-07 20:12:39,557 - INFO - Collected 1140 images. Train: 912, Val: 228
Copying to images: 100%|██████████| 912/912 [00:00<00:00, 2167.96it/s]
Copying to images: 100%|██████████| 228/228 [00:00<00:00, 2252.13it/s]
2025-04-07 20:12:40,162 - INFO - Train class counts: {1: 166, 2: 79, 3: 12, 0: 78, 4: 81}
2025-04-07 20:12:40,163 - INFO - Val class counts: {4: 20, 1: 36, 2: 22, 0: 14, 3: 6}
2025-04-07 20:12:40,318 - INFO - Saved class distribution plot.
2025-04-07 20:12:42,657 - INFO - Created data.yaml at /home/r0jin/projects/EnigmaAI/yolo_cvat/data.yaml


New https://pypi.org/project/ultralytics/8.3.103 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.239 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 2070 with Max-Q Design, 7959MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/home/r0jin/projects/EnigmaAI/yolo_cvat/data.yaml, epochs=50, time=None, patience=10, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=cuda, workers=8, project=runs/train, name=cvat_finetune_all_classes, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=False, source=None, vid_stride=1, stream_buffer=False, visualize=False, augme

[34m[1mtrain: [0mScanning /home/r0jin/projects/EnigmaAI/yolo_cvat/labels/train... 912 images, 642 backgrounds, 0 corrupt: 100%|██████████| 912/912 [00:00<00:00, 2760.39it/s]

[34m[1mtrain: [0mNew cache created: /home/r0jin/projects/EnigmaAI/yolo_cvat/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



  A.ImageCompression(quality_lower=75, p=0.0),
  self._set_keys()
[34m[1mval: [0mScanning /home/r0jin/projects/EnigmaAI/yolo_cvat/labels/val... 228 images, 157 backgrounds, 0 corrupt: 100%|██████████| 228/228 [00:00<00:00, 1122.97it/s]

[34m[1mval: [0mNew cache created: /home/r0jin/projects/EnigmaAI/yolo_cvat/labels/val.cache





[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.001' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001111, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50      2.78G      2.067      7.685      1.984          6        640: 100%|██████████| 114/114 [00:18<00:00,  6.24it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  7.84it/s]


                   all        228         98      0.601      0.133      0.135     0.0743

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50      2.24G      2.188      4.121      2.206         14        640: 100%|██████████| 114/114 [00:16<00:00,  7.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  8.92it/s]

                   all        228         98      0.879     0.0333     0.0386     0.0173






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50       2.3G      2.365      4.479      2.381         10        640: 100%|██████████| 114/114 [00:15<00:00,  7.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  9.07it/s]

                   all        228         98      0.668     0.0167      0.013    0.00472






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50      2.23G        2.5      4.194      2.513          5        640: 100%|██████████| 114/114 [00:15<00:00,  7.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  8.73it/s]

                   all        228         98      0.607     0.0455     0.0144    0.00527






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/50       2.3G      2.547      4.416      2.552          7        640: 100%|██████████| 114/114 [00:15<00:00,  7.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  8.94it/s]

                   all        228         98    0.00414       0.28     0.0239    0.00664






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/50      2.21G      2.594      4.413      2.614          8        640: 100%|██████████| 114/114 [00:16<00:00,  7.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  8.89it/s]

                   all        228         98      0.402     0.0727     0.0222    0.00849






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/50       2.3G      2.546      4.268      2.566          3        640: 100%|██████████| 114/114 [00:17<00:00,  6.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  8.10it/s]

                   all        228         98      0.828     0.0278     0.0197    0.00606






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/50       2.3G       2.37      4.074      2.426          5        640: 100%|██████████| 114/114 [00:16<00:00,  7.00it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  8.19it/s]

                   all        228         98       0.71     0.0601     0.0694     0.0299






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/50      2.28G      2.273      3.966      2.357          5        640: 100%|██████████| 114/114 [00:16<00:00,  7.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  9.00it/s]

                   all        228         98      0.782     0.0621      0.133      0.058






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/50      2.23G      2.246      3.878      2.349         14        640: 100%|██████████| 114/114 [00:16<00:00,  6.95it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  8.45it/s]

                   all        228         98     0.0738      0.143     0.0712     0.0252






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/50      2.28G      2.164      3.677      2.323         12        640: 100%|██████████| 114/114 [00:16<00:00,  6.93it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:01<00:00,  8.89it/s]

                   all        228         98        0.9     0.0636      0.099     0.0374
Stopping training early as no improvement observed in last 10 epochs. Best results observed at epoch 1, best model saved as best.pt.
To update EarlyStopping(patience=10) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






11 epochs completed in 0.069 hours.
Optimizer stripped from runs/train/cvat_finetune_all_classes/weights/last.pt, 22.5MB
Optimizer stripped from runs/train/cvat_finetune_all_classes/weights/best.pt, 22.5MB

Validating runs/train/cvat_finetune_all_classes/weights/best.pt...
Ultralytics YOLOv8.0.239 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 2070 with Max-Q Design, 7959MiB)
Model summary (fused): 168 layers, 11127519 parameters, 0 gradients, 28.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 15/15 [00:03<00:00,  4.58it/s]

                   all        228         98      0.597      0.229      0.211     0.0895
                  hole        228         14          1          0    0.00385    0.00087
                  pole        228         36       0.29      0.278       0.18     0.0846
                stairs        228         22          1          0      0.022     0.0106
          bottle/glass        228          6      0.615      0.667      0.771       0.29
                  rock        228         20     0.0781        0.2     0.0771     0.0614
Speed: 0.3ms preprocess, 12.3ms inference, 0.0ms loss, 0.9ms postprocess per image
mAP50: 0.2108263839100303
mAP50-95: 0.0894598172101645
Precision: [          1     0.29005           1     0.61482    0.078085]
Recall: [          0     0.27778           0     0.66667         0.2]
hole - mAP50: 0.0008704460162703442
pole - mAP50: 0.08461082574586559
stairs - mAP50: 0.010584799581273226
bottle/glass - mAP50: 0.2898644177574071
rock - mAP50: 0.06136859695000617





In [None]:
import cv2
import matplotlib.pyplot as plt
import torch
from ultralytics import YOLO

# --- Paths to trained weights ---
POLE_MODEL_PATH = "runs/train/subset_test22/weights/best.pt"  # Older 'pole' model
NEW_MODEL_PATH = "runs/train/cvat_finetune_no_pole/weights/best.pt"

# Load each model
pole_model, other_model = YOLO(POLE_MODEL_PATH), YOLO(NEW_MODEL_PATH)

# Per the original note, the older model's class list is
# ['door','table','openedDoor','chair','pole','bike','truck','car','dog','bus'],
# which puts 'pole' at index 4.
pole_class_id_in_old_model = 4

# Names used for the newer model's class IDs 0..3, in that order.
other_classes = ['hole','stairs','bottle/glass','rock']

def combine_detections(image_path, pole_conf=0.25, other_conf=0.25):
    """
    Runs inference from two YOLO models:
    1) pole_model  - we select only 'pole' from it
    2) other_model - for the other classes

    Args:
        image_path: Path of the image to run both models on.
        pole_conf: Confidence threshold passed to ``pole_model``.
        other_conf: Confidence threshold passed to ``other_model``.

    Returns:
        The image (RGB) annotated with the bounding boxes from both models,
        or ``None`` when the image could not be loaded.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        print(f"Failed to load image: {image_path}")
        return None

    # 1) Inference for 'pole' only: `classes=[...]` restricts the old model's
    #    output to its 'pole' class ID.
    pole_results = pole_model.predict(source=image_path, conf=pole_conf, classes=[pole_class_id_in_old_model])
    pole_boxes = pole_results[0].boxes

    # 2) Inference for other classes
    other_results = other_model.predict(source=image_path, conf=other_conf)
    other_boxes = other_results[0].boxes

    # Convert boxes to a combined list
    detections = []

    # Add 'pole' from old model; every box kept by the filter above is a pole.
    for box in pole_boxes:
        detections.append({
            'xyxy': box.xyxy[0].cpu().numpy(),
            # .item() extracts the scalar directly; float()/int() on a
            # 1-element array is deprecated in recent NumPy releases.
            'conf': box.conf.item(),
            'class_name': 'pole'
        })

    # Add other classes from new model
    for box in other_boxes:
        cls_id = int(box.cls.item())
        # Map the new model's class ID to a string label; IDs outside the list
        # (including negative ones, which would wrap around) show as numbers.
        class_name = other_classes[cls_id] if 0 <= cls_id < len(other_classes) else str(cls_id)
        detections.append({
            'xyxy': box.xyxy[0].cpu().numpy(),
            'conf': box.conf.item(),
            'class_name': class_name
        })

    # Visualize
    for det in detections:
        x1, y1, x2, y2 = map(int, det['xyxy'])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0,255,0), 2)
        label = f"{det['class_name']} {det['conf']:.2f}"
        # Keep the caption inside the image for boxes touching the top edge.
        text_y = max(y1 - 10, 25)
        cv2.putText(img, label, (x1, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0,255,0), 2)

    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Example usage: run both models on one frame and display the merged result.
image_path = "../datasets/cvat_upload/images/frame_IMG_4322_00024.jpg"
annotated_img = combine_detections(image_path)
if annotated_img is not None:
    # Nothing is shown when the image could not be loaded.
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(annotated_img)
    ax.axis('off')
    plt.show()



image 1/1 /home/r0jin/projects/EnigmaAI/code/../datasets/cvat_upload/images/frame_IMG_4322_00024.jpg: 640x640 1 pole, 8.0ms
Speed: 1.3ms preprocess, 8.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 /home/r0jin/projects/EnigmaAI/code/../datasets/cvat_upload/images/frame_IMG_4322_00024.jpg: 640x640 (no detections), 8.3ms
Speed: 1.6ms preprocess, 8.3ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)


<Figure size 800x600 with 1 Axes>

<Figure size 1000x800 with 1 Axes>