In [4]:
!pip install ultralytics



In [1]:
import json
from collections import Counter
import os
import shutil
from typing import List, Tuple
import json
from ultralytics.data.utils import verify_image_label
from ultralytics import YOLO
import yaml  
import numpy as np


In [6]:
label_file = "large_rock_dataset.json"

with open(label_file, 'r') as f:
    data = json.load(f)

splits = [tile.get('split', 'train') for tile in data['dataset']]  # Default to 'train' if missing
split_counts = Counter(splits)

for split, count in split_counts.items():
    print(f"{split.capitalize()}: {count} images")
    print(f"Percentage: {count / len(splits) * 100:.2f}%")

Train: 640 images
Percentage: 64.52%
Test: 352 images
Percentage: 35.48%


In [7]:
class LargeRocksDatasetV2:
    def __init__(self, image_folder: str, json_dataset: str, output_path: str):
        """
        Initialize the dataset processor
        
        Args:
            image_folder (str): Path to folder containing `.tif` images
            json_dataset (str): Path to JSON dataset file
            output_path (str): Path to save YOLOv8 formatted dataset
        """
        self.image_folder = image_folder
        self.label_file = json_dataset
        self.output_path = output_path
        
        # Define directories for train and test splits
        self.splits = ["train", "test"]
        self.image_dir = output_path
        self.label_dir = output_path
        
        # Create directories for each split
        for split in self.splits:
            os.makedirs(os.path.join(self.image_dir, split, "images"), exist_ok=True)
            os.makedirs(os.path.join(self.label_dir, split, "labels"), exist_ok=True)
    
    def _convert_bbox(self, rel_loc: Tuple[float, float], bbox_size: Tuple[int, int], img_size: Tuple[int, int]) -> List[float]:
        """
        Convert bounding box info to YOLO format: [class_id, x_center, y_center, width, height].
        
        Args:
            rel_loc (Tuple[float, float]): Relative location of the object in the image (normalized).
            bbox_size (Tuple[int, int]): Size of the bounding box in pixels.
            img_size (Tuple[int, int]): Image size (width, height).
        
        Returns:
            List[float]: Bounding box in YOLO format.
        """
        x_center, y_center = rel_loc
        width = bbox_size[0] / img_size[0]
        height = bbox_size[1] / img_size[1]
        return [0, x_center, y_center, width, height]  # class_id = 0 for rocks
    
    def process_dataset(self):
        """
        Process the dataset and convert it to YOLOv8 format with train/test splits.
        """
        # Load the annotations JSON
        with open(self.label_file, 'r') as f:
            data = json.load(f)
        
        # Iterate over each image in the dataset
        for tile in data['dataset']:
            file_name = tile['file_name']
            img_path = os.path.join(self.image_folder, file_name)
            
            # Check if the image exists
            if not os.path.exists(img_path):
                print(f"Image {img_path} not found. Skipping.")
                continue
            
            img_width, img_height = tile['width'], tile['height']
            annotations = tile.get('rocks_annotations', [])
            split = tile.get('split', "train")  # Default to 'train' if no split is specified
            
            # Ensure split is either train or test
            if split not in self.splits:
                print(f"Skipping split '{split}' for file {file_name}.")
                continue
            
            # Copy the image to the appropriate YOLO image folder
            dst_img_path = os.path.join(self.image_dir, split, "images", file_name)
            shutil.copy(img_path, dst_img_path)
            
            # Prepare labels for this image
            label_lines = []
            for annotation in annotations:
                rel_loc = annotation['relative_within_patch_location']
                bbox_size = annotation.get('bbox_size', [30, 30])  # Default bbox size to 30x30
                yolo_bbox = self._convert_bbox(rel_loc, bbox_size, (img_width, img_height))
                label_lines.append(" ".join(map(str, yolo_bbox)))
            
            # Save labels to the appropriate folder
            label_file = os.path.join(self.label_dir, split, "labels", f"{os.path.splitext(file_name)[0]}.txt")
            with open(label_file, 'w') as lf:
                lf.write("\n".join(label_lines))
        
        print(f"Dataset ({self.image_folder}) converted to YOLO format with train/test splits at {self.output_path}")



In [8]:
# Example usage
image_folder = "swissImage_50cm_patches"  # Path to image folder
label_file = "large_rock_dataset.json"  # Path to JSON annotation file
output_path = "dataset_rgb_only"  # Path to save processed dataset

rocks_dataset = LargeRocksDatasetV2(image_folder, label_file, output_path)
rocks_dataset.process_dataset()

Dataset (swissImage_50cm_patches) converted to YOLO format with train/test splits at dataset_rgb_only


In [9]:
# Define the required arguments
image_file = "dataset_rgb_only/train/images/2581_1126_0_2.tif"  # Path to the image file
label_file = "dataset_rgb_only/train/labels/2581_1126_0_2.txt"  # Path to the corresponding label file
prefix = "[VERIFY] "  # Optional log message prefix
keypoint = False  # Whether the labels include keypoints
num_classes = 1  # Total number of classes in the dataset
nkpt = 0  # Number of keypoints (if keypoint is True)
ndim = 0  # Number of dimensions for keypoints

# Verify the image and its label
args = (image_file, label_file, prefix, keypoint, num_classes, nkpt, ndim)
result = verify_image_label(args)

# Output the result
print("Verification Results:")
print(f"Image File: {result[0]}")
print(f"Labels: {result[1]}")
print(f"Image Shape: {result[2]}")
print(f"Segments: {result[3]}")
print(f"Keypoints: {result[4]}")
print(f"Missing Labels: {result[5]}")
print(f"Found Labels: {result[6]}")
print(f"Empty Labels: {result[7]}")
print(f"Corrupt Files: {result[8]}")
print(f"Message: {result[9]}")


Verification Results:
Image File: dataset_rgb_only/train/images/2581_1126_0_2.tif
Labels: [[          0        0.12        0.39    0.046875    0.046875]]
Image Shape: (640, 640)
Segments: []
Keypoints: None
Missing Labels: 0
Found Labels: 1
Empty Labels: 0
Corrupt Files: 0
Message: 


In [10]:
def remove_duplicates_in_labels(base_dir):
    """
    Traverse the labels directory and remove duplicate lines in each label file.
    Print a message only if duplicates were removed.
    """
    subfolders = ['train/labels', 'test/labels']
    
    for subfolder in subfolders:
        labels_path = os.path.join(base_dir, subfolder)
        
        if not os.path.exists(labels_path):
            print(f"Directory not found: {labels_path}")
            continue
        
        for label_file in os.listdir(labels_path):
            file_path = os.path.join(labels_path, label_file)
            
            if not label_file.endswith('.txt'):
                continue  # Skip non-label files
            
            try:
                # Read file and remove duplicates
                with open(file_path, 'r') as f:
                    lines = f.readlines()
                
                unique_lines = list(set(lines))  # Remove duplicates
                
                # Check if duplicates were removed
                if len(lines) != len(unique_lines):
                    # Write back the unique lines
                    with open(file_path, 'w') as f:
                        f.writelines(sorted(unique_lines))  # Sorting for consistency
                    
                    print(f"Duplicates removed in file: {file_path}")
            except Exception as e:
                print(f"Error processing file {file_path}: {e}")

# Specify the base directory of your dataset
base_dataset_dir = 'dataset_rgb_only'

# Call the function
remove_duplicates_in_labels(base_dataset_dir)


Duplicates removed in file: dataset_rgb_only/train/labels/2588_1133_0_2.txt
Duplicates removed in file: dataset_rgb_only/train/labels/2704_1127_3_3.txt
Duplicates removed in file: dataset_rgb_only/train/labels/2588_1133_1_2.txt
Duplicates removed in file: dataset_rgb_only/train/labels/2582_1127_0_1.txt
Duplicates removed in file: dataset_rgb_only/train/labels/2598_1132_1_3.txt
Duplicates removed in file: dataset_rgb_only/train/labels/2598_1132_0_3.txt
Duplicates removed in file: dataset_rgb_only/test/labels/2626_1102_2_0.txt


In [51]:
def write_yaml_file(output_path, path_params, class_names={0: "Rock"}, augmentation_params=None):

    # Extract paths from the dictionary
    dataset_path = path_params.get("dataset_path", "")
    train_path = path_params.get("train_path", "")
    val_path = path_params.get("val_path", "")
    test_path = path_params.get("test_path", "")

    # Build the data dictionary for YAML
    data = {
        "path": dataset_path,
        "train": train_path,
        "val": val_path,
        "test": test_path,
        "names": class_names
    }

    nc = len(class_names)
    data["nc"] = nc

    if augmentation_params:
        data["augmentation"] = augmentation_params

    # Write the YAML file
    with open(output_path, 'w') as yaml_file:
        yaml.dump(data, yaml_file, default_flow_style=False)
    
    print(f"YAML file written to: {output_path}")



In [82]:
# Example usage
output_yaml = "data.yaml"

DEVICE = "mps" # Set the device to 'cpu', 'mps' or 'cuda'
EPOCHS = 2  # Number of epochs to train
OPTIMIZER = "AdamW"  # Optimizer to use for training
BATCH_SIZE = 8  # Batch size for training
IMG_SIZE = 640  # Image size for training
SAVE_DIR = "/Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/runs/train"
PRETRAINED =True
DROPOUT = 0
MOSAIC = 0 # Use Mosaic augmentation --> dont make sense for rock detection
SCALE = 0 # Use Scale augmentation --> dont make sense for rock detection
augmentation_params = {
                        "hsv_h": 0.0,
                        "hsv_s": 0.0,
                        "hsv_v": 0.0,
                        "flipud": 0.0,
                        "fliplr": 0.0, 
                    }

# Define the paths in a dictionary
path_params = {
                "dataset_path": "/Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/dataset_rgb_only",
                "train_path": "/Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/dataset_rgb_only/train",
                "val_path": "/Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/dataset_rgb_only/train",
                "test_path": "/Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/dataset_rgb_only/test",
            }

# Define class names and augmentation parameters
class_names = {
                0: "Rock"
            }


# Call the function
write_yaml_file(output_yaml, path_params, class_names)


YAML file written to: data.yaml


In [83]:
# Load the YAML configuration
model = YOLO('yolov8n.pt')  # Load YOLOv8

In [85]:
results = model.train(
    data="/Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/data.yaml",
    epochs=EPOCHS,
    batch=BATCH_SIZE,
    imgsz=IMG_SIZE,
    device=DEVICE,
    optimizer=OPTIMIZER,
    pretrained=PRETRAINED,
    dropout=DROPOUT,
    mosaic=MOSAIC,
    scale=SCALE,
    translate= 0.0,
    hsv_h=augmentation_params["hsv_h"],
    hsv_s=augmentation_params["hsv_s"],
    hsv_v=augmentation_params["hsv_v"],
    flipud=augmentation_params["flipud"],
    fliplr=augmentation_params["fliplr"],
)

New https://pypi.org/project/ultralytics/8.3.49 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.40 🚀 Python-3.11.5 torch-2.5.0 MPS (Apple M2 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/data.yaml, epochs=2, time=None, patience=100, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=mps, workers=8, project=None, name=train48, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=

[34m[1mtrain: [0mScanning /Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/dataset_rgb_only/train/labels.cache... 640 images, 320 backgrounds, 0 corrupt: 100%|██████████| 640/640 [00:00<?, ?it/s]




[34m[1mval: [0mScanning /Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/dataset_rgb_only/train/labels.cache... 640 images, 320 backgrounds, 0 corrupt: 100%|██████████| 640/640 [00:00<?, ?it/s]

Plotting labels to /Users/janclevorn/runs/detect/train48/labels.jpg... 





[34m[1moptimizer:[0m AdamW(lr=0.01, momentum=0.937) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/Users/janclevorn/runs/detect/train48[0m
Starting training for 2 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/2      8.59G       2.64      5.015      1.714         21        640:   8%|▊         | 6/80 [00:19<04:03,  3.29s/it]


KeyboardInterrupt: 