In [5]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import random
from PIL import Image
import torch
import json
import numpy as np
import random
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from tifffile import tifffile 

import json
from collections import Counter
from Dataset import LargeRocksDataset
from utils import *


In [6]:
label_file = "large_rock_dataset.json"

# Read the annotation JSON once; it is expected to hold a top-level "dataset" list.
with open(label_file, 'r') as annotation_handle:
    data = json.load(annotation_handle)

# Collect the split name of every tile; tiles without a "split" key count as 'train'.
splits = []
for tile in data['dataset']:
    splits.append(tile.get('split', 'train'))
split_counts = Counter(splits)

# Report the absolute and relative size of each split.
for split, count in split_counts.items():
    print(f"{split.capitalize()}: {count} images")
    print(f"Percentage: {count / len(splits) * 100:.2f}%")

Train: 640 images
Percentage: 64.52%
Test: 352 images
Percentage: 35.48%


In [7]:
# Inputs: the annotation JSON and the folder holding the image patches.
label_file = "large_rock_dataset.json"  # JSON file with annotations
image_folder = "swissImage_50cm_patches"  # Path to swissImage_50cm_patches or equivalent

# Output: directory that receives the processed dataset.
output_path = "YOLO_Only_RGB"

# Build the converter and run the conversion (implemented in Dataset.py).
rocks_dataset = LargeRocksDataset(image_folder, label_file, output_path)
rocks_dataset.process_dataset()

Dataset (swissImage_50cm_patches) converted to YOLO format with train/val/test splits at YOLO_Only_RGB


In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def visualize_batch(images, labels, batch_number=False):
    """
    Visualize all images in a batch with their bounding boxes.

    The images are laid out on a two-row grid. Each box is drawn in red with
    its class id printed just above the top-left corner.

    Args:
        images (torch.Tensor): Batch of images with shape (batch_size, channels, height, width).
            Pixel values are mapped back with ``x * 0.5 + 0.5`` before display,
            i.e. the batch is assumed to be normalized with mean 0.5 / std 0.5.
        labels (list): List of tensors containing bounding boxes for each image.
            Each row is (class, x_center, y_center, width, height) with
            coordinates expressed as fractions of the image size.
        batch_number: Value shown in the figure title. ``False`` (default) or
            ``None`` disables the title; any other value, including 0, is shown.

    Returns:
        None. The figure is created through ``plt.subplots``.
    """
    batch_size = len(images)
    if batch_size == 0:
        # A zero-column subplot grid is rejected by matplotlib; nothing to draw.
        return

    cols = batch_size // 2 + batch_size % 2  # Two rows, so ceil(batch_size / 2) columns

    fig, axs = plt.subplots(
        2, cols, figsize=(cols * 5, 10)  # Adjust figure size based on grid dimensions
    )
    axs = axs.flatten()  # Flatten axes array for easy indexing

    # Compare by identity so that a legitimate batch number of 0 still gets a title.
    if batch_number is not False and batch_number is not None:
        fig.suptitle(f"Batch Number: {batch_number}")

    for idx in range(batch_size):
        # detach()/cpu() make this work for tensors on an accelerator or tracking gradients.
        image = images[idx].detach().cpu().permute(1, 2, 0).numpy()  # Convert to (H, W, C)
        # Undo normalization and clip so rounding error cannot leave the [0, 1] display range.
        image = (image * 0.5 + 0.5).clip(0.0, 1.0)
        img_height, img_width = image.shape[:2]  # Same for every box of this image

        axs[idx].imshow(image)
        axs[idx].axis('off')  # Turn off axes for a cleaner look

        # Plot bounding boxes
        for lbl in labels[idx]:
            cls, x_center, y_center, width, height = lbl.tolist()
            # Scale the fractional box to pixels.
            x_center, y_center = x_center * img_width, y_center * img_height
            width, height = width * img_width, height * img_height
            # Rectangle is anchored at its top-left corner, not its center.
            x_min, y_min = x_center - width / 2, y_center - height / 2

            # Draw rectangle
            rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='r', facecolor='none')
            axs[idx].add_patch(rect)
            axs[idx].text(x_min, y_min - 5, f"Class: {int(cls)}", color='red', fontsize=8)

    # Hide unused subplots if the grid has extra cells (odd batch sizes)
    for idx in range(batch_size, len(axs)):
        axs[idx].axis('off')


In [None]:
class RandomHorizontalFlipWithBBox:
    """Randomly mirror an image left-to-right and keep its YOLO boxes in sync."""

    def __init__(self, flip_prob=0.5):
        """
        Initialize the transform with a probability of flipping the image and bounding boxes.
        Args:
            flip_prob (float): Probability of applying the horizontal flip.
        """
        self.flip_prob = flip_prob

    def __call__(self, image, labels):
        """
        Apply the transformation.

        The input labels are never modified: when a flip happens the adjusted
        values are written to a copy, so callers that keep or cache their label
        tensors do not see them change.

        Args:
            image (PIL.Image): The input image.
            labels (torch.Tensor): The bounding box labels in YOLO format (class, x_center, y_center, width, height).
        Returns:
            image (PIL.Image): Transformed image.
            labels (torch.Tensor): Adjusted bounding box labels.
        """
        if random.random() >= self.flip_prob:
            # No flip this time: hand both inputs back untouched.
            return image, labels

        # Flip the image horizontally
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Adjust the labels (skipped for images without boxes)
        if len(labels) > 0:
            # Work on a copy; tensors expose clone(), array-likes expose copy().
            labels = labels.clone() if hasattr(labels, "clone") else labels.copy()
            # Only x_center moves under a left-right mirror; sizes and y stay the same.
            labels[:, 1] = 1 - labels[:, 1]

        return image, labels
    

class RandomVerticalFlipWithBBox:
    """Randomly mirror an image top-to-bottom and keep its YOLO boxes in sync."""

    def __init__(self, flip_prob=0.5):
        """
        Initialize the transformation with a probability of flipping vertically.
        Args:
            flip_prob (float): Probability of applying the vertical flip.
        """
        self.flip_prob = flip_prob

    def __call__(self, image, labels):
        """
        Apply the transformation.

        The input labels are never modified: when a flip happens the adjusted
        values are written to a copy, so callers that keep or cache their label
        tensors do not see them change.

        Args:
            image (PIL.Image): The input image.
            labels (torch.Tensor): Bounding box labels in YOLO format 
                                   (class, x_center, y_center, width, height).
        Returns:
            image (PIL.Image): Transformed image.
            labels (torch.Tensor): Adjusted labels after flipping.
        """
        if random.random() >= self.flip_prob:
            # No flip this time: hand both inputs back untouched.
            return image, labels

        # Flip the image vertically
        image = image.transpose(Image.FLIP_TOP_BOTTOM)

        # Adjust the bounding box labels for the flip (skipped for images without boxes)
        if len(labels) > 0:
            # Work on a copy; tensors expose clone(), array-likes expose copy().
            labels = labels.clone() if hasattr(labels, "clone") else labels.copy()
            # Only y_center moves under a top-bottom mirror; sizes and x stay the same.
            labels[:, 2] = 1 - labels[:, 2]

        return image, labels

class ComposeCustomTransforms:
    """Chain several (image, labels) -> (image, labels) callables into one."""

    def __init__(self, transforms):
        """
        Store the pipeline steps.
        Args:
            transforms (list): Callables taking and returning an (image, labels) pair.
        """
        self.transforms = transforms

    def __call__(self, image, labels):
        """
        Run every stored step in order, feeding each one the previous result.
        Args:
            image (PIL.Image): Input image.
            labels (torch.Tensor): YOLO-style labels.
        Returns:
            image, labels: The pair produced by the last step, or the inputs
            unchanged when the pipeline is empty.
        """
        pair = (image, labels)
        for step in self.transforms:
            next_image, next_labels = step(*pair)
            pair = (next_image, next_labels)
        return pair


class YOLODataset(Dataset):
    """
    Dataset of images with YOLO-format label files.

    Each image ``<stem>.<ext>`` in ``image_dir`` is paired with the label file
    ``<stem>.txt`` in ``label_dir``. Pairing by name rather than by position
    in the two directory listings keeps images and labels aligned even when a
    directory holds stray files (e.g. ``.DS_Store``) or an image has no label file.
    """

    # Files in `image_dir` with any other extension are ignored.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")

    def __init__(self, image_dir, label_dir, transform=None, custom_transform=None):
        """
        Args:
            image_dir (str): Directory containing the images.
            label_dir (str): Directory containing one ``.txt`` label file per image.
            transform (callable, optional): Applied to the image only, after
                ``custom_transform``.
            custom_transform (callable, optional): Called as
                ``custom_transform(image, labels)`` and must return the
                transformed ``(image, labels)`` pair.
        """
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transform = transform
        self.custom_transform = custom_transform
        self.image_files = sorted(
            name for name in os.listdir(image_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )
        # One label filename per image, in the same order as `image_files`.
        self.label_files = [os.path.splitext(name)[0] + ".txt" for name in self.image_files]

    def __len__(self):
        """Return the number of images found in ``image_dir``."""
        return len(self.image_files)

    def __getitem__(self, index):
        """
        Load one sample.

        Returns:
            image: RGB image, after ``custom_transform`` and ``transform`` if given.
            labels (torch.Tensor): Float tensor with one row per box,
                (class, x_center, y_center, width, height). An image whose
                label file is missing or empty yields a tensor of shape (0, 5).
        """
        # Load image; convert() returns a loaded copy, so the file can be closed right away.
        image_path = os.path.join(self.image_dir, self.image_files[index])
        with Image.open(image_path) as opened:
            image = opened.convert("RGB")

        # Load labels, skipping blank lines
        label_path = os.path.join(self.label_dir, self.label_files[index])
        rows = []
        if os.path.isfile(label_path):
            with open(label_path, 'r') as f:
                for line in f:
                    fields = line.split()
                    if fields:
                        rows.append([float(value) for value in fields])
        if rows:
            labels = torch.tensor(rows, dtype=torch.float32)
        else:
            # Keep a 2-D shape so column indexing such as labels[:, 1] stays valid.
            labels = torch.zeros((0, 5), dtype=torch.float32)

        # Apply custom transform (e.g., flipping)
        if self.custom_transform:
            image, labels = self.custom_transform(image, labels)

        # Apply regular transformations (resize, normalize, etc.)
        if self.transform:
            image = self.transform(image)

        return image, labels

def custom_collate_fn(batch):
    """
    Collate (image, labels) samples whose label tensors differ in length.

    Images are stacked into one tensor; labels cannot be stacked because each
    image may carry a different number of boxes, so they stay a plain list.

    Args:
        batch (list): A list of (image, labels) tuples.
    Returns:
        images (torch.Tensor): Stacked images of shape [batch_size, channels, height, width].
        labels (list): A list of label tensors, each of shape [num_boxes, 5].
    """
    image_tensors = []
    box_tensors = []
    for sample in batch:
        image_tensors.append(sample[0])
        box_tensors.append(sample[1])
    return torch.stack(image_tensors), box_tensors


In [None]:
# Directories for training data
image_dir = "YOLO_Only_RGB/images/train"
label_dir = "YOLO_Only_RGB/labels/train"

# Geometric augmentations: these move the boxes, so they receive image and labels together.
custom_transform = ComposeCustomTransforms([
    RandomHorizontalFlipWithBBox(flip_prob=0.5),
    RandomVerticalFlipWithBBox(flip_prob=0.5),
])

# Image-only pipeline, run after the flips.
# Only brightness is jittered; contrast jitter is not enabled.
train_transform = transforms.Compose([
    transforms.ColorJitter(brightness=0.5),
    transforms.Resize((416, 416)),
    transforms.ToTensor(),  # Convert to tensor after applying augmentations
    transforms.Normalize((0.5,), (0.5,)),  # Normalize image (mean 0.5, std 0.5)
])

# Create dataset and data loader
train_dataset = YOLODataset(
    image_dir=image_dir,
    label_dir=label_dir,
    transform=train_transform,
    custom_transform=custom_transform,
)

# custom_collate_fn keeps the per-image label tensors in a list instead of stacking them.
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=0,
    collate_fn=custom_collate_fn,
)


In [None]:
# Preview the first batches of the training loader, titled with a 1-based batch index.
batch_count = 1
for images, labels in train_loader:
    visualize_batch(images, labels, batch_count)

    # Advance the index; once it passes 3, three batches have been shown, so stop.
    batch_count += 1
    if batch_count > 3:
        break

In [8]:
from PIL import Image
import os

# Paths to your dataset
base_dir = 'YOLO_Only_RGB/images'
subfolders = ['train_tif', 'val_tif', 'test_tif']
tiff_extensions = ('.tif', '.tiff')

# Convert every TIFF in "<split>_tif" to a JPEG of the same stem in "<split>".
for subfolder in subfolders:
    input_folder = os.path.join(base_dir, subfolder)
    output_folder = os.path.join(base_dir, subfolder.replace('_tif', ''))  # e.g. train_tif -> train

    # A split that was never exported should not abort the remaining splits.
    if not os.path.isdir(input_folder):
        print(f"Skipping {input_folder}: folder not found")
        continue

    os.makedirs(output_folder, exist_ok=True)

    for file_name in os.listdir(input_folder):
        # Compare in lower case so that ".TIF" / ".TIFF" files are converted too.
        if not file_name.lower().endswith(tiff_extensions):
            continue

        file_path = os.path.join(input_folder, file_name)
        output_file = os.path.join(output_folder, os.path.splitext(file_name)[0] + '.jpg')

        # The context manager closes the source file as soon as the JPEG is written,
        # so a large folder does not accumulate open file handles.
        with Image.open(file_path) as img:
            # Save as high-quality JPG
            img.convert("RGB").save(output_file, "JPEG", quality=95)  # Set quality to 95 (default is 75)
        print(f"Converted {file_name} to {output_file}")


Converted 2597_1131_0_1.tif to YOLO_Only_RGB/images/train/2597_1131_0_1.jpg
Converted 2581_1126_3_2.tif to YOLO_Only_RGB/images/train/2581_1126_3_2.jpg
Converted 2632_1144_1_3.tif to YOLO_Only_RGB/images/train/2632_1144_1_3.jpg
Converted 2597_1132_1_1.tif to YOLO_Only_RGB/images/train/2597_1132_1_1.jpg
Converted 2597_1131_2_3.tif to YOLO_Only_RGB/images/train/2597_1131_2_3.jpg
Converted 2632_1144_3_1.tif to YOLO_Only_RGB/images/train/2632_1144_3_1.jpg
Converted 2581_1126_1_0.tif to YOLO_Only_RGB/images/train/2581_1126_1_0.jpg
Converted 2597_1132_3_3.tif to YOLO_Only_RGB/images/train/2597_1132_3_3.jpg
Converted 2634_1145_1_1.tif to YOLO_Only_RGB/images/train/2634_1145_1_1.jpg
Converted 2704_1127_1_0.tif to YOLO_Only_RGB/images/train/2704_1127_1_0.jpg
Converted 2588_1133_0_2.tif to YOLO_Only_RGB/images/train/2588_1133_0_2.jpg
Converted 2634_1145_3_3.tif to YOLO_Only_RGB/images/train/2634_1145_3_3.jpg
Converted 2704_1127_3_2.tif to YOLO_Only_RGB/images/train/2704_1127_3_2.jpg
Converted 25

In [15]:
import yaml

def create_yaml_file(train_path, val_path, test_path, num_classes, class_names, output_file='data.yaml'):
    """
    Creates a YOLOv8 data.yaml file for training.
    
    Args:
        train_path (str): Path to the training images directory.
        val_path (str): Path to the validation images directory.
        test_path (str or None): Path to the testing images directory. Pass
            None when there is no test set; the 'test' key is then left out.
        num_classes (int): Number of classes in the dataset.
        class_names (list): List of class names (a tuple or an id -> name
            dict is accepted as well).
        output_file (str): Name of the output YAML file. Default is 'data.yaml'.

    Raises:
        ValueError: If num_classes does not match the number of class names.
            Checked before the file is opened so no partial file is written.
    """
    if num_classes != len(class_names):
        raise ValueError(
            f"num_classes ({num_classes}) does not match the number of "
            f"class names ({len(class_names)})"
        )

    # Plain list/dict only: a tuple would be dumped with a Python-specific YAML tag
    # that safe YAML loaders reject.
    names = dict(class_names) if isinstance(class_names, dict) else list(class_names)

    # YAML content structure
    data = {
        'train': train_path,
        'val': val_path,
    }
    if test_path is not None:
        data['test'] = test_path
    data['nc'] = num_classes
    data['names'] = names
    
    # Write YAML file
    with open(output_file, 'w') as yaml_file:
        yaml.dump(data, yaml_file, default_flow_style=False)
    
    print(f"YAML file created: {output_file}")

# Write the dataset description used for training.
class_names = ['Rock']  # Replace with your actual class names
num_classes = len(class_names)

# Image directories of the three splits (the test split is optional).
train_dir = 'YOLO_Only_RGB/images/train'
val_dir = 'YOLO_Only_RGB/images/val'
test_dir = 'YOLO_Only_RGB/images/test'

create_yaml_file(
    train_dir,
    val_dir,
    test_dir,
    num_classes,
    class_names,
    output_file='dataset.yaml',
)


YAML file created: dataset.yaml


In [16]:
from ultralytics import YOLO

In [17]:
# Start from pretrained weights rather than a randomly initialised network (recommended for training).
pretrained_weights = "yolo11n.pt"
model = YOLO(pretrained_weights)

In [19]:
# Train the model on the best available accelerator.
# The dataset YAML is the 'dataset.yaml' written earlier into the working directory.
# It is passed as an absolute path, like the original hard-coded one, but without
# tying the notebook to one user's home directory.
# NOTE(review): presumably Ultralytics resolves the relative train/val/test entries
# against the YAML's parent directory only when that path is absolute - confirm.
dataset_yaml = os.path.abspath("dataset.yaml")

# Prefer Apple MPS (the original setting), then the first CUDA GPU, then the CPU.
if torch.backends.mps.is_available():
    train_device = "mps"
elif torch.cuda.is_available():
    train_device = 0
else:
    train_device = "cpu"

results = model.train(data=dataset_yaml, epochs=100, imgsz=640, device=train_device)


Ultralytics 8.3.40 🚀 Python-3.11.5 torch-2.5.0 MPS (Apple M2 Pro)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11n.pt, data=/Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/dataset.yaml, epochs=100, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=mps, workers=8, project=None, name=train6, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_lab

[34m[1mtrain: [0mScanning /Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/YOLO_Only_RGB/labels/train.cache... 640 images, 320 backgrounds, 0 corrupt: 100%|██████████| 640/640 [00:00<?, ?it/s]




[34m[1mval: [0mScanning /Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/YOLO_Only_RGB/labels/test... 352 images, 123 backgrounds, 0 corrupt: 100%|██████████| 352/352 [00:00<00:00, 4518.22it/s]

[34m[1mval: [0mNew cache created: /Users/janclevorn/Desktop/EPFL/IPEO_Project_Group_4/YOLO_Only_RGB/labels/test.cache





Plotting labels to /Users/janclevorn/runs/detect/train6/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m/Users/janclevorn/runs/detect/train6[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/40 [00:00<?, ?it/s]Corrupt JPEG data: 3 extraneous bytes before marker 0xd9
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Corrupt JPEG data: 2 extraneous bytes before marker 0xd9
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: 2 extraneous bytes before marker 0xd9
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: 2 extraneous bytes before marker 0xd9
      1/100      4.79G      3.468      12.15       2.27         15        640:   2%|▎         | 1/40 [00:27<18:01, 27.74s/it]Corrupt JPEG data: 5 extraneous bytes before marker 0xd9
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: 2 extraneous bytes before marker 0xd9
Corrupt JPEG data: premature end of data segme

KeyboardInterrupt: 