# Large Rocks Detection Project

Welcome to the **Large Rocks Detection Project**! This notebook serves to implement our machine learning pipeline for detecting large rocks. Below is an outline of the steps we will follow throughout the project:

## Outline

1. **Dataset Preparation**  
   Organize and adapt the training, validation, and test datasets using `dataset.py`.
2. **Data Augmentation**  
   Apply geometric and visual transformations for enhanced generalization, leveraging `augmentation.py` (on top of the dataset class from `dataset.py`).
3. **Model Training**  
   Train the model using `model.py`.
4. **Regularization to Combat Overfitting**  
   Employ validation strategies to minimize overfitting.
5. **Evaluation on Test Data**  
   Test the model on the final dataset and visualize the results.
6. **Accuracy Metrics**  
   Calculate and report accuracy metrics for a comprehensive performance evaluation.

Let’s dive into each step and build a robust solution for detecting large rocks!

In [4]:
from tifffile import tifffile 
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from PIL import Image

from ultralytics import YOLO
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torch
import torchvision.transforms as T
import os
import shutil
import dataset as dt
import augmentation as  aug

## 1. Dataset Preparation

Download the provided data and place it in a folder named `Data`, located next to this notebook.

In [None]:
# Read the annotation index shipped with the data.
json_file_path = 'Data/large_rock_dataset.json'
data, dataset = dt.load_dataset_from_json(json_file_path)

# Image sources to prepare (here: RGB patches and hillshade patches).
img_folders = ['Data/swissImage_50cm_patches/', 'Data/swissSURFACE3D_hillshade_patches/']

# Every label gets the same fixed-size box: 10/640 of the patch side,
# expressed in normalized YOLO units. The values do not depend on the folder.
bbox_width = 10 / 640
bbox_height = 10 / 640

for img_folder in img_folders:
    # One output tree per source, named after the last path component.
    folder_name = os.path.basename(os.path.normpath(img_folder))
    base_dir_name = f'dataset_{folder_name}'

    # Train split (images + annotations); the test part is returned but unused here.
    train_images, test = dt.split_train_from_json(dataset, img_folder, base_dir_name)
    train_labels = dt.save_train_annotations(dataset, base_dir_name)

    # Validation split derived from the training split.
    # NOTE(review): images and labels are split by two separate calls —
    # confirm that both select the same samples.
    val_images = dt.create_validation_set_images(train_images, base_dir_name)
    val_labels = dt.create_validation_set_labels(train_labels, base_dir_name)

    # YOLOv8 is fed .jpg files, so convert the .tif patches (train first, then val).
    for image_paths in (train_images, val_images):
        dt.convert_tif_to_jpg(image_paths)

    # Write the labels in YOLO format (train first, then val).
    for split_labels, split_name in ((train_labels, 'train'), (val_labels, 'val')):
        dt.convert_labels_to_yolo_format(
            split_labels,
            base_dir_name,
            bbox_width=bbox_width,
            bbox_height=bbox_height,
            type=split_name
        )

## 2. Data Augmentation

### For RGB images

In [None]:
# Build the un-augmented RGB training dataset and look at its first sample.

# Training images and their YOLO-format labels.
image_folder = "dataset_swissImage_50cm_patches/train_images"
label_folder = "dataset_swissImage_50cm_patches/yolo_train_labels"

# Normalization statistics computed from the training images.
mean, std = dt.calculate_mean_std(image_folder)

# Normalized dataset; augmentation stays off at this stage.
dataset = dt.RockDetectionDataset(image_folder, label_folder, mean, std, augment=False)

# Quick sanity check on the first sample only: file name, tensor shape, labels.
# Nothing is printed when the dataset yields no sample.
first_sample = next(iter(dataset), None)
if first_sample is not None:
    idx = 0
    aug_images, aug_labels = first_sample
    image_name = dataset.image_files[idx]
    print(f"Image Name: {image_name}")
    print(f"Image Shape: {aug_images.size()}")
    print(f"Labels: {aug_labels}")

In [None]:
# RGB augmentation, step 1 of 3 (geometric, then brightness, then obstruction).
# Each step writes its augmented images and labels into its own pair of folders.

# Shared parameters for the RGB augmentation cells.
batch_size = 16
rgb_folder = 'dataset_swissImage_50cm_patches'

# Geometric augmentation: output folders live inside the RGB dataset tree.
output_image_folder_g, output_label_folder_g = (
    os.path.join(rgb_folder, "augmented_train_images_geom"),
    os.path.join(rgb_folder, "augmented_train_labels_geom"),
)

aug.aug_pipeline_geom(
    dataset, mean, std, batch_size,
    output_image_folder_g, output_label_folder_g,
)

In [6]:
# RGB augmentation, brightness variant: output folders inside the RGB dataset tree.
output_image_folder_b, output_label_folder_b = (
    os.path.join(rgb_folder, "augmented_train_images_brightning"),
    os.path.join(rgb_folder, "augmented_train_labels_brightning"),
)

aug.aug_pipeline_brightning(
    dataset, mean, std, batch_size,
    output_image_folder_b, output_label_folder_b,
)

In [7]:
# RGB augmentation, obstruction variant: output folders inside the RGB dataset tree.
output_image_folder_o, output_label_folder_o = (
    os.path.join(rgb_folder, "augmented_train_images_obstruction"),
    os.path.join(rgb_folder, "augmented_train_labels_obstruction"),
)

aug.aug_pipeline_obstruction(
    dataset, mean, std, batch_size,
    output_image_folder_o, output_label_folder_o,
)

### For hillshade images

In [None]:
# Build the un-augmented hillshade training dataset and look at its first sample.
# This rebinds image_folder, label_folder, mean, std and dataset to the
# hillshade data, which the following augmentation cells rely on.

# Training images and their YOLO-format labels.
image_folder = "dataset_swissSURFACE3D_hillshade_patches/train_images"
label_folder = "dataset_swissSURFACE3D_hillshade_patches/yolo_train_labels"

# Normalization statistics computed from the training images.
mean, std = dt.calculate_mean_std(image_folder)

# Normalized dataset; augmentation stays off at this stage.
dataset = dt.RockDetectionDataset(image_folder, label_folder, mean, std, augment=False)

# Quick sanity check on the first sample only: file name, tensor shape, labels.
# Nothing is printed when the dataset yields no sample.
first_sample = next(iter(dataset), None)
if first_sample is not None:
    idx = 0
    aug_images, aug_labels = first_sample
    image_name = dataset.image_files[idx]
    print(f"Image Name: {image_name}")
    print(f"Image Shape: {aug_images.size()}")
    print(f"Labels: {aug_labels}")

In [None]:
# Shared parameters for the hillshade augmentation cells.
batch_size = 16
hillshade_folder = 'dataset_swissSURFACE3D_hillshade_patches'

# Geometric augmentation: output folders live inside the hillshade dataset tree.
output_image_folder_g, output_label_folder_g = (
    os.path.join(hillshade_folder, "augmented_train_images_geom"),
    os.path.join(hillshade_folder, "augmented_train_labels_geom"),
)

aug.aug_pipeline_geom(
    dataset, mean, std, batch_size,
    output_image_folder_g, output_label_folder_g,
)

In [10]:
# Hillshade augmentation, brightness variant: output folders inside the hillshade dataset tree.
output_image_folder_b, output_label_folder_b = (
    os.path.join(hillshade_folder, "augmented_train_images_brightning"),
    os.path.join(hillshade_folder, "augmented_train_labels_brightning"),
)

aug.aug_pipeline_brightning(
    dataset, mean, std, batch_size,
    output_image_folder_b, output_label_folder_b,
)

In [11]:
# Hillshade augmentation, obstruction variant: output folders inside the hillshade dataset tree.
output_image_folder_o, output_label_folder_o = (
    os.path.join(hillshade_folder, "augmented_train_images_obstruction"),
    os.path.join(hillshade_folder, "augmented_train_labels_obstruction"),
)

aug.aug_pipeline_obstruction(
    dataset, mean, std, batch_size,
    output_image_folder_o, output_label_folder_o,
)

### Combine all datasets into one, organised for the YOLOv8 model

In [None]:
# Hand both per-source dataset folders (RGB and hillshade) to the project helper
# that assembles the combined YOLO dataset.
# Requires `rgb_folder` and `hillshade_folder` from the augmentation cells above.
# NOTE(review): the exact output layout is defined in augmentation.py — confirm
# it matches the paths used by the training/prediction cells.
aug.organize_yolo_dataset(rgb_folder, hillshade_folder)


YOLO dataset organized in 'yolo_dataset'


## 3. Model Training

Below, we present the code used to train the three different models. Note that these models were trained on a GPU provided by Google Colab.

In [34]:
def train_model(model_name, model, yaml_file_path, save_dir):
    """
    Instantiate a YOLO model and train it with the project's fixed settings.

    Parameters:
    - model_name (str): Run name, forwarded to the trainer as ``name``.
    - model (str): Model specification handed to ``YOLO(...)`` (the notebook passes
      architecture files such as 'yolov8n.yaml').
    - yaml_file_path (str): Dataset description file, forwarded as ``data``.
    - save_dir (str): Output location, forwarded as ``project``.

    Returns:
    - The ``YOLO`` object on which ``train`` was called.
    """
    # NOTE(review): with project=save_dir and name=model_name, Ultralytics
    # presumably writes the run under <save_dir>/<model_name>/, whereas
    # predict_model loads <save_dir>/weights/best.pt — confirm the weights
    # end up where prediction expects them.
    train_settings = dict(
        data=yaml_file_path,
        name=model_name,
        project=save_dir,
        epochs=50,
        imgsz=640,
        batch=16,
        lr0=0.001,
        weight_decay=0.001,
        patience=10,
        augment=True,
        amp=True,
        save_period=5,
    )

    yolo = YOLO(model)
    yolo.train(**train_settings)
    return yolo

def predict_model(save_dir, validation_set_path, save_dir_pred):
    """
    Run a trained model on a folder of images and save its predictions.

    Parameters:
    - save_dir (str): Folder holding the trained run; weights are read from
      ``<save_dir>/weights/best.pt``.
    - validation_set_path (str): Image source forwarded to ``predict``.
    - save_dir_pred (str): Output folder (created if missing), forwarded as ``project``.

    Returns:
    - Whatever ``YOLO.predict`` returns for the given source.
    """
    os.makedirs(save_dir_pred, exist_ok=True)
    weights_path = save_dir + '/weights/best.pt'

    # NOTE(review): no run name / exist_ok is given, so repeated runs may be
    # written to an auto-incremented sub-folder, while the plotting code reads
    # '<save_dir_pred>/predict/labels' — confirm when re-running.
    predict_settings = dict(
        source=validation_set_path,
        save=True,              # also save annotated prediction images
        save_txt=True,          # save predictions as YOLO-format text files
        save_conf=True,         # include the confidence in the text files
        conf=0.1,               # keep detections with confidence of at least 0.1
        project=save_dir_pred,  # output directory
        imgsz=640,              # image size (should match training)
    )
    return YOLO(weights_path).predict(**predict_settings)
# Per-model configuration: name -> (model specification, training output folder,
# prediction output folder).
model_inputs = {
    'large': (
        'yolov8x.yaml',
        'model/Yolo_large',
        'model/predictions_large',
    ),
    'intermediate': (
        'yolov8m.yaml',
        'model/Yolo_intermediate',
        'model/predictions_intermediate',
    ),
    'small': (
        'yolov8n.yaml',
        'model/Yolo_small',
        'model/predictions_small',
    ),
}

# Dataset description used for training and image folder used for prediction.
yaml_file_path = 'Data/dataset.yaml'
validation_set_path = 'Data/unified_val/images'

In [None]:
# Train every configured model in turn; the prediction folder is not needed here.
# `trained_model` is rebound on each pass, so it ends up holding the last model.
for model_name, model_spec in model_inputs.items():
    model, save_dir, _ = model_spec
    trained_model = train_model(model_name, model, yaml_file_path, save_dir)

In [33]:
# Run each trained model on the validation images; the model specification is not needed here.
for model_name, model_spec in model_inputs.items():
    _, save_dir, save_dir_pred = model_spec
    predictions = predict_model(save_dir, validation_set_path, save_dir_pred)
    print(f'Predictions for {model_name} finished')


image 1/128 g:\Mon Drive\epfl\MA3\Image processing\PROJET\project_ipeo\Data\unified_val\images\2581_1126_1_1_train_RGB.jpg: 640x640 44 rocks, 310.4ms
image 2/128 g:\Mon Drive\epfl\MA3\Image processing\PROJET\project_ipeo\Data\unified_val\images\2581_1126_1_2_train_hillshade.jpg: 640x640 11 rocks, 249.9ms
image 3/128 g:\Mon Drive\epfl\MA3\Image processing\PROJET\project_ipeo\Data\unified_val\images\2581_1126_3_2_train_hillshade.jpg: 640x640 43 rocks, 232.5ms
image 4/128 g:\Mon Drive\epfl\MA3\Image processing\PROJET\project_ipeo\Data\unified_val\images\2581_1127_0_1_train_RGB.jpg: 640x640 (no detections), 228.7ms
image 5/128 g:\Mon Drive\epfl\MA3\Image processing\PROJET\project_ipeo\Data\unified_val\images\2581_1127_0_2_train_RGB.jpg: 640x640 4 rocks, 245.6ms
image 6/128 g:\Mon Drive\epfl\MA3\Image processing\PROJET\project_ipeo\Data\unified_val\images\2581_1127_0_2_train_hillshade.jpg: 640x640 1 rock, 243.4ms
image 7/128 g:\Mon Drive\epfl\MA3\Image processing\PROJET\project_ipeo\Data\u

In [None]:

# Helper function to load YOLO labels
def load_yolo_labels(label_file):
    """
    Read a YOLO-format label file into a list of numeric rows.

    Parameters:
    - label_file (str): Path to a text file with one whitespace-separated box per line.

    Returns:
    - list[list[float]]: One list of floats per non-blank line, in file order.
      An empty list is returned when the file does not exist.

    Raises:
    - ValueError: If a field cannot be parsed as a float.
    """
    if not os.path.exists(label_file):
        return []
    with open(label_file, 'r') as f:
        # Skip blank / whitespace-only lines (e.g. a trailing newline or an empty
        # separator line): they would otherwise yield empty rows that break any
        # consumer unpacking box coordinates.
        return [list(map(float, line.split())) for line in f if line.strip()]
# Helper function to draw bounding boxes
def draw_boxes(image, boxes, color, label_type=""):
    """
    Draw normalized YOLO boxes onto an image, in place.

    Parameters:
    - image (array): Image to draw on; only its first two dimensions (height, width)
      are used, so both 2-D and multi-channel arrays are accepted.
    - boxes (iterable): Rows of the form
      <class_id> <x_center> <y_center> <width> <height> [extra fields...];
      fields after the fifth are ignored.
    - color (tuple): Colour passed to the OpenCV drawing calls.
    - label_type (str): Text written next to every box.
    """
    # Use shape[:2] rather than a 3-way unpack so that images without a channel
    # axis do not raise.
    h, w = image.shape[:2]
    for box in boxes:
        x_center, y_center, width, height = box[1:5]
        # Convert the normalized centre/size representation to pixel corners.
        x1 = int((x_center - width / 2) * w)
        y1 = int((y_center - height / 2) * h)
        x2 = int((x_center + width / 2) * w)
        y2 = int((y_center + height / 2) * h)
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        # The caption is placed 10 px above the top-left corner of the box.
        cv2.putText(image, f"{label_type}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

# Side-by-side visual comparison of ground truth and model predictions
def plot_model_predictions(image_folder, label_folder, model_inputs):
    """
    Show, for every image, the ground-truth boxes next to each model's predictions.

    One figure is produced per image: the first panel shows the ground truth (green),
    the following panels show each model's predictions (red) drawn on top of the
    ground-truth overlay.

    Parameters:
    - image_folder (str): Path to the folder containing the images.
    - label_folder (str): Path to the folder containing the ground-truth label files,
      named '<image stem>.txt'.
    - model_inputs (dict): Dictionary where keys are model names and values are
      3-tuples whose last element is the prediction output folder; predicted labels
      are read from '<prediction folder>/predict/labels/<image stem>.txt'.
    """
    for image_file in sorted(os.listdir(image_folder)):
        # Pair labels with images by file stem rather than by position in two
        # independently sorted listings: a single image without a label file
        # would otherwise shift every following pairing (or raise IndexError).
        label_name = os.path.splitext(image_file)[0] + '.txt'

        # Load the image; cv2.imread returns None instead of raising on failure.
        image_path = os.path.join(image_folder, image_file)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping unreadable image: {image_path}")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Load the ground truth labels (empty list when the file is missing)
        true_labels = load_yolo_labels(os.path.join(label_folder, label_name))

        fig, ax = plt.subplots(1, len(model_inputs) + 1, figsize=(15, 5))

        # Ground-truth panel: boxes are drawn in place, so the prediction panels
        # below inherit the green overlay through image.copy().
        draw_boxes(image, true_labels, color=(0, 255, 0), label_type="GT")  # Green for ground truth
        ax[0].imshow(image)
        ax[0].set_title('Ground Truth')

        # Plot predictions for each model
        for i, (model_name, (_, _, pred_path)) in enumerate(model_inputs.items()):
            pred_image = image.copy()
            pred_label_file = os.path.join(pred_path, 'predict/labels', label_name)
            pred_labels = load_yolo_labels(pred_label_file)

            if pred_labels:
                draw_boxes(pred_image, pred_labels, color=(255, 0, 0), label_type="Pred")  # Red for predictions
            else:
                cv2.putText(pred_image, "No predictions", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

            ax[i + 1].imshow(pred_image)
            ax[i + 1].set_title(model_name)

        plt.show()
        # Release the figure explicitly: one is created per image.
        plt.close(fig)
# Visual comparison of all three models on the unified validation set.
# These names are rebound here, so later cells see the validation-set paths.
image_folder = "Data/unified_val/images"
label_folder = "Data/unified_val/labels"

# name -> (model specification, training output folder, prediction output folder)
model_inputs = {
    'large': (
        'yolov8x.yaml',
        'model/Yolo_large',
        'model/predictions_large',
    ),
    'intermediate': (
        'yolov8m.yaml',
        'model/Yolo_intermediate',
        'model/predictions_intermediate',
    ),
    'small': (
        'yolov8n.yaml',
        'model/Yolo_small',
        'model/predictions_small',
    ),
}

plot_model_predictions(image_folder, label_folder, model_inputs)

In [None]:
# Visual comparison of model predictions with a per-image detection score
def _count_one_to_one_matches(pred_labels, true_labels, atol):
    """Greedily count predictions that match a not-yet-claimed ground-truth box."""
    unclaimed = list(true_labels)
    matches = 0
    for pred in pred_labels:
        for k, true in enumerate(unclaimed):
            if np.allclose(pred[1:5], true[1:5], atol=atol):
                # Each ground-truth box may be claimed by a single prediction only.
                del unclaimed[k]
                matches += 1
                break
    return matches


def plot_model_predictions_with_accuracy(image_folder, label_folder, model_inputs, match_atol=0.1):
    """
    Plot the predictions of different models for images with 10+ ground truth labels
    and annotate each prediction panel with a per-image detection score.

    The score shown as "Accuracy" is TP / (TP + FP + FN). A prediction counts as a
    true positive when its (x_center, y_center, width, height) values are all close
    to those of a ground-truth box that no other prediction has already claimed.
    Matching is one-to-one, so the score always lies in [0, 1].

    Parameters:
    - image_folder (str): Path to the folder containing the images.
    - label_folder (str): Path to the folder containing the ground-truth label files,
      named '<image stem>.txt'.
    - model_inputs (dict): Dictionary where keys are model names and values are
      3-tuples whose last element is the prediction output folder; predicted labels
      are read from '<prediction folder>/predict/labels/<image stem>.txt'.
    - match_atol (float): Absolute tolerance, in normalized units, used when comparing
      a predicted box with a ground-truth box. Defaults to 0.1.
    """
    for image_file in sorted(os.listdir(image_folder)):
        # Pair labels with images by file stem rather than by position in two
        # independently sorted listings: a single image without a label file
        # would otherwise shift every following pairing (or raise IndexError).
        label_name = os.path.splitext(image_file)[0] + '.txt'

        # Load the ground truth labels (empty list when the file is missing)
        true_labels = load_yolo_labels(os.path.join(label_folder, label_name))

        # Filter images with 10+ ground truth labels; checked before decoding the
        # image so skipped images cost nothing.
        if len(true_labels) < 10:
            continue

        # Load the image; cv2.imread returns None instead of raising on failure.
        image_path = os.path.join(image_folder, image_file)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping unreadable image: {image_path}")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        fig, ax = plt.subplots(1, len(model_inputs) + 1, figsize=(15, 5))

        # Ground-truth panel: boxes are drawn in place, so the prediction panels
        # below inherit the green overlay through image.copy().
        draw_boxes(image, true_labels, color=(0, 255, 0), label_type="GT")  # Green for ground truth
        ax[0].imshow(image)
        ax[0].set_title('Ground Truth')

        # Plot predictions for each model and calculate the score
        for i, (model_name, (_, _, pred_path)) in enumerate(model_inputs.items()):
            pred_image = image.copy()
            pred_label_file = os.path.join(pred_path, 'predict/labels', label_name)
            pred_labels = load_yolo_labels(pred_label_file)

            if pred_labels:
                draw_boxes(pred_image, pred_labels, color=(255, 0, 0), label_type="Pred")  # Red for predictions

                true_positive = _count_one_to_one_matches(pred_labels, true_labels, match_atol)
                false_positive = len(pred_labels) - true_positive
                false_negative = len(true_labels) - true_positive
                # The denominator is at least len(true_labels) >= 10, so it is never zero.
                accuracy = true_positive / (true_positive + false_positive + false_negative)

                cv2.putText(pred_image, f"Accuracy: {accuracy:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            else:
                cv2.putText(pred_image, "No predictions", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

            ax[i + 1].imshow(pred_image)
            ax[i + 1].set_title(model_name)

        plt.show()
        # Release the figure explicitly: one is created per image.
        plt.close(fig)

# Run the comparison on the validation set, reusing image_folder, label_folder
# and model_inputs as defined in the earlier cells of this notebook.
plot_model_predictions_with_accuracy(image_folder, label_folder, model_inputs)