In [8]:
import json
import numpy as np
import random
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import shutil

from tifffile import tifffile 
# tifffile is a useful and lightweight library for reading raster images (.tif)
# Usage examples: https://pypi.org/project/tifffile/#examples
# install it with :
# ! pip install tifffile

In [2]:
# Illustration of some samples coming from the dataset
json_file_path = 'Data/large_rock_dataset.json'

# Load the JSON file. The encoding is passed explicitly so that parsing does not
# depend on the platform's default text encoding.
with open(json_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)
print('General information about the data:', data['info'])
dataset = data['dataset']
print('Number of samples  :', len(dataset))

# Inspect one arbitrary sample. The printed labels report the index that is
# actually displayed instead of calling it the "first" image.
SAMPLE_INDEX = 10
sample_info = dataset[SAMPLE_INDEX]
print(f'Looking at sample #{SAMPLE_INDEX}:', sample_info['file_name'])

# A sample may carry no rock annotation at all; report that instead of raising.
sample_rocks = sample_info.get('rocks_annotations') or []
if sample_rocks:
    print(f'Looking at the first rock annotation of sample #{SAMPLE_INDEX}:\n', sample_rocks[0])
else:
    print(f'Sample #{SAMPLE_INDEX} has no rock annotations.')

General information about the data: {'description': 'Large Rocks Detection Dataset ', 'version': '1.0', 'year': 2024, 'contributor': 'Valerie Zermatten', 'date_created': '2024/09/30'}
Number of samples  : 992
Looking at the the first images: 2581_1126_2_2.tif
Looking at rocks annotations for the first images:
 {'rock_id': 1459.0, 'abs_rock_coordinates': [2581767.93, 1126509.48], 'pixel_within_patch_coordinates': [608.0, 51.0], 'relative_within_patch_location': [0.95, 0.08]}


In [3]:
# Collect every distinct value stored under the 'split' key of the samples
unique_splits = {entry['split'] for entry in dataset}
print("Unique values in 'split':", unique_splits)

Unique values in 'split': {'test', 'train'}


In [9]:
import os
import shutil

# Root of the reorganised dataset and its per-split image folders
base_dir = 'dataset_surface_hillshade'
train_dir, test_dir = (
    os.path.join(base_dir, leaf) for leaf in ('train_images', 'test')
)

# Make sure both split folders are present (no-op when they already exist)
for split_dir in (train_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

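## DONE: the copy step below is disabled (presumably it was already run once);
## uncomment it to rebuild the train/test image folders from the source patches.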

# # Iterate through all samples
# for sample in dataset:
#     file_name = 'Data/swissSURFACE3D_hillshade_patches/' + sample['file_name']
#     split = sample['split']  # Assuming the "split" key indicates train/test/val

#     # Define source and destination paths
#     src_path = file_name  # Assuming file_name contains the full or relative path
#     if split == 'train':
#         dest_dir = train_dir
#     elif split == 'test':
#         dest_dir = test_dir
#     # elif split == 'val':
#     #     dest_dir = val_dir
#     else:
#         print(f"Unknown split '{split}' for file '{file_name}'. Skipping.")
#         continue

#     dest_path = os.path.join(dest_dir, os.path.basename(file_name))

#     # Copy file to the appropriate directory
#     try:
#         shutil.copy(src_path, dest_path)
#         print(f"Copied '{file_name}' to '{dest_dir}'")
#     except Exception as e:
#         print(f"Error copying '{file_name}': {e}")

# print("Dataset split completed.")


In [10]:
# train_labels

# Location of the training label folder, next to the image folders under base_dir
labels_dir = os.path.join(base_dir, 'train_labels')

# Make sure the folder is present; nothing happens when it already exists
os.makedirs(name=labels_dir, exist_ok=True)

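# DONE: the label-export step below is disabled (presumably it was already run once);
# uncomment it to regenerate the per-image .txt annotation files.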

# Process images with split == 'train'
# for sample in dataset:
#     if sample['split'] == 'train':
#         # Extract relevant details
#         file_name = sample['file_name']
#         annotations = sample.get('rocks_annotations', [])
        
#         # Create a .txt file for this image
#         base_name = os.path.splitext(os.path.basename(file_name))[0]
#         txt_file_path = os.path.join(labels_dir, f"{base_name}.txt")
        
#         # Write annotations to the .txt file
#         with open(txt_file_path, 'w') as txt_file:
#             for annotation in annotations:
#                 txt_file.write(f"{annotation}\n")
        
#         print(f"Created annotation file: {txt_file_path}")

# print("All train annotations have been saved to the 'train_labels' folder.")


In [11]:
# Create Validation Set
# Count the regular files currently in the train folder (sub-directories are ignored).
file_count = sum(
    1
    for file in os.listdir(train_dir)
    if os.path.isfile(os.path.join(train_dir, file))
)
print(f"Number of images in train set: {file_count}")

# Define source and destination folders
source_folder = train_dir
destination_folder = os.path.join(base_dir, 'val_images')

# Create the validation image folder up front, like the other output folders of
# this notebook: moving files into a missing directory, or listing it, fails.
os.makedirs(destination_folder, exist_ok=True)

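# DONE: the random 10% train -> val move below has already been run (see the recorded
# output); it is kept commented out so that re-running the cell does not move more files.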

# List all files in the source folder
# files = [file for file in os.listdir(source_folder) if os.path.isfile(os.path.join(source_folder, file))]

# # Calculate 10% of the total files
# num_files_to_move = max(1, int(len(files) * 0.1))  # Ensure at least one file is moved

# # Randomly select 10% of the files
# files_to_move = random.sample(files, num_files_to_move)

# # Move the selected files
# for file in files_to_move:
#     src_path = os.path.join(source_folder, file)
#     dest_path = os.path.join(destination_folder, file)
#     shutil.move(src_path, dest_path)
#     print(f"Moved '{file}' to '{destination_folder}'")

# print(f"Moved {len(files_to_move)} files to '{destination_folder}'.")


Number of images in train set: 640
Moved '2586_1131_1_1.tif' to 'dataset_surface_hillshade\val_images'
Moved '2594_1132_0_3.tif' to 'dataset_surface_hillshade\val_images'
Moved '2635_1145_2_0.tif' to 'dataset_surface_hillshade\val_images'
Moved '2598_1131_3_1.tif' to 'dataset_surface_hillshade\val_images'
Moved '2598_1131_2_1.tif' to 'dataset_surface_hillshade\val_images'
Moved '2582_1126_2_2.tif' to 'dataset_surface_hillshade\val_images'
Moved '2581_1126_3_2.tif' to 'dataset_surface_hillshade\val_images'
Moved '2595_1130_1_2.tif' to 'dataset_surface_hillshade\val_images'
Moved '2664_1136_3_2.tif' to 'dataset_surface_hillshade\val_images'
Moved '2599_1131_0_3.tif' to 'dataset_surface_hillshade\val_images'
Moved '2598_1132_1_1.tif' to 'dataset_surface_hillshade\val_images'
Moved '2581_1127_2_2.tif' to 'dataset_surface_hillshade\val_images'
Moved '2664_1135_1_2.tif' to 'dataset_surface_hillshade\val_images'
Moved '2589_1133_0_3.tif' to 'dataset_surface_hillshade\val_images'
Moved '2581_1

In [12]:
# Folders involved: validation images, training labels, validation labels
val_images_folder, train_labels_folder, val_labels_folder = (
    os.path.join(base_dir, leaf)
    for leaf in ('val_images', 'train_labels', 'val_labels')
)

# The validation label folder must exist before anything is moved into it
os.makedirs(val_labels_folder, exist_ok=True)

# Extension-less names of every regular file found in the validation image folder
val_image_files = set()
for image_name in os.listdir(val_images_folder):
    if os.path.isfile(os.path.join(val_images_folder, image_name)):
        val_image_files.add(os.path.splitext(image_name)[0])

# Relocate each training label whose name (without extension) matches a validation image
for label_file in os.listdir(train_labels_folder):
    label_stem, _ = os.path.splitext(label_file)
    if label_stem not in val_image_files:
        continue
    shutil.move(
        os.path.join(train_labels_folder, label_file),
        os.path.join(val_labels_folder, label_file),
    )
    print(f"Moved '{label_file}' to '{val_labels_folder}'")

print("Matching label files moved to 'val_labels' folder.")




Moved '2581_1126_1_2.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2581_1126_3_2.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2581_1127_0_2.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2581_1127_2_0.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2581_1127_2_2.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2582_1126_1_1.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2582_1126_2_2.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2583_1126_0_0.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2583_1126_1_0.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2583_1126_3_1.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2583_1127_3_3.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2586_1131_0_2.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2586_1131_0_3.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2586_1131_1_1.txt' to 'dataset_surface_hillshade\val_labels'
Moved '2586_1132_0_1.txt' to 'dataset_surface_hi