### This file splits the annotated images and their corresponding labels into train, test, and validation (val) sets.

In [33]:
import os
from sklearn.model_selection import train_test_split
import shutil

def copy_files(src_folder, dest_folder, files):
    """Copy the named files from ``src_folder`` into ``dest_folder``.

    Args:
        src_folder: Directory the files are read from.
        dest_folder: Directory the files are written to. It is created
            lazily, so nothing is created when ``files`` is empty.
        files: Iterable of file names relative to ``src_folder``; the same
            relative name is used underneath ``dest_folder``.

    Each copy is announced on stdout before it happens.
    """
    for name in files:
        source = os.path.join(src_folder, name)
        target = os.path.join(dest_folder, name)

        # The parent directory of the target must exist before shutil.copy
        # can write into it.
        parent, _ = os.path.split(target)
        os.makedirs(parent, exist_ok=True)

        # Progress/debug trace
        print(f"Copying {source} to {target}")

        shutil.copy(source, target)
        
# Root directory holding one sub-folder per class; each class folder is
# expected to contain an 'images' and a 'labels' sub-folder.
root_dataset_dir = './Fishphotos_all/'

# Output folders for the train, test, and val splits
train_dir = '../Fish_split/train'
test_dir = '../Fish_split/test'
val_dir = '../Fish_split/val'

# Create the output directories up front
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Split every class independently so each class ends up 80/10/10
for class_name in os.listdir(root_dataset_dir):
    class_dir = os.path.join(root_dataset_dir, class_name)
    images_dir = os.path.join(class_dir, 'images')
    labels_dir = os.path.join(class_dir, 'labels')

    # Stray files in the root (e.g. .DS_Store, Thumbs.db) or folders without
    # the expected layout would otherwise crash os.listdir below.
    if not (os.path.isdir(images_dir) and os.path.isdir(labels_dir)):
        print(f"Skipping {class_dir}: missing 'images' or 'labels' folder")
        continue

    # Pair each image with its label by file stem instead of relying on two
    # independently sorted listings lining up by position.
    # NOTE(review): assumes a label shares its image's base name
    # (e.g. foo.jpg <-> foo.txt) -- confirm against the annotation tool.
    labels_by_stem = {
        os.path.splitext(label_file)[0]: label_file
        for label_file in sorted(os.listdir(labels_dir))
    }

    image_files = []
    label_files = []
    for image_file in sorted(os.listdir(images_dir)):
        label_file = labels_by_stem.get(os.path.splitext(image_file)[0])
        if label_file is None:
            print(f"Skipping {image_file} in {class_name}: no matching label")
            continue
        image_files.append(image_file)
        label_files.append(label_file)

    # First carve off 20% of the pairs as a hold-out set ...
    train_images, test_images, train_labels, test_labels = train_test_split(
        image_files, label_files, test_size=0.2, random_state=42
    )

    # ... then halve the hold-out set into test and val (10% each overall)
    test_images, val_images, test_labels, val_labels = train_test_split(
        test_images, test_labels, test_size=0.5, random_state=42
    )

    # Copy images and labels of each split into <split>/<class>/{images,labels}
    splits = (
        (train_dir, train_images, train_labels),
        (test_dir, test_images, test_labels),
        (val_dir, val_images, val_labels),
    )
    for split_dir, split_images, split_labels in splits:
        split_class_dir = os.path.join(split_dir, class_name)
        os.makedirs(split_class_dir, exist_ok=True)

        copy_files(
            images_dir, os.path.join(split_class_dir, 'images'), split_images
        )
        copy_files(
            labels_dir, os.path.join(split_class_dir, 'labels'), split_labels
        )


Copying ./Fishphotos_all/Day_0\images\20231027_235104.jpg to ../Fish_split/train\Day_0\images\20231027_235104.jpg
Copying ./Fishphotos_all/Day_0\images\20231028_000511.jpg to ../Fish_split/train\Day_0\images\20231028_000511.jpg
Copying ./Fishphotos_all/Day_0\images\20231028_000149.jpg to ../Fish_split/train\Day_0\images\20231028_000149.jpg
Copying ./Fishphotos_all/Day_0\images\20231027_234128.jpg to ../Fish_split/train\Day_0\images\20231027_234128.jpg
Copying ./Fishphotos_all/Day_0\images\20231027_235402.jpg to ../Fish_split/train\Day_0\images\20231027_235402.jpg
Copying ./Fishphotos_all/Day_0\images\20231028_000639.jpg to ../Fish_split/train\Day_0\images\20231028_000639.jpg
Copying ./Fishphotos_all/Day_0\images\20231027_234603.jpg to ../Fish_split/train\Day_0\images\20231027_234603.jpg
Copying ./Fishphotos_all/Day_0\images\20231027_234335.jpg to ../Fish_split/train\Day_0\images\20231027_234335.jpg
Copying ./Fishphotos_all/Day_0\images\20231028_001435.jpg to ../Fish_split/train\Day_0\i