## sklearn train/val split

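The following snippet uses scikit-learn's `train_test_split` to split each bird's images (the masked frames produced by the previous step) into training and validation sets, which are then used to train and validate the classifier. Note that this first version **moves** the files out of the source folders.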

In [2]:
from sklearn.model_selection import train_test_split
import os
import shutil

In [3]:
source_dir = r"D:\Bowerbird-ID\4_Run_YOLOv11_det_seg\Output_masked_frames"
train_dir = r"D:\Bowerbird-ID\5_Split_train_val_sets\Dataset\Training"
val_dir = r"D:\Bowerbird-ID\5_Split_train_val_sets\Dataset\Validation"

# Split every bird ID folder under source_dir into train_dir/<bird_id> and
# val_dir/<bird_id>, MOVING the files.
# NOTE: this is destructive. After it runs, the processed bird folders in
# source_dir no longer contain their images, so re-running this cell (or any
# later cell that reads source_dir) will only hit the "No images found" warning.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Iterate through each bird ID folder
for bird_id in sorted(os.listdir(source_dir)):
    bird_path = os.path.join(source_dir, bird_id)

    if not os.path.isdir(bird_path):
        continue

    # Keep regular files only (a nested folder is not an image), and sort them:
    # os.listdir() returns entries in arbitrary order, so without sorting the
    # fixed random_state would not guarantee the same split on every run.
    images = sorted(
        entry for entry in os.listdir(bird_path)
        if os.path.isfile(os.path.join(bird_path, entry))
    )

    if not images:  # Skip if the folder is empty
        print(f"Warning: No images found for bird '{bird_id}' in {bird_path}")
        continue

    # train_test_split raises ValueError when one of the two sets would be
    # empty, which is the case for a single image. Skip such folders up front
    # so one sparse bird cannot abort the loop after other birds were moved.
    if len(images) < 2:
        print(f"Warning: Only one image found for bird '{bird_id}' in {bird_path}; skipping (cannot split)")
        continue

    # Split images into training and validation sets
    train_imgs, val_imgs = train_test_split(images, test_size=0.33, random_state=42)

    train_bird_dir = os.path.join(train_dir, bird_id)
    val_bird_dir = os.path.join(val_dir, bird_id)
    os.makedirs(train_bird_dir, exist_ok=True)
    os.makedirs(val_bird_dir, exist_ok=True)

    # Move training images, then validation images
    for dest_dir, img_names in ((train_bird_dir, train_imgs), (val_bird_dir, val_imgs)):
        for img in img_names:
            shutil.move(os.path.join(bird_path, img), os.path.join(dest_dir, img))

print("Dataset successfully split into training and validation sets.")

Dataset successfully split into training and validation sets.


The next cell repeats the split without modifying the source data:

- **Class-specific splits:** Each bird ID folder's images are split into training and validation sets independently.
- **Folder structure preservation:** Training and validation images are placed in separate `Training` and `Validation` directories, each keeping the per-bird ID folder structure.
- **Copy instead of move:** Images are copied with `shutil.copy2`, so the original images remain untouched.

In [4]:
import os
from sklearn.model_selection import train_test_split
import shutil

source_dir = r"D:\Bowerbird-ID\4_Run_YOLOv11_det_seg\Output_masked_frames"
train_dir = r"D:\Bowerbird-ID\5_Split_train_val_sets\Dataset\Training"
val_dir = r"D:\Bowerbird-ID\5_Split_train_val_sets\Dataset\Validation"

# Split every bird ID folder under source_dir into train_dir/<bird_id> and
# val_dir/<bird_id>, COPYING the files so source_dir is left unchanged.
# NOTE(review): files already present in the destination folders from an
# earlier run are not removed; if the ratio or seed changed in between, the
# same image could end up in both sets. Confirm the destinations are clean.
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Iterate through each bird ID folder
for bird_id in sorted(os.listdir(source_dir)):
    bird_path = os.path.join(source_dir, bird_id)

    if not os.path.isdir(bird_path):
        continue

    # Keep regular files only (shutil.copy2 cannot copy a directory), and sort
    # them: os.listdir() returns entries in arbitrary order, so without sorting
    # the fixed random_state would not guarantee the same split on every run.
    images = sorted(
        entry for entry in os.listdir(bird_path)
        if os.path.isfile(os.path.join(bird_path, entry))
    )

    if not images:  # Skip if the folder is empty
        print(f"Warning: No images found for bird '{bird_id}' in {bird_path}")
        continue

    # train_test_split raises ValueError when one of the two sets would be
    # empty, which is the case for a single image. Skip such folders up front
    # so one sparse bird cannot abort the whole run part-way through.
    if len(images) < 2:
        print(f"Warning: Only one image found for bird '{bird_id}' in {bird_path}; skipping (cannot split)")
        continue

    # Split images into training and validation sets
    train_imgs, val_imgs = train_test_split(images, test_size=0.3, random_state=42)

    train_bird_dir = os.path.join(train_dir, bird_id)
    val_bird_dir = os.path.join(val_dir, bird_id)
    os.makedirs(train_bird_dir, exist_ok=True)
    os.makedirs(val_bird_dir, exist_ok=True)

    # Copy training images, then validation images (copy2 also copies metadata)
    for dest_dir, img_names in ((train_bird_dir, train_imgs), (val_bird_dir, val_imgs)):
        for img in img_names:
            shutil.copy2(os.path.join(bird_path, img), os.path.join(dest_dir, img))

print("Dataset successfully split into training and validation sets.")


Dataset successfully split into training and validation sets.
