# Splitting the dataset into training and evaluation sets

In [1]:
import os
import shutil
import random

# Source directories: the raw images and their normalized annotation files
image_dir = 'blood-dataset/images'
annotation_dir = 'blood-dataset/normalized_annotations'

# Destination directories for the training and evaluation subsets
train_image_dir = 'blood-dataset/train/images'
train_annotation_dir = 'blood-dataset/train/annotations'
eval_image_dir = 'blood-dataset/eval/images'
eval_annotation_dir = 'blood-dataset/eval/annotations'

# Create the destination directories if they don't exist
os.makedirs(train_image_dir, exist_ok=True)
os.makedirs(train_annotation_dir, exist_ok=True)
os.makedirs(eval_image_dir, exist_ok=True)
os.makedirs(eval_annotation_dir, exist_ok=True)

# List all images. os.listdir returns entries in arbitrary order, so sort them
# to give the seeded shuffle below a stable starting point.
all_images = sorted(f for f in os.listdir(image_dir) if f.endswith('.png'))

# Shuffle with a fixed seed so that re-running this cell reproduces the same
# split. The destination directories are never cleared, so a different shuffle
# on a re-run would leave files from the previous run behind and let the same
# image end up in both the train and the eval set.
split_seed = 42
random.Random(split_seed).shuffle(all_images)

# Define split ratio (fraction of images assigned to the training set)
train_ratio = 0.8
train_size = int(train_ratio * len(all_images))

# Split the dataset: the first train_size shuffled images train, the rest evaluate
train_images = all_images[:train_size]
eval_images = all_images[train_size:]

def copy_files(file_list, src_image_dir, src_annotation_dir, dst_image_dir, dst_annotation_dir):
    """Copy each image and its matching ``.txt`` annotation to the destination directories.

    The annotation file name is the image file name with its extension
    replaced by ``.txt``.

    Args:
        file_list: Iterable of image file names (not full paths).
        src_image_dir: Directory containing the source images.
        src_annotation_dir: Directory containing the source annotation files.
        dst_image_dir: Directory that receives the copied images.
        dst_annotation_dir: Directory that receives the copied annotations.

    Raises:
        FileNotFoundError: If any image or annotation is missing from the
            source directories. This is raised before anything is copied, so
            a failed call never leaves an image without its annotation in the
            destination.
    """
    # Materialize the (image, annotation) name pairs once so that file_list
    # may be any iterable and is only consumed a single time.
    pairs = [(name, os.path.splitext(name)[0] + '.txt') for name in file_list]

    # Validate every source up front: failing half-way through would leave
    # orphan images (copied without their annotation) in the destination.
    missing = []
    for image_name, annotation_name in pairs:
        image_path = os.path.join(src_image_dir, image_name)
        annotation_path = os.path.join(src_annotation_dir, annotation_name)
        if not os.path.exists(image_path):
            missing.append(image_path)
        if not os.path.exists(annotation_path):
            missing.append(annotation_path)
    if missing:
        raise FileNotFoundError(
            "Missing source file(s), nothing was copied: " + ", ".join(missing)
        )

    # Make the function usable on its own, even when the caller has not
    # created the destination directories beforehand.
    os.makedirs(dst_image_dir, exist_ok=True)
    os.makedirs(dst_annotation_dir, exist_ok=True)

    for image_name, annotation_name in pairs:
        # Copy image file
        shutil.copy(os.path.join(src_image_dir, image_name), os.path.join(dst_image_dir, image_name))

        # Copy corresponding annotation file
        shutil.copy(os.path.join(src_annotation_dir, annotation_name), os.path.join(dst_annotation_dir, annotation_name))

# Populate the training subset: images plus their matching annotations
copy_files(
    file_list=train_images,
    src_image_dir=image_dir,
    src_annotation_dir=annotation_dir,
    dst_image_dir=train_image_dir,
    dst_annotation_dir=train_annotation_dir,
)

# Populate the evaluation subset from the same source directories
copy_files(
    file_list=eval_images,
    src_image_dir=image_dir,
    src_annotation_dir=annotation_dir,
    dst_image_dir=eval_image_dir,
    dst_annotation_dir=eval_annotation_dir,
)

# Report how many images ended up in each subset
print(
    "Dataset split completed. Train set size:",
    len(train_images),
    "Eval set size:",
    len(eval_images),
)


Dataset split completed. Train set size: 80 Eval set size: 20
