In [1]:
import os
import shutil
import random
from math import floor
import glob

In [2]:
# Mount Google Drive into the Colab filesystem; the dataset paths used below
# all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

# Dataset locations under the Drive mount point
source_folder = '/content/drive/MyDrive/YangBOT/Dataset/DatasetV2_67/unzip_orginal/65SKA/65SKA05_1/65SKA05_1Healthy'  # Change this to your source folder path
train_folder = '/content/drive/MyDrive/YangBOT/Dataset/DatasetV2_67/train_test_val_original2/train'
val_folder = '/content/drive/MyDrive/YangBOT/Dataset/DatasetV2_67/train_test_val_original2/val'
test_folder = '/content/drive/MyDrive/YangBOT/Dataset/DatasetV2_67/train_test_val_original2/test'

# Make sure every destination folder exists before anything is copied into it
for split_folder in (train_folder, val_folder, test_folder):
    os.makedirs(split_folder, exist_ok=True)

# Set the split ratios
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1

# Ensure the ratios sum to 1. Compare with a tolerance rather than exact float
# equality: a valid choice such as 0.7 / 0.2 / 0.1 sums to 0.9999999999999999
# in binary floating point and would wrongly fail an `== 1.0` check.
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "Ratios must sum to 1.0"

# Get all image files from the source folder.
# - set(): the two patterns can return the same file twice on platforms where
#   glob matching is case-insensitive (e.g. Windows).
# - sorted(): directory listing order is arbitrary, so fix a stable starting
#   order before shuffling.
image_files = sorted(set(
    glob.glob(os.path.join(source_folder, '*.jpg'))
    + glob.glob(os.path.join(source_folder, '*.JPG'))
))

# Fail loudly on a wrong/empty source path instead of producing an empty split
if not image_files:
    raise FileNotFoundError(f"No .jpg/.JPG images found in {source_folder}")

# Shuffle with a fixed seed so that re-running this cell yields the same split.
# With an unseeded shuffle, a second run would assign files differently while
# the copies from the first run remain in the destination folders, leaking the
# same image into more than one of train/val/test.
# A private Random instance is used so the global `random` state is untouched.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(image_files)

# Calculate the number of images for each split
total_images = len(image_files)
train_count = floor(train_ratio * total_images)
val_count = floor(val_ratio * total_images)
test_count = total_images - train_count - val_count  # Ensure all images are accounted for

# Split the images
train_files = image_files[:train_count]
val_files = image_files[train_count:train_count + val_count]
test_files = image_files[train_count + val_count:]

# Function to copy files and corresponding label files
def copy_files(file_list, destination_folder):
    """Copy images, and their same-named ``.txt`` label files, into a folder.

    Args:
        file_list: Iterable of image file paths to copy.
        destination_folder: Directory that receives the copies. It is created
            (including missing parents) if it does not exist yet.

    Returns:
        List of base names of every file copied, in copy order. Each image
        name is followed by its label file name when a label file was found
        next to the image.
    """
    # If the directory is missing, shutil.copy treats the destination as a
    # file name (every copy overwrites that one file) or fails outright when
    # the parent is missing too, so make sure it exists first.
    os.makedirs(destination_folder, exist_ok=True)

    copied_files = []
    for image_path in file_list:
        # Copy the image file
        shutil.copy(image_path, destination_folder)
        copied_files.append(os.path.basename(image_path))

        # The label shares the image's stem and sits in the same folder;
        # images without a label are still copied.
        label_path = os.path.splitext(image_path)[0] + '.txt'
        if os.path.exists(label_path):
            shutil.copy(label_path, destination_folder)
            copied_files.append(os.path.basename(label_path))

    return copied_files

# Populate each split folder and keep the names of everything that was copied
train_copied = copy_files(train_files, train_folder)
val_copied = copy_files(val_files, val_folder)
test_copied = copy_files(test_files, test_folder)

# Assemble the summary text: counts first, then one section per split
summary_file = '/content/drive/MyDrive/YangBOT/Dataset/DatasetV2_67/split_summary2.txt'
summary_lines = [
    f"Total images: {total_images}\n",
    f"Training images: {train_count}\n",
    f"Validation images: {val_count}\n",
    f"Testing images: {test_count}\n\n",
]
for section_title, section_names in (
    ("Training images and labels:\n", train_copied),
    ("\nValidation images and labels:\n", val_copied),
    ("\nTesting images and labels:\n", test_copied),
):
    summary_lines.append(section_title)
    summary_lines.extend("%s\n" % img for img in section_names)

# Write the whole summary in one go
with open(summary_file, 'w') as f:
    f.writelines(summary_lines)

print(f"Split completed! Summary saved to {summary_file}")


Split completed! Summary saved to /content/drive/MyDrive/YangBOT/Dataset/DatasetV2_67/split_summary2.txt
