In [1]:
import os
import shutil
import random

# Set these paths as needed
src_dir = r"./101_ObjectCategories/101_ObjectCategories"   # Folder with original class folders
dst_dir = "."                         # Destination folder for train/valid/test

# Split definition: 50% train, 25% valid, 25% test
splits = ['train', 'valid', 'test']
split_ratio = [0.5, 0.25, 0.25]

# Make sure random is reproducible
random.seed(42)

for class_name in os.listdir(src_dir):
    class_path = os.path.join(src_dir, class_name)
    if not os.path.isdir(class_path):
        continue
    images = [img for img in os.listdir(class_path) if img.lower().endswith(('.jpg', '.jpeg', '.png'))]
    if not images:
        continue  # skip empty folders or annotation dirs

    random.shuffle(images)
    n_total = len(images)
    n_train = int(split_ratio[0] * n_total)
    n_valid = int(split_ratio[1] * n_total)
    n_test = n_total - n_train - n_valid  # Ensure all images used

    split_idx = {
        'train': (0, n_train),
        'valid': (n_train, n_train + n_valid),
        'test': (n_train + n_valid, n_total)
    }

    for split in splits:
        split_dir = os.path.join(dst_dir, split, class_name)
        os.makedirs(split_dir, exist_ok=True)
        start, end = split_idx[split]
        for img_name in images[start:end]:
            src_img = os.path.join(class_path, img_name)
            dst_img = os.path.join(split_dir, img_name)
            shutil.copy2(src_img, dst_img)

    print(f'Split done for class: {class_name}')

print('Dataset splitting complete! You can now use the folders for PyTorch training.')


Split done for class: accordion
Split done for class: airplanes
Split done for class: anchor
Split done for class: ant
Split done for class: BACKGROUND_Google
Split done for class: barrel
Split done for class: bass
Split done for class: beaver
Split done for class: binocular
Split done for class: bonsai
Split done for class: brain
Split done for class: brontosaurus
Split done for class: buddha
Split done for class: butterfly
Split done for class: camera
Split done for class: cannon
Split done for class: car_side
Split done for class: ceiling_fan
Split done for class: cellphone
Split done for class: chair
Split done for class: chandelier
Split done for class: cougar_body
Split done for class: cougar_face
Split done for class: crab
Split done for class: crayfish
Split done for class: crocodile
Split done for class: crocodile_head
Split done for class: cup
Split done for class: dalmatian
Split done for class: dollar_bill
Split done for class: dolphin
Split done for class: dragonfly
Split 

In [2]:
import os
print(os.listdir('.'))


['.qodo', '101_ObjectCategories', 'Annotations', 'creditcard.csv', 'dl_1_libraries.ipynb', 'dl_2_ffnn.ipynb', 'dl_3_mnist.ipynb', 'dl_4_autoencoder.ipynb', 'dl_5_cbow.ipynb', 'dl_6_object detection.ipynb', 'model_digit.json', 'model_digit.weights.h5', 'split_caltech101.ipynb', 'test', 'train', 'valid', 'vectors.txt', 'vectors_simple.txt']


In [3]:
import os
print("Current directory files:")
print(os.listdir('.'))
print("Classes in src_dir:")
print(os.listdir('./101_ObjectCategories'))


Current directory files:
['.qodo', '101_ObjectCategories', 'Annotations', 'creditcard.csv', 'dl_1_libraries.ipynb', 'dl_2_ffnn.ipynb', 'dl_3_mnist.ipynb', 'dl_4_autoencoder.ipynb', 'dl_5_cbow.ipynb', 'dl_6_object detection.ipynb', 'model_digit.json', 'model_digit.weights.h5', 'split_caltech101.ipynb', 'test', 'train', 'valid', 'vectors.txt', 'vectors_simple.txt']
Classes in src_dir:
['101_ObjectCategories']
