## Split test dataset
### Move 10% of each class from train and valid into test

In [4]:
import os
import shutil
import random

# Dataset split folders, resolved against the current working directory.
train_dir, valid_dir, test_dir = "train", "valid", "test"

# Files are moved into test_dir further down, so create it up front
# (exist_ok=True: no error when it is already there).
os.makedirs(test_dir, exist_ok=True)

# Function to split dataset
def split_data(source_dir, dest_dir, split_ratio=0.1, seed=None):
    """Move a random share of every class folder from ``source_dir`` to ``dest_dir``.

    For each sub-directory of ``source_dir`` a sub-directory with the same
    name is created under ``dest_dir`` and ``split_ratio`` of its entries
    (at least one, if the folder is not empty) are moved there. Entries are
    moved, not copied, so calling this again moves a further share.

    Args:
        source_dir: Directory containing one sub-directory per class.
        dest_dir: Directory receiving the held-out entries.
        split_ratio: Fraction of each class folder to move.
        seed: Optional seed for a reproducible selection. ``None`` (default)
            uses the module-level ``random`` generator.
    """
    # A dedicated generator keeps a seeded run independent of global state.
    rng = random if seed is None else random.Random(seed)

    # os.listdir order is arbitrary; sort so a seeded run is repeatable.
    for subclass in sorted(os.listdir(source_dir)):
        subclass_path = os.path.join(source_dir, subclass)
        if not os.path.isdir(subclass_path):  # Skip stray files next to the class folders
            continue

        dest_subclass_path = os.path.join(dest_dir, subclass)
        os.makedirs(dest_subclass_path, exist_ok=True)

        files = sorted(os.listdir(subclass_path))
        rng.shuffle(files)
        num_to_move = max(1, int(len(files) * split_ratio))  # At least one entry per class

        for file in files[:num_to_move]:
            src_file = os.path.join(subclass_path, file)
            # Several sources may feed the same destination folder; never
            # move onto a name that is already taken there.
            dest_file = _non_clobbering_path(dest_subclass_path, file)
            shutil.move(src_file, dest_file)


def _non_clobbering_path(folder, name):
    """Return a path for ``name`` inside ``folder`` that does not exist yet."""
    candidate = os.path.join(folder, name)
    stem, ext = os.path.splitext(name)
    counter = 1
    # lexists also catches dangling symlinks occupying the name.
    while os.path.lexists(candidate):
        candidate = os.path.join(folder, f"{stem}_dup{counter}{ext}")
        counter += 1
    return candidate

# Process both train and valid sets, using split_data's default split_ratio of 0.1.
# NOTE: split_data moves files rather than copying them, so this cell is not
# idempotent -- every re-run takes a further share out of train/ and valid/.
split_data(train_dir, test_dir)
split_data(valid_dir, test_dir)

print("Test set created successfully!")

Test set created successfully!


### Rename files with their class names

In [None]:
import os

# Dataset split folders (relative to the current working directory) whose
# class sub-folders will have their files renamed.
directories = ["test", "train","valid"]

# Function to rename files
def rename_files(directory):
    """Rename every entry of each class folder to ``<class>_<n><ext>``.

    For each sub-directory of ``directory`` the entries are taken in sorted
    (lexicographic) name order and numbered from 1, keeping their original
    extension.

    The rename runs in two passes through unique temporary names. Renaming
    directly would overwrite (POSIX) or fail on (Windows) any file that
    already holds a target name but has not been processed yet -- e.g. on a
    second run ``A_10.jpg`` sorts before ``A_2.jpg`` and would be renamed
    onto it.

    Args:
        directory: Directory containing one sub-directory per class.
    """
    import uuid  # Local import so the function does not rely on another cell

    for subclass in os.listdir(directory):
        subclass_path = os.path.join(directory, subclass)
        if not os.path.isdir(subclass_path):  # Only class folders are processed
            continue

        files = sorted(os.listdir(subclass_path))  # Sort to maintain order
        token = uuid.uuid4().hex  # Makes the temporary names collision-free

        # Pass 1: park every entry under a temporary name so that no final
        # name is occupied when pass 2 starts.
        staged = []
        for idx, file in enumerate(files, start=1):
            file_ext = os.path.splitext(file)[1]  # Keep the original format
            temp_path = os.path.join(subclass_path, f".renaming_{token}_{idx}{file_ext}")
            final_path = os.path.join(subclass_path, f"{subclass}_{idx}{file_ext}")
            os.rename(os.path.join(subclass_path, file), temp_path)
            staged.append((temp_path, final_path))

        # Pass 2: move the parked entries to their final names.
        for temp_path, final_path in staged:
            os.rename(temp_path, final_path)

# Process every split listed in `directories` (test, train and valid)
for dir_name in directories:
    rename_files(dir_name)

print("Renaming completed successfully!")

### Prefix all class directories with the dataset name

In [2]:
import os

# Dataset split folders (relative to the current working directory) whose
# class sub-folders will be renamed.
directories = ["train", "valid", "test"]

# Function to rename subclass directories
def rename_subclass_dirs(directory):
    """Prefix each class folder directly under ``directory`` with ``Tomato__``.

    Plain files are ignored, and any folder whose name already begins with
    ``Tomato_`` is left untouched so the prefix is not applied twice. Every
    rename is reported on stdout.
    """
    for entry in os.listdir(directory):
        current_path = os.path.join(directory, entry)

        # Only directories are candidates for renaming.
        if not os.path.isdir(current_path):
            continue
        # Names that already start with "Tomato_" are skipped.
        if entry.startswith("Tomato_"):
            continue

        prefixed = "Tomato__" + entry
        os.rename(current_path, os.path.join(directory, prefixed))
        print(f"Renamed: {entry} → {prefixed}")

# Process train, valid, and test directories (everything listed in `directories`);
# rename_subclass_dirs prints one line per folder it actually renames.
for dir_name in directories:
    rename_subclass_dirs(dir_name)

print("All subclass directories renamed successfully!")

All subclass directories renamed successfully!
