In [5]:
import os
import pandas as pd
import shutil

def separate_copra_images(base_path):
    """
    Copy copra images from the train, valid, test subfolders into class-specific folders.

    For each subfolder the function reads ``_annotations.csv`` and copies every
    listed image into ``<base_path>/sorted_classes/<class>/``. A summary is
    printed at the end, and any image that could not be copied is listed in
    ``sorted_classes/unprocessed_images.txt``.

    If a CSV lists the same filename/class pair on several rows, the image is
    copied and counted only once for that subfolder.

    Parameters:
    -----------
    base_path : str
        Base directory containing train, valid, test folders

    Returns:
    --------
    None
        Results are reported through printed output and the files written
        under ``sorted_classes``.
    """
    subfolders = ['train', 'valid', 'test']
    classes = ['undercooked', 'overcooked', 'perfectly-cooked']
    required_columns = ['filename', 'class']

    # Create one output directory per known class
    output_base = os.path.join(base_path, 'sorted_classes')
    for cls in classes:
        os.makedirs(os.path.join(output_base, cls), exist_ok=True)

    # Track what was copied and what could not be copied
    processed_images = {cls: [] for cls in classes}
    unprocessed_images = []

    for subfolder in subfolders:
        subfolder_path = os.path.join(base_path, subfolder)
        csv_path = os.path.join(subfolder_path, '_annotations.csv')

        if not os.path.exists(csv_path):
            print(f"No CSV found in {subfolder} folder. Skipping.")
            continue

        # An empty or malformed CSV should not abort the other subfolders
        try:
            df = pd.read_csv(csv_path)
        except (pd.errors.EmptyDataError, pd.errors.ParserError) as e:
            print(f"Could not read CSV in {subfolder} folder: {e}. Skipping.")
            continue

        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            print(f"CSV in {subfolder} folder is missing columns {missing_columns}. Skipping.")
            continue

        # Collapse repeated filename/class rows so each image is copied and
        # counted once instead of once per row
        pairs = df[required_columns].drop_duplicates()

        # Rows without a filename or class cannot be turned into a path
        incomplete = pairs.isna().any(axis=1)
        if incomplete.any():
            print(f"Skipping {int(incomplete.sum())} row(s) with missing filename/class in {subfolder} CSV.")
            pairs = pairs[~incomplete]

        for filename, image_class in pairs.itertuples(index=False, name=None):
            filename = str(filename)

            # A class without an output folder is reported and recorded
            # rather than failing inside the copy
            if image_class not in processed_images:
                print(f"Unknown class {image_class!r} for {filename}. Skipping.")
                unprocessed_images.append(filename)
                continue

            source_image_path = os.path.join(subfolder_path, filename)
            dest_image_path = os.path.join(output_base, image_class, filename)

            if not os.path.exists(source_image_path):
                unprocessed_images.append(filename)
                continue

            try:
                shutil.copy2(source_image_path, dest_image_path)
            except OSError as e:
                # Record the failure so it shows up in the summary and log
                print(f"Error processing {filename}: {e}")
                unprocessed_images.append(filename)
            else:
                processed_images[image_class].append(filename)

    # Print summary
    print("\nImage Separation Summary:")
    for cls in classes:
        print(f"{cls.capitalize()} images: {len(processed_images[cls])}")

    print(f"\nTotal processed images: {sum(len(imgs) for imgs in processed_images.values())}")
    print(f"Unprocessed images: {len(unprocessed_images)}")

    # Log unprocessed images next to the sorted output
    if unprocessed_images:
        log_path = os.path.join(output_base, 'unprocessed_images.txt')
        with open(log_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(unprocessed_images))
        print("\nList of unprocessed images saved to unprocessed_images.txt")

# Dataset root: the directory that holds the train, valid and test folders
base_path = r'C:\Users\ASUS VIVOBOOK\Downloads\updated dataset unaugmented.v2-new-dataset.tensorflow'

# Sort the annotated images into per-class folders under the dataset root
separate_copra_images(base_path=base_path)


Image Separation Summary:
Undercooked images: 527
Overcooked images: 513
Perfectly-cooked images: 520

Total processed images: 1560
Unprocessed images: 0
