# Importing Libraries

In [1]:
import os
import cv2
from collections import defaultdict

# Resizing

In [8]:
# Dataset root folders to process. The names are used as given, so they are
# resolved relative to the notebook's current working directory.
DATASETS = [
    "1_UCI_Dataset",
    "2_Rice_Leaf_Disease_Images",
    "3_Rice_Disease_Image_Dataset",
]

# Size handed to resize_and_save_images() for every image (224x224).
TARGET_SIZE = (224, 224)

In [9]:
# Function to check image sizes, corrupt images, and class distribution
def check_images(directory):
    """Survey a dataset laid out as one sub-folder per class.

    Each sub-directory of ``directory`` is treated as a class, and every entry
    inside it is opened with ``cv2.imread``. Entries of ``directory`` that are
    not directories are ignored.

    Args:
        directory: Path of the dataset root.

    Returns:
        A tuple ``(image_sizes, invalid_images, class_distribution)``:

        * ``image_sizes``: ``defaultdict(int)`` mapping ``(width, height)`` to
          the number of readable images with that size.
        * ``invalid_images``: list of paths for which ``cv2.imread`` returned
          ``None``.
        * ``class_distribution``: ``defaultdict(int)`` mapping the class-folder
          name to its number of readable images.
    """
    size_counts = defaultdict(int)
    per_class_counts = defaultdict(int)
    unreadable = []

    for label in os.listdir(directory):
        label_dir = os.path.join(directory, label)

        # Only folders are classes; loose files at the root are skipped.
        if os.path.isdir(label_dir):
            for entry in os.listdir(label_dir):
                candidate = os.path.join(label_dir, entry)
                pixels = cv2.imread(candidate)

                if pixels is not None:
                    # shape is (rows, cols, channels); sizes are keyed as (width, height).
                    rows, cols, _ = pixels.shape
                    size_counts[(cols, rows)] += 1
                    per_class_counts[label] += 1
                else:
                    # imread signals an undecodable file by returning None.
                    unreadable.append(candidate)

    return size_counts, unreadable, per_class_counts

# Function to resize and save images in place
def resize_and_save_images(source_dir, target_size=(224, 224)):
    """Resize the images of a class-per-folder dataset in place.

    Each sub-directory of ``source_dir`` is walked and every entry is read
    with ``cv2.imread``. Images whose size differs from ``target_size`` are
    resized and written back to the same path, overwriting the original.

    Images that are already ``target_size`` are left untouched: rewriting them
    would only re-encode the file (losing quality for lossy formats) without
    changing its dimensions, and it would make every re-run of the notebook
    degrade the dataset a little more.

    Args:
        source_dir: Path of the dataset root; non-directory entries directly
            under it are ignored.
        target_size: ``(width, height)`` passed to ``cv2.resize`` as ``dsize``.

    Returns:
        None. Progress problems (unreadable files, failed writes) are printed.
    """
    wanted_size = tuple(target_size)

    for class_folder in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_folder)

        if not os.path.isdir(class_path):
            continue  # Skip non-directory files

        for img_file in os.listdir(class_path):
            img_path = os.path.join(class_path, img_file)

            # imread returns None when the file cannot be decoded
            img = cv2.imread(img_path)
            if img is None:
                print(f"❌ Skipping corrupt image: {img_path}")
                continue

            # shape is (height, width, ...) while target_size is (width, height)
            height, width = img.shape[:2]
            if (width, height) == wanted_size:
                continue  # Already the requested size; avoid a needless re-encode

            resized_img = cv2.resize(img, wanted_size)

            # imwrite reports failure through its return value, so check it
            # instead of silently leaving the original file in place.
            if not cv2.imwrite(img_path, resized_img):
                print(f"❌ Failed to write resized image: {img_path}")

In [10]:
# Process each dataset: report its state, resize it in place, report again.
def _report_dataset(dataset, stage):
    """Print the check_images() summary for ``dataset`` and return its results.

    Args:
        dataset: Path of the dataset root handed to ``check_images``.
        stage: Word inserted into the progress line ("before" or "after").

    Returns:
        The ``(sizes, invalids, classes)`` tuple produced by ``check_images``.
    """
    print(f"🔍 Checking images {stage} resizing...")
    sizes, invalids, classes = check_images(dataset)
    print(f"Unique Image Sizes: {sizes}")
    print(f"Corrupt Images: {len(invalids)}")
    print(f"Class Distribution: {classes}")
    return sizes, invalids, classes


# Datasets whose folder could not be found; reported at the end of the run.
missing_datasets = []

for dataset in DATASETS:
    print(f"\n📂 Processing Dataset: {dataset}")

    # A missing folder would otherwise raise from os.listdir() and abort the
    # run after earlier datasets have already been modified in place.
    if not os.path.isdir(dataset):
        print(f"❌ Dataset folder not found, skipping: {dataset}")
        missing_datasets.append(dataset)
        continue

    # Check before resizing (names kept at module level for later inspection)
    sizes_before, invalids_before, classes_before = _report_dataset(dataset, "before")

    # Resize images
    print("✂️ Resizing images...")
    resize_and_save_images(dataset, TARGET_SIZE)
    print(f"✅ Resizing completed for {dataset}!")

    # Check after resizing
    sizes_after, invalids_after, classes_after = _report_dataset(dataset, "after")

if missing_datasets:
    print(f"\n⚠️ Finished, but these datasets were skipped: {missing_datasets}")
else:
    print("\n🎉 All datasets have been resized successfully!")


📂 Processing Dataset: 1_UCI_Dataset
🔍 Checking images before resizing...
Unique Image Sizes: defaultdict(<class 'int'>, {(224, 224): 120})
Corrupt Images: 0
Class Distribution: defaultdict(<class 'int'>, {'Bacterial leaf blight': 40, 'Brown spot': 40, 'Leaf smut': 40})
✂️ Resizing images...
✅ Resizing completed for 1_UCI_Dataset!
🔍 Checking images after resizing...
Unique Image Sizes: defaultdict(<class 'int'>, {(224, 224): 120})
Corrupt Images: 0
Class Distribution: defaultdict(<class 'int'>, {'Bacterial leaf blight': 40, 'Brown spot': 40, 'Leaf smut': 40})

📂 Processing Dataset: 2_Rice_Leaf_Disease_Images
🔍 Checking images before resizing...
Unique Image Sizes: defaultdict(<class 'int'>, {(300, 300): 4624, (215, 287): 6, (216, 289): 4, (230, 306): 2, (214, 284): 2, (216, 288): 9, (210, 280): 3, (219, 292): 9, (220, 294): 4, (217, 289): 9, (223, 296): 3, (212, 283): 2, (215, 286): 2, (213, 283): 2, (296, 222): 6, (295, 221): 12, (300, 224): 3, (296, 221): 3, (299, 225): 6, (300, 225)