In [None]:
!python -m pip install imgaug

In [1]:
import os
import random
import numpy as np
from PIL import Image
import imgaug.augmenters as iaa

# Dataset root folder and the number of images each sub-folder should end up with
dataset_path = "taiwan/data augmentation/Train"
target_count = 1004

# Mild geometric transforms; iaa.Sequential applies them in the listed order
_minor_transforms = [
    iaa.Fliplr(0.5),  # mirror left-right for about half of the images
    iaa.Flipud(0.1),  # mirror top-bottom for about one image in ten
    iaa.Affine(
        scale=(0.95, 1.05),  # zoom between 95% and 105%
        rotate=(-10, 10),    # rotate by up to 10 degrees either way
        shear=(-5, 5),       # shear by up to 5 degrees either way
    ),
    iaa.Crop(percent=(0, 0.05)),  # trim up to 5% off the image borders
]
augmenters = iaa.Sequential(_minor_transforms)

# Function to save augmented images
def save_augmented_images(folder_path, images, augmenter, target):
    """Top up a folder with augmented copies until it holds ``target`` images.

    Source images are drawn at random (with replacement) from ``images``,
    passed through ``augmenter``, resized to the dimensions of ``images[0]``
    and written to ``folder_path`` as ``aug_<n>.jpg``.

    Args:
        folder_path: Directory the augmented JPEG files are written to.
        images: Paths of the existing images; ``len(images)`` is taken as the
            current image count of the folder.
        augmenter: Callable invoked as ``augmenter(image=array)`` that returns
            the augmented array (e.g. an ``iaa.Sequential``).
        target: Desired total number of images.

    Raises:
        ValueError: If ``images`` is empty while ``target`` is positive, since
            there is nothing to augment from.
    """
    current_count = len(images)
    if current_count >= target:
        # Nothing to generate; avoid opening any file at all.
        return
    if not images:
        raise ValueError(
            f"Cannot augment {folder_path!r}: no source images were provided."
        )

    # Every output is resized to the first image's dimensions. The context
    # manager closes the file handle as soon as the size has been read.
    with Image.open(images[0]) as reference:
        original_size = reference.size

    next_index = current_count
    while current_count < target:
        # Randomly choose an image
        img_path = random.choice(images)
        with Image.open(img_path) as source:
            # JPEG cannot store alpha or palette data, and augmenting raw
            # palette indices would corrupt colours, so normalise those modes.
            if source.mode in ("RGB", "L"):
                pixels = source
            else:
                pixels = source.convert("RGB")
            img_array = np.array(pixels, dtype=np.uint8)

        # Apply augmentation
        augmented_image_array = augmenter(image=img_array)
        augmented_image = Image.fromarray(augmented_image_array).resize(
            original_size, Image.Resampling.LANCZOS
        )

        # Never overwrite an existing file: skip ahead to the next free name.
        new_filename = os.path.join(folder_path, f"aug_{next_index}.jpg")
        while os.path.exists(new_filename):
            next_index += 1
            new_filename = os.path.join(folder_path, f"aug_{next_index}.jpg")

        # quality=100 is the highest JPEG setting; the format is still lossy.
        augmented_image.save(new_filename, quality=100)

        next_index += 1
        current_count += 1

# Detect folders in the dataset directory
folders = [d for d in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, d))]

# Augment images for each detected folder
for folder in folders:
    folder_path = os.path.join(dataset_path, folder)
    # Match extensions case-insensitively so files such as "IMG_01.JPG" are
    # counted, and sort so the reference image (images[0]) does not depend on
    # the unspecified order of os.listdir().
    # NOTE: files produced by an earlier run (aug_*.jpg) live in the same
    # folder and are therefore counted and reused as sources as well.
    images = sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    # An empty folder has nothing to augment from; skip it rather than abort
    # the whole run.
    if not images:
        print(f"Skipping {folder}: no images found.")
        continue

    # Perform augmentation if needed
    if len(images) < target_count:
        print(f"Augmenting {folder} from {len(images)} to {target_count} images.")
        save_augmented_images(folder_path, images, augmenters, target_count)
    else:
        print(f"{folder} already has {len(images)} images or more.")

print("Data augmentation completed!")

Augmenting Bacterial spot from 624 to 1004 images.
Augmenting Black mold from 398 to 1004 images.
Augmenting Gray spot from 507 to 1004 images.
Augmenting health from 678 to 1004 images.
Augmenting Late blight from 550 to 1004 images.
Augmenting powdery mildew from 963 to 1004 images.
Data augmentation completed!
