In [1]:
import os
import random
import numpy as np
from PIL import Image
import imgaug.augmenters as iaa

# Compatibility shim: NumPy 1.24 removed the deprecated `np.bool` alias, and
# code that still references it (presumably imgaug here) fails without it.
# Restore the alias only when it is missing, so NumPy versions that define
# `np.bool` themselves are left untouched.
if not hasattr(np, "bool"):
    np.bool = np.bool_

def save_augmented_images(folder_path, images, augmenter, target, max_consecutive_failures=25):
    """Write augmented copies of ``images`` into ``folder_path`` until ``target`` is reached.

    The running count starts at ``len(images)`` and grows by one for every
    augmented file written. Source images are drawn at random (with
    replacement) from ``images`` only, so files created here are never used as
    sources within the same call.

    Args:
        folder_path: Directory that receives the ``aug_<n>.jpg`` files.
        images: Paths of the source images to augment.
        augmenter: Callable invoked as ``augmenter(image=array)`` that returns
            the augmented image array.
        target: Total number of images (sources + augmented) to reach.
        max_consecutive_failures: Stop after this many failures in a row, so
            an unreadable dataset or unwritable folder cannot loop forever.

    Returns:
        The number of augmented images written.
    """
    created = 0
    current_count = len(images)

    if current_count >= target:
        return created
    if not images:
        # random.choice() raises on an empty sequence; nothing to augment from.
        print(f"No source images available for {folder_path}; nothing to augment.")
        return created

    image_index = 0  # Suffix of the next candidate filename
    consecutive_failures = 0

    while current_count < target:
        # Chosen outside the try block so the error message below can always name it.
        img_path = random.choice(images)
        try:
            # The context manager closes the underlying file as soon as the
            # pixel data has been copied into the array.
            with Image.open(img_path) as img:
                img_array = np.array(img.convert("RGB"))  # Ensure RGB mode

            augmented_image = Image.fromarray(augmenter(image=img_array))

            # Skip over indices already taken by files from earlier runs.
            new_filename = os.path.join(folder_path, f"aug_{image_index}.jpg")
            while os.path.exists(new_filename):
                image_index += 1
                new_filename = os.path.join(folder_path, f"aug_{image_index}.jpg")

            augmented_image.save(new_filename)
        except Exception as e:
            # Best effort: report the failure and try another random image.
            print(f"Error processing image {img_path}: {e}")
            consecutive_failures += 1
            if consecutive_failures >= max_consecutive_failures:
                print(
                    f"Stopping augmentation for {folder_path} after "
                    f"{consecutive_failures} consecutive failures "
                    f"({current_count}/{target} images)."
                )
                break
            continue

        consecutive_failures = 0
        current_count += 1
        created += 1
        image_index += 1  # Next iteration starts from a fresh index

    return created

def augment_data(path, size):
    """Top up every class folder under ``path`` to ``size`` images via augmentation.

    Each immediate sub-directory of ``path`` is processed independently.
    Folders holding fewer than ``size`` images (``.png``/``.jpg``/``.jpeg``,
    matched case-insensitively) are filled with augmented copies written into
    the same folder by ``save_augmented_images``. Folders already at or above
    ``size`` are left untouched, and folders with no images are skipped.

    Args:
        path: Dataset root directory containing one sub-directory per class.
        size: Target number of images per sub-directory.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    # Between one and four of the augmenters below are applied to each image.
    # random_order is set on SomeOf, which sequences the selected augmenters;
    # on the single-child outer Sequential it would have no effect.
    augmenter = iaa.Sequential([
        iaa.SomeOf((1, 4), [
            iaa.Sharpen(alpha=(0, 0.3), lightness=(0.8, 1.2)),  # Mild sharpening
            iaa.Crop(percent=(0, 0.1)),  # Crop up to 10% per side
            iaa.Sometimes(0.3, iaa.Affine(rotate=(-5, 5), mode="edge")),  # Small rotation, no black borders
            iaa.Sometimes(0.3, iaa.MotionBlur(k=3)),  # Light motion blur
            iaa.Sometimes(0.3, iaa.ElasticTransformation(alpha=1, sigma=0.2)),  # Mimic natural cell deformations
            iaa.Sometimes(0.3, iaa.MultiplyBrightness((0.8, 1.2))),  # Vary brightness slightly
            iaa.Fliplr(0.5),  # 50% chance of horizontal flipping
        ], random_order=True)
    ])

    # Detect class folders; sorted so the processing order is deterministic.
    folders = sorted(
        d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))
    )

    for folder in folders:
        folder_path = os.path.join(path, folder)
        images = [
            os.path.join(folder_path, name)
            for name in sorted(os.listdir(folder_path))
            if name.lower().endswith(image_extensions)  # also accept .JPG, .PNG, ...
        ]

        if not images:
            # Nothing to augment from; skip instead of failing inside the helper.
            print(f"Skipping {folder}: no source images found.")
            continue

        # Perform augmentation only where the folder is below the target
        if len(images) < size:
            print(f"Augmenting {folder} from {len(images)} to {size} images.")
            save_augmented_images(folder_path, images, augmenter, size)
        else:
            print(f"{folder} already has {len(images)} images or more.")

    print("Data augmentation completed!")


In [2]:
# Dataset root (one sub-folder per class) and the per-class image target.
# In the recorded run every class folder was below this target.
DATASET_DIR = "./normalized_output of Blood Cancer vahadane"
TARGET_IMAGES_PER_CLASS = 3350

augment_data(DATASET_DIR, TARGET_IMAGES_PER_CLASS)

Augmenting basophil from 1218 to 3350 images.
Augmenting eosinophil from 3117 to 3350 images.
Augmenting erythroblast from 1551 to 3350 images.
Augmenting ig from 2895 to 3350 images.
Augmenting lymphocyte from 1214 to 3350 images.
Augmenting monocyte from 1420 to 3350 images.
Augmenting neutrophil from 3329 to 3350 images.
Augmenting platelet from 2348 to 3350 images.
Data augmentation completed!
