a way to automate data augmentation:

In [1]:
import albumentations as A
import cv2
import os
import random
from PIL import Image
import matplotlib.pyplot as plt

In [5]:
# Source folder holding the original images, and the destination folder
# that will receive the augmented copies.
orig_data = './Dataset'
augmented_data = './AugmentedData/'

# An existing output folder is taken to mean the augmentation was already
# done, so stop here instead of generating everything a second time.
if os.path.exists(augmented_data):
    # Raise SystemExit directly: the `exit` name is an interactive-shell
    # helper injected by the `site` module and is not guaranteed to be
    # available when this runs as a plain script.
    raise SystemExit(0)

In [3]:
def visualize(image, figsize=(10, 10)):
    """Show *image* in a new matplotlib figure with the axes hidden.

    Args:
        image: Image data in any form accepted by ``plt.imshow``.
        figsize: ``(width, height)`` of the figure, passed to ``plt.figure``.
    """
    plt.figure(figsize=figsize)
    # Grab the new figure's current axes and hide ticks, labels and frame.
    axes = plt.gca()
    axes.set_axis_off()
    plt.imshow(image)

In [7]:
# Create the output folder; this raises FileExistsError if it already exists.
os.mkdir(augmented_data)

# Augmentation pipeline built from the albumentations transforms below.
transform = A.Compose([
    A.CLAHE(),
    A.RandomRotate90(),
    A.Transpose(),
    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.50, rotate_limit=45, p=0.75),
    A.RandomResizedCrop(scale=(0.8, 1.0), height=2000, width=1500),
    A.Blur(blur_limit=3),
    A.OpticalDistortion(),
    A.GridDistortion(),
    A.HueSaturationValue()
])

# Number of augmented variants to generate per original image.
augmentations_per_image = 5

# List all .jpg image files in the input data folder.
image_files = [img for img in os.listdir(orig_data) if img.endswith('.jpg')]

for image_file in image_files:
    image_path = os.path.join(orig_data, image_file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising; skip it so one bad file does not abort the run.
        print(f'Skipping unreadable image: {image_path}')
        continue
    # OpenCV loads images as BGR; the pipeline is fed RGB data.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Generate multiple augmented images from each original image.
    for _ in range(augmentations_per_image):
        augmented_image = transform(image=image)['image']

        # Draw random numbers until the resulting filename is not already
        # taken in the output folder.
        while True:
            random_number = random.randint(1, 10000)
            output_image_filename = f'augmented_{random_number}_{image_file}'
            output_image_path = os.path.join(augmented_data, output_image_filename)
            if not os.path.exists(output_image_path):
                break

        # Convert back to BGR for OpenCV before writing. cv2.imwrite reports
        # failure through its return value, so check it explicitly.
        saved = cv2.imwrite(output_image_path, cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR))
        if not saved:
            print(f'Failed to write augmented image: {output_image_path}')

        # To preview results while tuning the pipeline, call
        # visualize(augmented_image) here.


