<center><h1>Maltese Christian Statue (MCS) by Parish Dataset Augmentation</h1>
<h2>Matthias Bartolo</h2>

</center>

In [1]:
import os
import random
import shutil
from time import time

import numpy as np
from PIL import Image, ImageEnhance, ImageFilter

# Define the original and augmented dataset paths
# NOTE(review): the output folder name ends in "1k" while num_images below is 1,500 -- confirm which is intended.
original_dataset_path = '../MCS Dataset by Parish/'
augmented_dataset_path = '../Augmented MCS Dataset by Parish1k/'

# Seed Python's RNG so the random crops, channel permutations and blend
# weights drawn by the augmentation functions are reproducible across runs
random_seed = 42
random.seed(random_seed)

# Ensure the augmented dataset path exists
os.makedirs(augmented_dataset_path, exist_ok=True)

# Retrieve the categories: every sub-directory of the original dataset is one
# category. Stray files in the dataset root (e.g. .DS_Store, Thumbs.db) are
# ignored because they cannot be listed as folders later on; the list is sorted
# because os.listdir returns entries in arbitrary order.
categories = sorted(
    entry
    for entry in os.listdir(original_dataset_path)
    if os.path.isdir(os.path.join(original_dataset_path, entry))
)

# Define the number of images required per category
num_images = 1_500

# Function to apply photometric augmentations using PIL
def photometric_augment(image):
    """Return photometric (colour and tone) variants of ``image``.

    Eight images are produced, in this order:
      1-2. saturation scaled by 0.5 and 1.5
      3-4. brightness scaled by 1 - 50/255 and 1 + 50/255
      5-6. contrast scaled by 0.5 and 1.5
      7.   grayscale, converted back to RGB
      8.   the R, G and B channels in a random order

    Args:
        image: A PIL image. Inputs that are not in RGB mode are converted to
            RGB before any augmentation is applied.

    Returns:
        list: Eight new PIL images in RGB mode.
    """
    # The channel swap below unpacks exactly three bands, which only holds for
    # RGB; grayscale, palette, RGBA or CMYK inputs would raise on the unpack.
    if image.mode != "RGB":
        image = image.convert("RGB")

    augmented_images = []

    # Change saturation
    enhancer = ImageEnhance.Color(image)
    for saturation_factor in [0.5, 1.5]:
        augmented_images.append(enhancer.enhance(saturation_factor))

    # Change brightness: offsets are expressed on the 0-255 intensity scale and
    # turned into a multiplicative factor around 1.0
    enhancer = ImageEnhance.Brightness(image)
    for brightness_offset in [-50, 50]:
        augmented_images.append(enhancer.enhance(1 + brightness_offset / 255.0))

    # Change contrast
    enhancer = ImageEnhance.Contrast(image)
    for contrast_factor in [0.5, 1.5]:
        augmented_images.append(enhancer.enhance(contrast_factor))

    # Convert to grayscale and back to RGB
    gray_image = image.convert("L")
    augmented_images.append(gray_image.convert("RGB"))

    # Random channel swapping: split into single-band images, permute, re-merge
    channels = list(image.split())
    random.shuffle(channels)
    augmented_images.append(Image.merge("RGB", channels))

    return augmented_images

# Function to apply geometric augmentations using PIL
def geometric_augment(image):
    """Return geometric and filtering variants of ``image``.

    Twelve images are produced, in this order:
      1-2.   horizontal and vertical flips
      3-4.   two random crops (each edge trimmed by up to a quarter of the
             dimension), resized back to the original size
      5-8.   rotations by 15, -15, 30 and -30 degrees
      9-10.  zoom in (x1.2) and zoom out (x0.8), both centred on the original
             canvas size
      11-12. Gaussian blur and unsharp-mask sharpening

    Args:
        image: A PIL image.

    Returns:
        list: Twelve new PIL images, each with the same size as ``image``.
    """
    augmented_images = []
    w, h = image.size

    # Horizontal and vertical flip
    augmented_images.append(image.transpose(Image.FLIP_LEFT_RIGHT))  # Horizontal
    augmented_images.append(image.transpose(Image.FLIP_TOP_BOTTOM))  # Vertical

    # Random cropping
    for _ in range(2):
        x_start = random.randint(0, w // 4)
        y_start = random.randint(0, h // 4)
        # max(...) guarantees a box at least one pixel wide/high, which only
        # matters for degenerate 1-pixel dimensions
        x_end = max(random.randint(3 * w // 4, w), x_start + 1)
        y_end = max(random.randint(3 * h // 4, h), y_start + 1)
        cropped = image.crop((x_start, y_start, x_end, y_end))
        augmented_images.append(cropped.resize((w, h)))

    # Rotate images
    for angle in [15, -15, 30, -30]:
        augmented_images.append(image.rotate(angle))

    # Stretching and zooming
    for scale_factor in [1.2, 0.8]:
        scaled_w = max(1, int(w * scale_factor))
        scaled_h = max(1, int(h * scale_factor))
        resized = image.resize((scaled_w, scaled_h))
        # Take a w x h window centred on the scaled image. When zooming out the
        # offsets are negative and the window extends past the image on every
        # side; crop fills that area with black, giving an even border instead
        # of padding only the right and bottom edges.
        left = (scaled_w - w) // 2
        top = (scaled_h - h) // 2
        augmented_images.append(resized.crop((left, top, left + w, top + h)))

    # Gaussian blur and sharpening
    blurred = image.filter(ImageFilter.GaussianBlur(radius=2))
    augmented_images.append(blurred)
    sharpened = image.filter(ImageFilter.UnsharpMask(radius=2, percent=150, threshold=3))
    augmented_images.append(sharpened)

    # Perspective transformation is intentionally not applied here
    # (PIL would need explicit coefficients via Image.transform for it)

    return augmented_images

# Function to mix images using PIL
def mix_images(image1, image2):
    """Blend two images with a random weight.

    Args:
        image1: Base PIL image; its mode and size define the result.
        image2: PIL image to mix in. It is converted to ``image1``'s mode and
            resized to ``image1``'s size when they differ.

    Returns:
        A new PIL image equal to ``image1 * (1 - alpha) + image2 * alpha`` with
        ``alpha`` drawn uniformly from [0.3, 0.7].
    """
    # Image.blend rejects inputs whose modes differ (e.g. RGB vs RGBA), not
    # just inputs whose sizes differ, so align both before blending
    if image2.mode != image1.mode:
        image2 = image2.convert(image1.mode)
    # Resize image2 to match image1 size if needed
    if image1.size != image2.size:
        image2 = image2.resize(image1.size)
    alpha = random.uniform(0.3, 0.7)
    return Image.blend(image1, image2, alpha)

# Process each category: copy the originals into the augmented dataset, then
# generate augmented JPEGs until the category holds num_images images.
valid_extensions = ('.jpg', '.jpeg', '.png')
for category in categories:
    category_path = os.path.join(original_dataset_path, category)
    # Stray files in the dataset root are not categories and cannot be listed
    if not os.path.isdir(category_path):
        continue

    # Create category folder in the augmented dataset
    augmented_category_path = os.path.join(augmented_dataset_path, category)
    os.makedirs(augmented_category_path, exist_ok=True)

    # List images in the category. Only files with an image extension are
    # kept, so the count below and the augmentation loop never see non-image
    # files; sorting makes the cycling order independent of the filesystem.
    images = sorted(
        file_name
        for file_name in os.listdir(category_path)
        if file_name.lower().endswith(valid_extensions)
    )
    num_existing_images = len(images)

    # Copy original images to the augmented dataset byte-for-byte (re-saving
    # through PIL would re-encode them and lose quality)
    for image_name in images:
        shutil.copy2(
            os.path.join(category_path, image_name),
            os.path.join(augmented_category_path, image_name),
        )

    # Nothing to augment from: skip instead of dividing by zero below
    if num_existing_images == 0:
        print(f"Skipping '{category}': no images found.")
        continue

    # Check if more images are needed
    if num_existing_images >= num_images:
        continue

    # Augment images to reach the required count
    images_to_generate = num_images - num_existing_images
    index = 0
    # A per-category counter gives every output a unique name. Timestamps can
    # repeat within a millisecond and silently overwrite earlier files, and a
    # counter also makes re-runs overwrite the same files instead of piling up.
    aug_counter = 0
    while images_to_generate > 0:
        # Cycle through existing images and augment them
        image_name = images[index % num_existing_images]
        image_path = os.path.join(category_path, image_name)
        # convert() loads the pixels into a new image, so the file handle can
        # be released right away; RGB is required for the JPEG output
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        # Apply photometric and geometric augmentations
        augmented_images = photometric_augment(image) + geometric_augment(image)

        # Blend and mix images if possible
        if num_existing_images > 1:
            mix_index = (index + 1) % num_existing_images
            mix_image_path = os.path.join(category_path, images[mix_index])
            with Image.open(mix_image_path) as mix_source:
                mix_image = mix_source.convert("RGB")
            augmented_images.append(mix_images(image, mix_image))

        # splitext keeps dots that are part of the file name itself
        base_name = os.path.splitext(image_name)[0]
        for aug_image in augmented_images:
            if images_to_generate <= 0:
                break
            aug_image_name = f"{base_name}_aug_{aug_counter:06d}.jpg"
            aug_image.save(os.path.join(augmented_category_path, aug_image_name))
            aug_counter += 1
            images_to_generate -= 1

        index += 1

print("Image augmentation completed. Augmented dataset is ready!")


Image augmentation completed. Augmented dataset is ready!
