In [2]:
import tensorflow
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import torch
import torchvision.transforms as transforms
# from torchvision.transforms import ColorJitter, RandomGrayscale, Lambda, GaussianBlur, RandomPosterize
from PIL import Image
import random
import os

# Random Erasing Augmentation

In [3]:
# Stage 1 of the Phase 2 pipeline: random-erasing augmentation.
# Every image in `image_dir` is resized to 144x144 and written to `output_dir`
# several times, each copy with a different randomly erased rectangle.

# Define the paths
image_dir = "Dataset/Training Images"
# The folder spelling ("Phase 2 Data aug") must match the input path used by
# the mosaic stage exactly: a different letter case is a different directory
# on case-sensitive filesystems.
output_dir = "Dataset/Phase 2 Data aug/output_random_erased"

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Transformation pipeline: PIL image -> tensor -> erase one rectangle -> PIL image
transform = transforms.Compose([
    transforms.Resize((144, 144)),
    transforms.ToTensor(),
    # p=1: an erase is attempted for every image; the erased area is filled with 0
    transforms.RandomErasing(p=1, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False),
    transforms.ToPILImage()
])

# Number of augmented copies written per input image
copies_per_image = 5

# Running index that makes every output file name unique across the whole run
counter = 1

# Sorted listing so the numbering of the output files is reproducible
for f in sorted(os.listdir(image_dir)):
    if not f.lower().endswith(('.jpg', '.png', '.jpeg')):
        continue

    img_path = os.path.join(image_dir, f)
    try:
        # The context manager releases the file handle once all copies are saved
        with Image.open(img_path) as source_image:
            # JPEG cannot store alpha or palette data, so anything that is not
            # plain RGB / greyscale is converted first (PS: don't delete this
            # or we will lose the images that are png)
            if source_image.mode in ('RGB', 'L'):
                img = source_image
            else:
                img = source_image.convert('RGB')

            # The part of the file name before the first underscore is reused
            # as the prefix of every augmented copy
            name_prefix = f.split('_')[0]

            # Each call to `transform` erases a different random rectangle
            for _ in range(copies_per_image):
                random_erased_augmented_image = transform(img)
                output_path = os.path.join(output_dir, f"{name_prefix}_p2_{counter}.jpeg")
                random_erased_augmented_image.save(output_path, 'JPEG')  # Save as JPEG
                counter += 1

    except Exception as e:
        # Best effort: report the unreadable file and continue with the next one
        print(f"Error processing file {f}: {e}")

# Mosaic Augmentation

In [4]:
def _list_image_paths(folder):
    """Return the sorted paths of the jpg/jpeg/png files in `folder` (extension match is case-insensitive)."""
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.lower().endswith(('.jpg', '.png', '.jpeg'))
    )


def _load_tile(path, image_size):
    """Open the image at `path`, convert it to RGB if needed and return it resized to `image_size`."""
    with Image.open(path) as source_image:
        # Convert before resizing: JPEG has no alpha/palette support and the
        # mosaic canvas is RGB anyway
        rgb_image = source_image if source_image.mode == 'RGB' else source_image.convert('RGB')
        # resize() returns a new image, so the tile stays valid after the file is closed
        return rgb_image.resize(image_size)


def mosaic_augmentation_with_test(background_picture_for_mosaic, main_data_picture, output_path, grid_size=2, image_size=(144, 144), repetitions=5):
    """Write mosaics that combine each main image with random background images.

    For every image found in `main_data_picture`, `repetitions` JPEG mosaics are
    saved to `output_path`. A mosaic is a `grid_size` x `grid_size` grid of
    tiles: the main image plus `grid_size**2 - 1` background images sampled
    without replacement from `background_picture_for_mosaic`, placed in a
    random order.

    Output files are named `<prefix>_<index>_<rep>.jpeg`, where `<prefix>` is
    the part of the main image's file name before the first underscore,
    `<index>` is the 1-based position of the main image in the sorted listing
    and `<rep>` runs from 1 to `repetitions`.

    Args:
        background_picture_for_mosaic: directory holding the background images.
        main_data_picture: directory holding the images to embed in mosaics.
        output_path: existing directory that receives the mosaics.
        grid_size: number of tiles per row and per column.
        image_size: (width, height) of a single tile in pixels.
        repetitions: number of mosaics generated per main image.

    Raises:
        ValueError: if there are main images to process but fewer than
            `grid_size**2 - 1` background images are available.
    """
    background_images = _list_image_paths(background_picture_for_mosaic)
    main_images = _list_image_paths(main_data_picture)

    # Nothing to augment: leave the output directory untouched
    if not main_images:
        return

    # Every grid cell except the one taken by the main image needs a background
    backgrounds_per_mosaic = grid_size * grid_size - 1
    if len(background_images) < backgrounds_per_mosaic:
        raise ValueError(
            f"Need at least {backgrounds_per_mosaic} background images in "
            f"'{background_picture_for_mosaic}', found {len(background_images)}"
        )

    mosaic_size = (grid_size * image_size[0], grid_size * image_size[1])

    for main_index, main_path in enumerate(main_images, start=1):
        main_tile = _load_tile(main_path, image_size)

        # Name before the first "_" of the main image's file name
        base_name = os.path.basename(main_path).split("_")[0]

        for rep in range(1, repetitions + 1):
            # Fresh random backgrounds for every repetition
            tiles = [
                _load_tile(background_path, image_size)
                for background_path in random.sample(background_images, backgrounds_per_mosaic)
            ]
            tiles.append(main_tile)
            random.shuffle(tiles)  # random position of the main image in the grid

            # White canvas; it is fully covered because len(tiles) == grid_size ** 2
            mosaic = Image.new('RGB', mosaic_size, (255, 255, 255))

            # Fill the grid row by row
            for cell_index, tile in enumerate(tiles):
                row, column = divmod(cell_index, grid_size)
                mosaic.paste(tile, (column * image_size[0], row * image_size[1]))

            output_file = os.path.join(output_path, f'{base_name}_{main_index}_{rep}.jpeg')
            mosaic.save(output_file, 'JPEG')

# Stage 2 inputs and output.
# Backgrounds: pool of images used to fill the remaining cells of each mosaic.
background_picture_for_mosaic = "Dataset/Mosaic_bg_image"
# Main images: each file in this directory gets its own set of mosaics.
main_data_picture = "Dataset/Phase 2 Data aug/output_random_erased"
# Destination directory for the generated mosaics (created when missing).
output_path = "Dataset/Phase 2 Data aug/output_mosaic"
os.makedirs(output_path, exist_ok=True)

# Grid size, tile size and repetition count are left at the function's defaults.
mosaic_augmentation_with_test(
    background_picture_for_mosaic=background_picture_for_mosaic,
    main_data_picture=main_data_picture,
    output_path=output_path,
)

# Data citation: https://www.kaggle.com/datasets/pankajkumar2002/random-image-sample-dataset

# Geometric Augmentation

In [10]:
# Stage 3 of the Phase 2 pipeline: geometric augmentation.
# Every image in `image_dir` is passed through a Keras ImageDataGenerator and
# several randomly transformed copies are written to `output_dir`.

# Image data generator for augmentation
generate_data = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Input and output directories
image_dir = "Dataset/Phase 2 Data aug/output_mosaic"
output_dir = "Dataset/Phase 2 Data aug/output_geometric"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Number of augmented copies written per input image
augmentations_per_image = 5

# Running index that makes every output file name unique across the whole run
count = 1

# Sorted listing so the numbering of the output files is reproducible
for f in sorted(os.listdir(image_dir)):
    if not f.lower().endswith(('.jpg', '.png', '.jpeg')):
        continue

    img_path = os.path.join(image_dir, f)  # Combine directory and filename
    try:
        # Load the image and preprocess
        img = load_img(img_path)
        x = img_to_array(img)          # Convert image to numpy array
        x = x.reshape((1,) + x.shape)  # Add batch dimension

        # The part of the file name before the first underscore is reused as
        # the prefix of every augmented copy
        name_prefix = f.split('_')[0]

        # flow() yields batches indefinitely; zip() with a range stops it after
        # the requested number of copies. The files are written below by hand,
        # so no save_to_dir / save_prefix / save_format is passed to flow().
        augmented_batches = generate_data.flow(x, batch_size=1)
        for _, batch in zip(range(augmentations_per_image), augmented_batches):
            output_filename = f"{name_prefix}_geometric_p2_{count}.jpeg"
            output_path = os.path.join(output_dir, output_filename)
            augmented_image = batch[0].astype('uint8')  # Convert back to uint8 format
            img_to_save = Image.fromarray(augmented_image)
            img_to_save.save(output_path, 'JPEG')
            count += 1
    except Exception as e:
        # Best effort: report the unreadable file and continue with the next one
        print(f"Error processing file {f}: {e}")
# Code adapted from: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html