# CSE 455 WEAPON CLASSIFICATION - DATA AUGMENTATION

# Necessary Libraries

In [None]:
import tensorflow
import numpy as np
# import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import cv2
import torch
import torchvision.transforms as transforms
from torchvision.transforms import ColorJitter, RandomGrayscale, Lambda, GaussianBlur, RandomPosterize
from PIL import Image
import random
import os

# Data Augmentation (Transformations)

In [None]:
# Please change the location to the dataset we are using from the "Weapon-Classification/Dataset/images"
# Image data generator for augmentation: every batch drawn from it applies a
# fresh random mix of rotation, shift, shear, zoom and horizontal flip.
generate_data = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Input and output directories
image_dir = "Dataset/images"  # Corrected path
output_dir = "Dataset/transformed augmented data"  # Corrected path

# Number of augmented copies written for every source image
augmentations_per_image = 4

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Loop through images in the input directory
for f in os.listdir(image_dir):
    if not f.lower().endswith(('jpg', 'png', 'jpeg')):
        continue  # Skip anything that is not an image file

    img_path = os.path.join(image_dir, f)  # Combine directory and filename
    try:
        # Load the image and preprocess
        img = load_img(img_path)  # Load the image
        x = img_to_array(img)     # Convert image to numpy array
        x = x.reshape((1,) + x.shape)  # Add batch dimension

        # Extract the part before the first underscore to use as a subfolder name
        subfolder_name = f.split('_')[0]
        subfolder_path = os.path.join(output_dir, subfolder_name)

        # Create subfolder if it doesn't exist
        os.makedirs(subfolder_path, exist_ok=True)

        # Keras names each saved file from the prefix plus a small random
        # number, so the prefix is the full source filename stem (which still
        # starts with the class name). A class-only prefix lets outputs of
        # different source images collide and overwrite each other.
        augmented_batches = generate_data.flow(
            x,
            batch_size=1,
            save_to_dir=subfolder_path,
            save_prefix=os.path.splitext(f)[0],
            save_format='jpeg',
        )

        # The generator is endless and writes one file per batch it yields,
        # so stop as soon as the requested number of files has been produced.
        for generated, _ in enumerate(augmented_batches, start=1):
            if generated >= augmentations_per_image:
                break
    except Exception as e:
        # Best effort: report the failing file and carry on with the rest
        print(f"Error processing file {f}: {e}")
# Cite: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

# Random Erasing based Augmentation

In [None]:
# Please change the location to the dataset we are using from the "Weapon-Classification/Dataset/images"

# Define the paths
image_dir = "Dataset/images"  # Corrected path
output_dir = "Dataset/random erased augmented data"  # Corrected path

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Transformation pipeline: PIL image -> tensor -> erase one random rectangle
# (p=1 means always, filled with 0 / black) -> back to a PIL image for saving.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomErasing(p=1, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False),
    transforms.ToPILImage()
])

# Augment images and save to output directory
for f in os.listdir(image_dir):
    if not f.lower().endswith(('jpg', 'png', 'jpeg')):
        continue  # Skip anything that is not an image file

    img_path = os.path.join(image_dir, f)
    try:
        # The context manager closes the underlying file once the variations
        # have been written, so long runs do not leak file handles.
        with Image.open(img_path) as source:
            # Convert every non-RGB mode to RGB before the tensor round trip
            # (PS: Don't delete this or we will lose datasets that are png).
            # JPEG cannot store an alpha channel, and palette ('P') images
            # would otherwise be treated as a single channel of palette indices.
            img = source if source.mode == 'RGB' else source.convert('RGB')

            # Extract the part before the first underscore to use as a subfolder name
            subfolder_name = f.split('_')[0]
            subfolder_path = os.path.join(output_dir, subfolder_name)

            # Create subfolder if it doesn't exist
            os.makedirs(subfolder_path, exist_ok=True)

            # Save four different variations of images in the subfolder;
            # each call to transform() erases a different random rectangle.
            for i in range(4):
                random_erased_augmented_image = transform(img)
                output_path = os.path.join(subfolder_path, f"{os.path.splitext(f)[0]}_{i+1}.jpeg")
                random_erased_augmented_image.save(output_path, 'JPEG')  # Save as JPEG

    except Exception as e:
        # Best effort: report the failing file and carry on with the rest
        print(f"Error processing file {f}: {e}")


# Color Transformation based Augmentation

In [None]:
# Please change the location to the dataset we are using from the "Weapon-Classification/Dataset/images"

# For Classification Tasks (Pre-trained Models)
# transform = transforms.Compose([
# transforms.RandomHorizontalFlip(),
# transforms.PILToTensor(),
# transforms.ConvertImageDtype(torch.float),
# transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
# transforms.RandomErasing(),
# ])

"""
PS: The pipeline above is for flipping and also removing parts of images from the augmented images, but for now it's just a normalized color-transformed image.
Below are more ways to perform color transformation.
"""
# Color Jittering: It randomly changes the brightness, contrast, saturation, or hue of the image to simulate varying lighting conditions and camera settings.

# transform = transforms.Compose([
#     transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),  # Randomly adjust color properties
#     transforms.ToTensor(),
# ])

# Random Grayscale Conversion: It converts the image to grayscale with a given probability which makes the model more robust to images with reduced or missing color information.

# transform = transforms.Compose([
#     transforms.RandomGrayscale(p=0.2),  # 20% chance to convert to grayscale
#     transforms.ToTensor(),
# ])

# Gamma Correction: It adjusts the gamma of an image to make it appear brighter or darker to simulate overexposed or underexposed images.

# transform = transforms.Compose([
#     Lambda(lambda img: img.point(lambda x: x ** 0.8)),  # Apply gamma correction
#     transforms.ToTensor(),
# ])

# Hue Rotation: It rotates the hue channel of the image which simulates images taken under different light sources (e.g., daylight vs. fluorescent lighting).

# transform = transforms.Compose([
#     transforms.ColorJitter(hue=0.3),  # Rotate hue randomly
#     transforms.ToTensor(),
# ])

# Gaussian Blur: It applies a Gaussian blur filter to the image which simulates out-of-focus images or motion blur.

# transform = transforms.Compose([
#     transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 2.0)),  # Randomly blur the image
#     transforms.ToTensor(),
# ])

# Posterization: It reduces the number of bits used for each color channel, creating a "posterized" effect that simulates compression artifacts or low-quality images.

# transform = transforms.Compose([
#     transforms.RandomPosterize(bits=4, p=0.5),  # Reduce to 4 bits with a 50% chance
#     transforms.ToTensor(),
# ])

# image_dir = "Dataset/small augment data"
# output_dir = "Dataset/augmented data"
# os.makedirs(output_dir, exist_ok=True)

# # transformation pipeline
# transform = transforms.Compose([
#     transforms.PILToTensor(),               # Convert image to tensor
#     transforms.ConvertImageDtype(torch.float),  # Scale to [0, 1]
#     transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # Normalize
# ])

# # Augment images and save to output directory
# for f in os.listdir(image_dir):
#     if f.lower().endswith(('jpg', 'png', 'jpeg')):
#         img_path = os.path.join(image_dir, f)
#         try:
#             img = Image.open(img_path)
#             # Convert RGBA to RGB for saving as JPEG
#             if img.mode == 'RGBA':
#                 img = img.convert('RGB')
#             augmented_img_tensor = transform(img)
#             augmented_img = transforms.ToPILImage()(augmented_img_tensor)
#             # Save the augmented image
#             output_path = os.path.join(output_dir, f"{os.path.splitext(f)[0]}_aug_{i+1}.jpeg")
#             augmented_img.save(output_path)
#         except Exception as e:
#             print(f"Error processing file {f}: {e}")


# Mosaic Augmentation

In [None]:
def _list_image_paths(directory):
    """Return the sorted paths of the jpg/png/jpeg files directly inside *directory*."""
    # Lower-case the name first so files such as "gun_1.JPG" are not skipped.
    return sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.lower().endswith(('jpg', 'png', 'jpeg'))
    )


def _load_rgb_tile(path, size):
    """Open the image at *path* and return an RGB copy resized to *size*."""
    # Convert before resizing: JPEG cannot store transparency, and palette
    # images are otherwise resized with nearest-neighbour sampling only.
    # The context manager closes the source file; the returned image is a
    # separate in-memory copy.
    with Image.open(path) as source:
        return source.convert('RGB').resize(size)


def mosaic_augmentation_with_test(background_picture_for_mosaic, main_data_picture, output_path, grid_size=2, image_size=(150, 150), repetitions=3):
    """Build mosaic images that mix each main image with random background images.

    For every image in ``main_data_picture`` the function creates
    ``repetitions`` mosaics. Each mosaic is a ``grid_size`` x ``grid_size``
    grid of tiles holding the main image plus ``grid_size ** 2 - 1``
    randomly chosen background images, placed in random order. Mosaics are
    saved as JPEG in ``output_path/<class>/``, where ``<class>`` is the part
    of the main image's filename before the first underscore, under the name
    ``<class>_<image number>_<repetition>.jpeg``.

    Args:
        background_picture_for_mosaic: Directory of background images.
        main_data_picture: Directory of the images to augment.
        output_path: Directory that receives the per-class subfolders.
        grid_size: Number of tiles per row and per column.
        image_size: ``(width, height)`` of a single tile in pixels.
        repetitions: Number of mosaics generated per main image.

    Raises:
        ValueError: If ``grid_size`` is smaller than 1, or if there are main
            images to process but fewer background images than one mosaic
            needs.
    """
    if grid_size < 1:
        raise ValueError(f"grid_size must be at least 1, got {grid_size}")

    # Get all background and main image paths
    dataset_images = _list_image_paths(background_picture_for_mosaic)
    test_images = _list_image_paths(main_data_picture)

    # One cell of the grid is reserved for the main image
    backgrounds_per_mosaic = grid_size * grid_size - 1
    if test_images and len(dataset_images) < backgrounds_per_mosaic:
        raise ValueError(
            f"Need at least {backgrounds_per_mosaic} background images in "
            f"'{background_picture_for_mosaic}', found {len(dataset_images)}"
        )

    random.shuffle(dataset_images)

    mosaic_size = (grid_size * image_size[0], grid_size * image_size[1])

    # Main images are numbered from 1 in the output filenames
    for test_index, test_image in enumerate(test_images, start=1):
        test_image_resized = _load_rgb_tile(test_image, image_size)

        # Extract the name before the first "_" in the main image's filename
        base_name = os.path.basename(test_image).split("_")[0]

        # Create subfolder for the base name (before the first "_")
        subfolder_path = os.path.join(output_path, base_name)
        os.makedirs(subfolder_path, exist_ok=True)

        for rep in range(repetitions):
            # Pick distinct random backgrounds to fill the remaining cells
            selected_dataset_images = random.sample(dataset_images, backgrounds_per_mosaic)
            images = [_load_rgb_tile(path, image_size) for path in selected_dataset_images]

            images.append(test_image_resized)  # Add the main image to the list
            random.shuffle(images)  # Shuffle so the main image lands in a random cell

            # Create a blank canvas for the mosaic (gray background)
            mosaic = Image.new('RGB', mosaic_size, (155, 155, 155))

            # Paste the shuffled images into the grid row by row
            for img_index, tile in enumerate(images):
                row, col = divmod(img_index, grid_size)
                mosaic.paste(tile, (col * image_size[0], row * image_size[1]))

            # Save the mosaic as JPEG in the class subfolder
            output_file = os.path.join(subfolder_path, f'{base_name}_{test_index}_{rep + 1}.jpeg')
            mosaic.save(output_file, 'JPEG')

# Example usage
background_picture_for_mosaic = "Dataset/data"        # Folder of background images used to fill the other mosaic tiles
# Lower-case "images" to match the folder name used by the other cells;
# "Dataset/Images" is a different path on case-sensitive filesystems.
main_data_picture = "Dataset/images"   # Folder of the main images to augment
output_path = "Dataset/output mosaic augmentation"          # Path to save the mosaic images
os.makedirs(output_path, exist_ok=True)

mosaic_augmentation_with_test(background_picture_for_mosaic, main_data_picture, output_path)

# citation; https://www.kaggle.com/datasets/pankajkumar2002/random-image-sample-dataset