# <u> Q3: Data Augmentation in mainstream machine learning frameworks </u>

## <u> Data augmentation in PyTorch </u>
    
### <u> In this Jupyter Notebook I will be demonstrating how data augmentation is achieved in PyTorch </u>

### Installing PyTorch, torchvision and pillow:

In [1]:
#pip install torch torchvision
#pip install pillow

### Importing necessary libraries:

In [2]:
import torch
import torchvision.transforms as transforms
from PIL import Image
import os
import random
import logging
import matplotlib.pyplot as plt
import numpy as np

# Configure the root logger to emit INFO-level (and higher) messages,
# so the per-image progress logged during augmentation is visible
logging.basicConfig(level=logging.INFO)

### Setting the random seed for reproducibility:

Set the random seed for reproducibility (optional). This ensures that you can obtain the same augmented images every time you run the code. (Remove this part if you want more randomness)

In [3]:
# Seed PyTorch's global RNG and Python's built-in `random` module with the
# same fixed value so repeated runs start from the same random state
# NOTE(review): NumPy's RNG is not seeded here — add np.random.seed(random_seed)
# if any later code draws random numbers from NumPy
random_seed = 42
torch.manual_seed(random_seed)
random.seed(random_seed)

### Defining the transformations for the desired augmentations:

In [4]:
# Create a sequence of image transformations for data augmentation.
# Compose applies the steps in the order listed, each to the output of the previous one.
transform = transforms.Compose([
    # Randomly flip the image horizontally with a probability of 0.5
    transforms.RandomHorizontalFlip(p=0.5),
    # Randomly rotate the image within a range of ±20 degrees
    transforms.RandomRotation(degrees=20),
    # Apply a random affine transformation: rotation in [-10, 10] degrees, translation of up to
    # 10% of the image size in each direction, scaling in [0.8, 1.2], shear in [-10, 10] degrees
    transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.8, 1.2), shear=(-10, 10)),
    # Randomly adjust brightness, contrast, and saturation (each with a jitter setting of 0.2)
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    # Randomly apply a perspective transformation (distortion scale 0.2) with a probability of 0.5
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
])

### Loading the COTS dataset images and setting the path to save augmented images:

In [5]:
# Directory holding the original COTS dataset images
# (relative path, so it is resolved against the current working directory)
image_directory = 'COTS_Dataset'

# Directory that receives the augmented images (also a relative path)
output_directory = 'Augmented_images'
# Create the output directory up front; exist_ok=True makes re-running this cell harmless
os.makedirs(output_directory, exist_ok=True)

### Defining the augment_images function:

In [6]:
def augment_images(image_directory, output_directory, transform, num_augmentations=5):
    """Write randomly augmented copies of every image found in a directory.

    Each file in ``image_directory`` with a recognised image extension is
    loaded as RGB, converted to a tensor, passed through ``transform``
    ``num_augmentations`` times, and every result is saved to
    ``output_directory`` as ``aug_<i>_<original filename>``.

    Args:
        image_directory: Directory containing the source images. Only its
            direct entries are examined; sub-directories are not walked.
        output_directory: Directory that receives the augmented images. It
            is created if it does not exist yet.
        transform: Callable applied to the image tensor; its result is
            handed to ``transforms.ToPILImage`` for saving.
        num_augmentations: Number of augmented copies written per image.

    Returns:
        None. A failure on an individual image is logged and does not stop
        the remaining images from being processed.
    """
    # File extensions (compared case-insensitively) that are treated as images
    allowed_extensions = {'.jpg', '.jpeg', '.png', '.bmp'}

    # Do not rely on the caller having prepared the destination directory
    os.makedirs(output_directory, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting keeps the sequence
    # of random draws (and therefore the output) stable for a fixed seed
    for filename in sorted(os.listdir(image_directory)):
        # Skip anything that does not look like an image file
        if os.path.splitext(filename)[1].lower() not in allowed_extensions:
            continue

        try:
            # The context manager closes the underlying file handle as soon
            # as the pixel data has been copied into the tensor
            with Image.open(os.path.join(image_directory, filename)) as source:
                img = transforms.ToTensor()(source.convert('RGB'))

            # Build the tensor -> PIL converter once per image rather than
            # once per augmentation
            to_pil = transforms.ToPILImage()

            for i in range(num_augmentations):
                # Each call draws fresh random parameters from the transform
                augmented_img = transform(img)
                # The original file name (and extension) is kept, prefixed with the copy index
                output_filename = os.path.join(output_directory, f'aug_{i}_{filename}')
                to_pil(augmented_img).save(output_filename)

            # Log that the image has been processed successfully
            logging.info(f"Processed image {filename}")

        # Deliberately broad: this is a best-effort batch job, so one
        # unreadable or unsavable image must not abort the whole run
        except Exception as e:
            logging.error(f"Error processing image {filename}: {e}")

###  Running the augment_images function:

In [7]:
# Write 5 augmented copies of every image in image_directory into output_directory
augment_images(image_directory, output_directory, transform, num_augmentations=5)

INFO:root:Processed image beer_mug_colour.jpeg
INFO:root:Processed image bowl_colour.jpeg
INFO:root:Processed image cardboard_v1_colour.jpeg
INFO:root:Processed image cordovado_colour.jpeg
INFO:root:Processed image digital_natives_book_colour.jpeg
INFO:root:Processed image elephant_colour.jpeg
INFO:root:Processed image headphones_colour.jpeg
INFO:root:Processed image lp_switz_colour.jpeg
INFO:root:Processed image macbook_colour.jpeg
INFO:root:Processed image shampoo_colour.jpeg


### Comparison

In [None]:
def compare_histograms(image_directory, augmented_directory, image_name, augmented_image_name):
    """Show an original and an augmented image together with their grayscale histograms.

    Both images are converted to 8-bit grayscale and resized to 256x256, then
    drawn in a 2x2 grid: the images on the top row and the matching intensity
    histograms on the bottom row.

    Args:
        image_directory: Directory containing the original image.
        augmented_directory: Directory containing the augmented image.
        image_name: File name of the original image inside ``image_directory``.
        augmented_image_name: File name of the augmented image inside
            ``augmented_directory``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    def _load_grayscale(directory, name):
        # convert()/resize() return new in-memory images, so the file handle
        # can be closed as soon as the with-block ends
        with Image.open(os.path.join(directory, name)) as source:
            return source.convert('L').resize((256, 256))

    panels = (
        ('Original', _load_grayscale(image_directory, image_name)),
        ('Augmented', _load_grayscale(augmented_directory, augmented_image_name)),
    )

    # Set the figure size
    plt.figure(figsize=(12, 6))

    for column, (label, img) in enumerate(panels, start=1):
        # Top row: the grayscale image itself
        plt.subplot(2, 2, column)
        plt.imshow(img, cmap='gray')
        plt.title(f'{label} Image')

        # Bottom row: intensity histogram. The explicit range gives one bin per
        # 8-bit gray level for both images; without it the bin edges follow
        # each image's own min/max and the two plots are not comparable
        plt.subplot(2, 2, column + 2)
        plt.hist(np.asarray(img).ravel(), bins=256, range=(0, 256), color='black')
        plt.title(f'{label} Image Histogram')

    # Add space between subplots
    plt.subplots_adjust(wspace=0.5, hspace=0.5)

    plt.show()

# Compare one original image with its first augmented copy
# (augment_images names copy number 0 of a file 'aug_0_<original filename>')
compare_histograms(image_directory, output_directory, 'beer_mug_colour.jpeg', 'aug_0_beer_mug_colour.jpeg')