# Augmenting training images

README: 
1. First run the notebook "overlaying_images.ipynb".
2. Create a folder named "augmented" and copy the images and labels from the parent folder into it.
3. Then set the correct folder paths in the following cell.
4. Finally, run all of the cells in this notebook. This can take around 20 minutes.

In [10]:
# Note: Copy the original images into this folder first. The augmented images are written into this same folder.
INPUT_IMAGES_FOLDER = "/home/jetracer/Documents/3d_mai/data/train_threads_b/overlaying/train_val_split/val/augmented/images"

# Note: Must be the "augmented/labels" folder. Make sure it is not the labels folder of the original images.
DUPLICATE_LABELS_FOLDER_PATH = "/home/jetracer/Documents/3d_mai/data/train_threads_b/overlaying/train_val_split/val/augmented/labels"

# Note: Number of augmented copies created per original image. The originals are kept, so the final
# number of images is num(original images) + NUM_OF_AUGMENTED_IMAGES * num(original images).
NUM_OF_AUGMENTED_IMAGES = 4

In [11]:
import os
def get_files_in_subdirectories(folder_path, file_extension='', file_contains=''):
    """Recursively list the files below ``folder_path``.

    Args:
        folder_path: Directory that is traversed with ``os.walk``.
        file_extension: When non-empty, only file names ending with this
            suffix are returned.
        file_contains: When non-empty, only file names containing this
            substring are returned.

    Returns:
        A list of paths built as ``os.path.join(root, filename)``, in the
        order produced by ``os.walk``. Both filters must match when both
        are given; an empty filter matches every file name.
    """
    def _is_wanted(name):
        # An empty filter string disables that particular check.
        return (
            (file_contains == '' or file_contains in name)
            and (file_extension == '' or name.endswith(file_extension))
        )

    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(folder_path)
        for name in names
        if _is_wanted(name)
    ]

In [12]:
import cv2
import numpy as np
import random
import matplotlib.pyplot as plt


def color_jitter(image, brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1):
    """Apply a fixed colour shift to a BGR image via the HSV colour space.

    The adjustment is deterministic: no random factor is drawn here, so the
    same arguments always produce the same output for the same image.

    Args:
        image: BGR image accepted by ``cv2.cvtColor(..., cv2.COLOR_BGR2HSV)``.
            The 0..255 and 0..179 ranges used below are those OpenCV uses
            for 8-bit images (assumed here).
        brightness: The V channel is multiplied by ``1 + brightness``.
        contrast: The V channel is stretched around its mean by
            ``1 + contrast``.
        saturation: The S channel is multiplied by ``1 + saturation``.
        hue: Hue shift as a fraction of the full colour wheel
            (``0.5`` rotates the hue by 180 degrees). Wraps around.

    Returns:
        The adjusted image converted back to BGR.
    """
    # Convert the image to the HSV color space
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Adjust brightness: scale V in floating point so nothing wraps around.
    value = np.clip(hsv[:, :, 2] * (1.0 + brightness), 0, 255)

    # Adjust contrast: move V away from (or towards) its mean. Scaling the
    # saturation channel here would only change saturation, not contrast.
    value_mean = value.mean()
    value = np.clip(value_mean + (value - value_mean) * (1.0 + contrast), 0, 255)
    # Round before writing back so float round-off does not bias values down.
    hsv[:, :, 2] = np.rint(value)

    # Adjust saturation
    hsv[:, :, 1] = np.rint(np.clip(hsv[:, :, 1] * (1.0 + saturation), 0, 255))

    # Adjust hue: 8-bit OpenCV hue is stored as degrees / 2 (0..179), so the
    # full colour wheel is 180 units. Integer arithmetic keeps the result
    # strictly below 180 after the wrap-around.
    hue_shift = int(round(hue * 180))
    hsv[:, :, 0] = (hsv[:, :, 0].astype(np.int32) + hue_shift) % 180

    # Convert the image back to BGR
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

def apply_gaussian_blur(image, sigma=0.3):
    """Return ``image`` smoothed with ``cv2.GaussianBlur``.

    Args:
        image: Image passed straight to ``cv2.GaussianBlur``.
        sigma: Gaussian standard deviation handed to OpenCV as ``sigmaX``.

    Returns:
        The blurred image returned by OpenCV.
    """
    # A (0, 0) kernel size asks OpenCV to derive the kernel size from sigma.
    kernel_size = (0, 0)
    return cv2.GaussianBlur(image, kernel_size, sigma)

def apply_random_noise(image, mean=0, std=25, noise_scale=0.25, rng=None):
    """Add scaled Gaussian noise to an image and return it as ``uint8``.

    Args:
        image: Array-like image; the noise has the same shape.
        mean: Mean of the normal distribution the noise is sampled from.
        std: Standard deviation of that distribution.
        noise_scale: Factor applied to the sampled noise before it is added.
            The default of 0.25 keeps the attenuation that used to be
            hard-coded, so the default effective standard deviation is
            ``0.25 * std``.
        rng: Optional ``numpy.random.Generator`` for reproducible noise.
            When ``None`` the global ``np.random`` state is used.

    Returns:
        ``uint8`` array of the same shape with values clipped to 0..255.
    """
    image = np.asarray(image)

    # Generate random noise
    sampler = np.random if rng is None else rng
    noise = sampler.normal(mean, std, image.shape)

    # Add noise to the image; clip before the cast so values cannot wrap around.
    noisy_image = np.clip(image + noise_scale * noise, 0, 255).astype(np.uint8)

    return noisy_image

def augment_image(image_path, output_path):
    """Create one augmented copy of an image file.

    The image is read with ``cv2.imread``, passed through ``color_jitter``,
    ``apply_gaussian_blur`` and ``apply_random_noise`` in that order, and the
    result is written with ``cv2.imwrite``.

    Args:
        image_path: Path of the image to read.
        output_path: Path the augmented image is written to.

    Raises:
        OSError: If the image cannot be read or the result cannot be written.
    """
    # Read the original image. cv2.imread signals failure by returning None
    # instead of raising, which would otherwise surface as an obscure error
    # inside the colour conversion.
    image = cv2.imread(image_path)
    if image is None:
        raise OSError("Could not read image: {}".format(image_path))

    # Apply color jittering
    jittered_image = color_jitter(image)

    # Apply Gaussian blur
    blurred_image = apply_gaussian_blur(jittered_image)

    # Apply random noise
    noisy_image = apply_random_noise(blurred_image)

    # Save the augmented image. cv2.imwrite reports failure through its
    # return value; ignoring it would silently leave the image missing.
    if not cv2.imwrite(output_path, noisy_image):
        raise OSError("Could not write augmented image: {}".format(output_path))


# Collect the source images once, before any augmented file is written.
files_paths = get_files_in_subdirectories(INPUT_IMAGES_FOLDER, file_extension=".png")
for file_path in files_paths:
    # Skip the outputs of a previous run so that re-running this cell does not
    # augment already augmented images.
    if "_augmented_" in os.path.basename(file_path):
        continue

    # Split off only the extension. Replacing every "." would also rewrite
    # dots in directory names or elsewhere in the file name.
    base_path, extension = os.path.splitext(file_path)

    for i in range(NUM_OF_AUGMENTED_IMAGES):
        # Augmented images are written next to the copied original images.
        output_filepath = base_path + "_augmented_" + str(i+1) + extension
        print(output_filepath)
        augment_image(file_path, output_filepath)

# TODO: Shuffle the folder images

/home/jetracer/Documents/3d_mai/data/train_threads_b/overlaying/train_val_split/val/augmented/images/122_augmented_1.png
/home/jetracer/Documents/3d_mai/data/train_threads_b/overlaying/train_val_split/val/augmented/images/122_augmented_2.png
/home/jetracer/Documents/3d_mai/data/train_threads_b/overlaying/train_val_split/val/augmented/images/122_augmented_3.png
/home/jetracer/Documents/3d_mai/data/train_threads_b/overlaying/train_val_split/val/augmented/images/122_augmented_4.png
/home/jetracer/Documents/3d_mai/data/train_threads_b/overlaying/train_val_split/val/augmented/images/425_augmented_1.png
/home/jetracer/Documents/3d_mai/data/train_threads_b/overlaying/train_val_split/val/augmented/images/425_augmented_2.png
/home/jetracer/Documents/3d_mai/data/train_threads_b/overlaying/train_val_split/val/augmented/images/425_augmented_3.png
/home/jetracer/Documents/3d_mai/data/train_threads_b/overlaying/train_val_split/val/augmented/images/425_augmented_4.png
/home/jetracer/Documents/3d_mai/

### Watch out: the following cell finishes very quickly. Don't run it twice...

In [13]:
import shutil
# Duplicate every original label file once per augmented image and add
# "_augmented_<n>" to the filename.
# List all files in the folder
files = os.listdir(DUPLICATE_LABELS_FOLDER_PATH)

# Filter only TXT files
all_txt_files = [file for file in files if file.lower().endswith(".txt")]

# Keep only the original label files. Filtering up front, instead of stopping
# at the first augmented file, makes the result independent of the order in
# which os.listdir returns the entries.
txt_files = [file for file in all_txt_files if "_augmented" not in file]
if len(txt_files) != len(all_txt_files):
    print("Already run before! Existing augmented label files are not duplicated again.")

# Duplicate and rename each TXT file
for txt_file in txt_files:
    original_path = os.path.join(DUPLICATE_LABELS_FOLDER_PATH, txt_file)

    # Split the name once; it is the same for every copy of this file.
    name_without_extension, extension = os.path.splitext(txt_file)

    for i in range(NUM_OF_AUGMENTED_IMAGES):
        # Create a new file name with "_augmented_<n>" suffix
        new_name = name_without_extension + "_augmented_" + str(i+1) + extension

        # Create the new path
        new_path = os.path.join(DUPLICATE_LABELS_FOLDER_PATH, new_name)

        # Duplicate the TXT file (an existing copy is overwritten)
        shutil.copy(original_path, new_path)