# Implementation of Data Augmentation Techniques

In [142]:
import os
import cv2
import shutil
import numpy as np

## Implementation of Photometric Distortions

In [143]:
def adjust_brightness(image, factor):
    """Shift the brightness of an image by adding a constant to every pixel.

    Args:
        image: Input image array (expected to hold 8-bit pixel values, as
            returned by ``cv2.imread``).
        factor: Additive offset applied to every value; positive brightens,
            negative darkens.

    Returns:
        A new ``uint8`` array of the same shape, saturated to ``[0, 255]``.
    """
    # cv2.convertScaleAbs is deliberately not used here: it takes the absolute
    # value of the result, so darkening a dark pixel (10 + (-50)) would fold
    # back to 40 instead of saturating at 0.
    shifted = np.asarray(image, dtype=np.float64) + factor
    return np.clip(np.rint(shifted), 0, 255).astype(np.uint8)

def adjust_contrast(image, factor):
    """Scale the contrast of an image by multiplying every pixel by *factor*.

    Args:
        image: Input image array accepted by ``cv2.convertScaleAbs``.
        factor: Multiplicative gain; values above 1 increase contrast,
            values between 0 and 1 reduce it.

    Returns:
        A new ``uint8`` array with the scaled, saturated pixel values.
    """
    # beta is the additive offset of convertScaleAbs; it must be 0 so that a
    # factor of 1.0 leaves the image unchanged (a beta of 1.0 would add one
    # intensity level to every pixel).
    return cv2.convertScaleAbs(image, alpha=factor, beta=0.0).astype(np.uint8)

def shift_color_channels(image, r_shift, g_shift, b_shift):
    """Add a per-channel offset to a BGR image, saturating at 0 and 255.

    Args:
        image: Image array with at least three channels on the last axis,
            indexed as B, G, R (channel 0 receives ``b_shift``, channel 2
            receives ``r_shift``). Expected to be ``uint8``.
        r_shift: Offset added to channel 2.
        g_shift: Offset added to channel 1.
        b_shift: Offset added to channel 0.

    Returns:
        A shifted copy of *image*; the input array is not modified.
    """
    shifted_image = image.copy()
    for channel, shift in enumerate((b_shift, g_shift, r_shift)):
        # Widen before adding: uint8 + small positive shift stays uint8 and
        # wraps around (250 + 50 -> 44) before np.clip ever sees the overflow.
        widened = shifted_image[:, :, channel].astype(np.int32) + shift
        shifted_image[:, :, channel] = np.clip(widened, 0, 255).astype(np.uint8)
    return shifted_image

def adjust_saturation(image, factor):
    """Scale the saturation of a BGR image.

    The image is converted to HSV, channel 1 (saturation) is multiplied by
    *factor* and clipped to ``[0, 255]``, and the result is converted back
    to BGR.

    Args:
        image: BGR image accepted by ``cv2.cvtColor`` (expected ``uint8``).
        factor: Multiplier applied to the saturation channel.

    Returns:
        A new ``uint8`` BGR image.
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Scale in floating point: multiplying the uint8 channel by an integer
    # factor would stay uint8 and wrap around before the clip could saturate.
    saturation = hsv_image[:, :, 1].astype(np.float64) * factor
    hsv_image[:, :, 1] = np.clip(saturation, 0, 255)
    return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2BGR).astype(np.uint8)

def add_gaussian_noise(image, mean=0, std=25):
    """Return a copy of *image* with additive Gaussian noise.

    Args:
        image: Input image array; one noise sample is drawn per element.
        mean: Mean of the normal distribution the noise is drawn from.
        std: Standard deviation of that distribution.

    Returns:
        A ``uint8`` array of the same shape, with the noisy values clipped
        to ``[0, 255]``.
    """
    # The sum is a float array, so it can be clipped in place safely.
    noisy = image + np.random.normal(loc=mean, scale=std, size=image.shape)
    np.clip(noisy, 0, 255, out=noisy)
    return noisy.astype(np.uint8)


## Implementation of Geometric Distortions

In [144]:
def rotate_image(image, angle):
    """Rotate *image* about its centre, keeping the original canvas size.

    Args:
        image: Input image array.
        angle: Rotation angle handed to ``cv2.getRotationMatrix2D``.

    Returns:
        A ``uint8`` image with the same width and height as the input.
    """
    height, width = image.shape[:2]
    centre = (width / 2, height / 2)
    # Scale of 1: pure rotation, no zoom.
    rotation = cv2.getRotationMatrix2D(centre, angle, 1)
    rotated = cv2.warpAffine(image, rotation, (width, height))
    return rotated.astype(np.uint8)

def scale_image(image, scale_factor):
    """Resize *image* by the same factor along both axes.

    Args:
        image: Input image array.
        scale_factor: Multiplier applied to both width and height.

    Returns:
        The resized image as ``uint8``; its size differs from the input.
    """
    # dsize is None so the output size is derived from fx / fy.
    resized = cv2.resize(image, None, fx=scale_factor, fy=scale_factor)
    return resized.astype(np.uint8)

def translate_image(image, tx, ty):
    """Shift *image* by ``tx`` pixels along x and ``ty`` pixels along y.

    Args:
        image: Input image array.
        tx: Horizontal offset placed in the affine matrix.
        ty: Vertical offset placed in the affine matrix.

    Returns:
        A ``uint8`` image with the same width and height as the input.
    """
    height, width = image.shape[0], image.shape[1]
    # Identity linear part plus a translation column.
    offset_matrix = np.array([[1, 0, tx], [0, 1, ty]], dtype=np.float32)
    moved = cv2.warpAffine(image, offset_matrix, (width, height))
    return moved.astype(np.uint8)

def shear_image(image, shear_factor):
    """Apply a horizontal shear to *image*.

    The affine matrix maps ``x`` to ``x + shear_factor * y`` and leaves
    ``y`` unchanged.

    Args:
        image: Input image array.
        shear_factor: Shear coefficient placed in the affine matrix.

    Returns:
        A ``uint8`` image with the same width and height as the input.
    """
    height, width = image.shape[0], image.shape[1]
    shear_matrix = np.array([[1, shear_factor, 0], [0, 1, 0]], dtype=np.float32)
    sheared = cv2.warpAffine(image, shear_matrix, (width, height))
    return sheared.astype(np.uint8)

def flip_image(image, flip_code):
    """Mirror *image* using ``cv2.flip``.

    Args:
        image: Input image array.
        flip_code: Axis selector passed straight to ``cv2.flip`` (the caller
            in this file draws it from ``-1``, ``0`` and ``1``).

    Returns:
        The flipped image as ``uint8``.
    """
    mirrored = cv2.flip(image, flip_code)
    return mirrored.astype(np.uint8)

In [145]:
def load_images(path: str):
    """Recursively load every ``.jpeg`` file found under *path*.

    Args:
        path: Directory to walk with ``os.walk``.

    Returns:
        A list of ``{"image": ..., "name": ...}`` dicts, where ``image`` is
        the array returned by ``cv2.imread`` and ``name`` is the file name
        without its extension. Files that OpenCV cannot decode are skipped.
    """
    images = []
    for root, _dirs, files in os.walk(path):
        for file in files:
            if not file.endswith(".jpeg"):
                continue
            file_path = os.path.join(root, file)
            image = cv2.imread(file_path)
            # cv2.imread signals failure by returning None instead of raising;
            # keeping such an entry would crash the first augmentation.
            if image is None:
                print(f"Skipping unreadable image {file_path}")
                continue
            # splitext keeps dots inside the stem ("a.b.jpeg" -> "a.b"), so
            # distinct files do not collapse onto the same name.
            images.append({"image": image, "name": os.path.splitext(file)[0]})
    return images

def generate_random_value(value1, value2, excluded_range):
    """Draw a uniform random value that lies outside an excluded interval.

    Values are sampled with ``np.random.uniform(value1, value2)`` and
    rejected while they fall inside ``excluded_range`` (bounds inclusive).

    Args:
        value1: One end of the sampling interval.
        value2: The other end of the sampling interval.
        excluded_range: ``(low, high)`` pair; samples with
            ``low <= sample <= high`` are rejected.

    Returns:
        The first sample that is not inside the excluded interval.

    Raises:
        ValueError: If the excluded interval covers the whole sampling
            interval, in which case no sample could ever be accepted.
    """
    low, high = min(value1, value2), max(value1, value2)
    # Without this guard the rejection loop below would spin forever.
    if excluded_range[0] <= low and high <= excluded_range[1]:
        raise ValueError(
            f"excluded_range {excluded_range} covers the whole sampling "
            f"interval [{low}, {high}]"
        )
    while True:
        random_value = np.random.uniform(value1, value2)
        if not (excluded_range[0] <= random_value <= excluded_range[1]):
            return random_value

In [148]:
# Load every source image from the "images" directory once; the augmentation
# loop further down iterates over this list.
print("Loading images")
images = load_images("images")
print("Images loaded!")

def generate_augmented_images(image: dict[str, np.ndarray], num_augmentations: int = 5, type: str = "all"):
    """Create randomly augmented variants of one loaded image.

    Each round picks one operation at random and applies it to the original
    image (operations are not stacked). Gaussian noise is offered only until
    it has been picked once. Brightness is not among the candidate
    operations.

    Args:
        image: Dict with the pixel array under ``"image"`` and a label under
            ``"name"``.
        num_augmentations: Number of variants to produce.
        type: ``"photometric"`` or ``"geometric"`` restricts the candidate
            operations to that family; any other value enables both.

    Returns:
        A list of ``{"image", "name", "type"}`` dicts, one per variant, where
        ``"type"`` is the name of the operation that was applied.
    """
    source = image["image"]
    label = image["name"]

    photometric_ops = ["contrast", "color_shift", "saturation"]
    geometric_ops = ["rotate", "scale", "translate", "shear", "flip"]

    # One thunk per operation; each draws its own random parameters only when
    # it is actually selected, so unused operations consume no randomness.
    appliers = {
        "contrast": lambda: adjust_contrast(
            source, generate_random_value(0.1, 2, (0.8, 1.2))
        ),
        "color_shift": lambda: shift_color_channels(
            source, *np.random.randint(-50, 50, size=3)
        ),
        "saturation": lambda: adjust_saturation(
            source, generate_random_value(0.1, 2, (0.8, 1.2))
        ),
        "gaussian_noise": lambda: add_gaussian_noise(source),
        "rotate": lambda: rotate_image(
            source, generate_random_value(-180, 180, (-10, 10))
        ),
        "scale": lambda: scale_image(
            source, generate_random_value(0.1, 2, (0.8, 1.2))
        ),
        "translate": lambda: translate_image(
            source, *np.random.randint(-100, 100, size=2)
        ),
        "shear": lambda: shear_image(
            source, generate_random_value(-0.5, 0.5, (-0.1, 0.1))
        ),
        "flip": lambda: flip_image(source, np.random.choice([-1, 0, 1])),
    }

    noise_used = False
    results: list = []
    for _ in range(num_augmentations):
        print(f"Augmenting image {label} with type {type}")

        if type == "photometric":
            candidates = list(photometric_ops)
        elif type == "geometric":
            candidates = list(geometric_ops)
        else:
            candidates = photometric_ops + geometric_ops
        # Noise is a photometric effect and may be applied at most once.
        if type != "geometric" and not noise_used:
            candidates.append("gaussian_noise")

        picked = np.random.choice(candidates)
        if picked == "gaussian_noise":
            noise_used = True

        results.append({"image": appliers[picked](), "name": label, "type": picked})

    return results

def _reset_directory(path):
    """Delete *path* if it exists and recreate it as an empty directory."""
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path, exist_ok=True)


def _save_augmented(augmented_images, output_dir):
    """Write each augmented image into *output_dir* as a JPEG file."""
    for index, augmented in enumerate(augmented_images):
        filename = f"augmented_{augmented['name']}_{index}_{augmented['type']}.jpeg"
        cv2.imwrite(os.path.join(output_dir, filename), augmented["image"])


# Start from empty output folders so results of earlier runs never mix in.
output_geometric_path = "./augmented_geometric_images/"
output_photometric_path = "./augmented_photometric_images/"
for output_path in (output_geometric_path, output_photometric_path):
    _reset_directory(output_path)

num_augmentations = 5

for input_image in images:
    augmented_photometric_images = generate_augmented_images(input_image, type="photometric", num_augmentations=num_augmentations)
    augmented_geometric_images = generate_augmented_images(input_image, type="geometric", num_augmentations=num_augmentations)

    # Each family goes to its own folder (photometric -> photometric path,
    # geometric -> geometric path).
    _save_augmented(augmented_photometric_images, output_photometric_path)
    _save_augmented(augmented_geometric_images, output_geometric_path)

print(f"Saved {len(images) * num_augmentations * 2} augmented images to {output_geometric_path} and {output_photometric_path}!")

Loading images
Images loaded!
Augmenting image 3_colour with type photometric
Augmenting image 3_colour with type photometric
Augmenting image 3_colour with type photometric
Augmenting image 3_colour with type photometric
Augmenting image 3_colour with type photometric
Augmenting image 3_colour with type geometric
Augmenting image 3_colour with type geometric
Augmenting image 3_colour with type geometric
Augmenting image 3_colour with type geometric
Augmenting image 3_colour with type geometric
Augmenting image books with type photometric
Augmenting image books with type photometric
Augmenting image books with type photometric
Augmenting image books with type photometric
Augmenting image books with type photometric
Augmenting image books with type geometric
Augmenting image books with type geometric
Augmenting image books with type geometric
Augmenting image books with type geometric
Augmenting image books with type geometric
Augmenting image boots_colour with type photometric
Augmenti