### Data Augmentation

We need a folder containing the base training data and a second folder where the transformed images will be stored.

In [1]:
import os
import random
import shutil

import cv2 as cv
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tqdm import tqdm

from helpers import *

In [2]:
# Show where the kernel is running: the dataset paths below are relative
current_directory = os.getcwd()
print("Current Directory:", current_directory)

# Source dataset: original images and their ground-truth labels
BASE_TRAINING = '../uavid_v1.5_official_release_image/all_images_combined/'
BASE_TRAIN_IMAGES = f'{BASE_TRAINING}Images/'
BASE_TRAIN_GROUNDTRUTH = f'{BASE_TRAINING}binary_labels/'

# Destination dataset: where the augmented images and labels are written
TRAINING = '../uavid_v1.5_official_release_image/all_images_combined_augmented/'
TRAIN_IMAGES = f'{TRAINING}Images/'
TRAIN_GROUNDTRUTH = f'{TRAINING}binary_labels/'

# Alternative folder layout, kept disabled:
# BASE_TRAINING = 'base_training/'
# BASE_TRAIN_IMAGES = BASE_TRAINING + 'images/'
# BASE_TRAIN_GROUNDTRUTH = BASE_TRAINING + 'groundtruth/'
#
# TRAINING = 'training/'
# TRAIN_IMAGES = TRAINING + 'images/'
# TRAIN_GROUNDTRUTH = TRAINING + 'groundtruth/'

Current Directory: C:\Users\thami\OneDrive\Dokumente\EPFL\Master\Projet_de_Semestre\CSE_Project\unet-road-segmentation


### Add 90°, 180° and 270° rotations

In [3]:
# Re-execute helpers.py in this kernel so the rotation helper used in the next cells is up to date.
%run helpers.py # To refresh the helpers file if there is an error

In [4]:
# Generate the rotated images
image_names = os.listdir(BASE_TRAIN_IMAGES)  # list the folder once and reuse it for the progress total
for img_name in tqdm(image_names, total=len(image_names)):
    src_path = os.path.join(BASE_TRAIN_IMAGES, img_name)

    # Skip directories. The check needs the full path: a bare file name
    # would be resolved against the current working directory instead.
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be decoded
        tqdm.write('Skipping unreadable file: ' + src_path)
        continue

    # splitext removes the extension whatever its length (img_name[:-4] assumed 3 characters)
    stem = os.path.splitext(img_name)[0]
    img_train_rotations = get_rotations_0_90_180_270(image)
    for i, rotated_image in enumerate(img_train_rotations[1:]): # Avoid original image
        # i is 0, 1, 2 for the remaining entries, giving the suffixes 90, 180, 270
        cv.imwrite(os.path.join(TRAIN_IMAGES, stem + '_rotation_' + str((i+1)*90) + ".png"), rotated_image)

  return np.array(img_rotations)
100%|████████████████████████████████████████████████████████████████████████████████| 270/270 [07:11<00:00,  1.60s/it]


In [5]:
# Generate the rotated groundtruths
label_names = os.listdir(BASE_TRAIN_GROUNDTRUTH)  # list the folder once and reuse it for the progress total
for img_name in tqdm(label_names, total=len(label_names)):
    src_path = os.path.join(BASE_TRAIN_GROUNDTRUTH, img_name)

    # Skip directories. The check needs the full path: a bare file name
    # would be resolved against the current working directory instead.
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be decoded
        tqdm.write('Skipping unreadable file: ' + src_path)
        continue

    # Keep only the first channel so the label is written back as a single-channel image
    image = image[:, :, 0]

    # splitext removes the extension whatever its length (img_name[:-4] assumed 3 characters)
    stem = os.path.splitext(img_name)[0]
    img_train_rotations = get_rotations_0_90_180_270(image)
    for i, rotated_image in enumerate(img_train_rotations[1:]): # Avoid original image
        # i is 0, 1, 2 for the remaining entries, giving the suffixes 90, 180, 270
        cv.imwrite(os.path.join(TRAIN_GROUNDTRUTH, stem + '_rotation_' + str((i+1)*90) + ".png"), rotated_image)

100%|████████████████████████████████████████████████████████████████████████████████| 270/270 [00:28<00:00,  9.46it/s]


### Add randomly rotated images

In [6]:
# Re-execute helpers.py in this kernel so the random-rotation helper used below is up to date.
%run helpers.py # To refresh the helpers file if there is an error

In [7]:
# Pick how many rotations you want per image,
# if you want more rotations delete all images and change the number here to avoid name conflicts
ROTATIONS_PER_IMAGE = 3

# Fixed seed so that re-running this cell draws the same angles and centers
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# One (degree, center) pair per output file. The image cell and the groundtruth
# cell both read these lists by position, so an image and its label get the
# same transform as long as both folders are listed in the same order.
degrees = []
centers = []
for _ in range(ROTATIONS_PER_IMAGE*len(os.listdir(BASE_TRAIN_IMAGES))):
    degrees.append(random.randint(0, 90)) # random rotation of degree between 0 and 90
    centers.append((random.randint(40, 60), random.randint(40, 60))) # random center between 40% and 60% of image width & length

In [8]:
# Generate the rotated images
# NOTE: `index` walks through degrees/centers in listing order; the groundtruth
# cell does the same, so both folders must list their files in the same order.
index = 0
image_names = os.listdir(BASE_TRAIN_IMAGES)  # list the folder once and reuse it for the progress total
for img_name in tqdm(image_names, total=len(image_names)):
    src_path = os.path.join(BASE_TRAIN_IMAGES, img_name)

    # Skip directories. The check needs the full path: a bare file name
    # would be resolved against the current working directory instead.
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # Fail loudly: skipping a file here would shift `index` and pair every
        # following image with parameters meant for a different file.
        raise IOError('Could not read image: ' + src_path)

    # splitext removes the extension whatever its length (img_name[:-4] assumed 3 characters)
    stem = os.path.splitext(img_name)[0]
    for i in range(ROTATIONS_PER_IMAGE):
        random_rotation = get_rotation_deg_n(image, degrees[index], centers[index])
        cv.imwrite(os.path.join(TRAIN_IMAGES, stem + '_random_rotation_' + str(i) + ".png"), random_rotation)
        index += 1

100%|████████████████████████████████████████████████████████████████████████████████| 270/270 [06:00<00:00,  1.33s/it]


In [9]:
# Generate the rotated groundtruths
# NOTE: `index` walks through degrees/centers in listing order; the image
# cell does the same, so both folders must list their files in the same order.
index = 0
label_names = os.listdir(BASE_TRAIN_GROUNDTRUTH)  # list the folder once and reuse it for the progress total
for img_name in tqdm(label_names, total=len(label_names)):
    src_path = os.path.join(BASE_TRAIN_GROUNDTRUTH, img_name)

    # Skip directories. The check needs the full path: a bare file name
    # would be resolved against the current working directory instead.
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # Fail loudly: skipping a file here would shift `index` and pair every
        # following label with parameters meant for a different file.
        raise IOError('Could not read image: ' + src_path)

    # Keep only the first channel so the label is written back as a single-channel image
    image = image[:, :, 0]

    # splitext removes the extension whatever its length (img_name[:-4] assumed 3 characters)
    stem = os.path.splitext(img_name)[0]
    for i in range(ROTATIONS_PER_IMAGE):
        random_rotation = get_rotation_deg_n(image, degrees[index], centers[index])
        cv.imwrite(os.path.join(TRAIN_GROUNDTRUTH, stem + '_random_rotation_' + str(i) + ".png"), random_rotation)
        index += 1

100%|████████████████████████████████████████████████████████████████████████████████| 270/270 [00:36<00:00,  7.35it/s]


### Add horizontally and vertically flipped images

In [10]:
# Re-execute helpers.py in this kernel so the flip helper used below is up to date.
%run helpers.py # To refresh the helpers file if there is an error

In [11]:
# File-name suffixes for the flipped outputs, looked up by position in get_flipped_images(...)[1:].
# NOTE(review): assumes the helper returns [original, x-flip, y-flip] in that order — confirm in helpers.py
flip_type = ['x', 'y'] # Types of wanted flips

In [12]:
# Generate the flipped images
image_names = os.listdir(BASE_TRAIN_IMAGES)  # list the folder once and reuse it for the progress total
for img_name in tqdm(image_names, total=len(image_names)):
    src_path = os.path.join(BASE_TRAIN_IMAGES, img_name)

    # Skip directories. The check needs the full path: a bare file name
    # would be resolved against the current working directory instead.
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be decoded
        tqdm.write('Skipping unreadable file: ' + src_path)
        continue

    # splitext removes the extension whatever its length (img_name[:-4] assumed 3 characters)
    stem = os.path.splitext(img_name)[0]
    img_train_flips = get_flipped_images(image)
    for i, flipped_image in enumerate(img_train_flips[1:]): # Avoid original image
        # flip_type[i] supplies the file-name suffix for the i-th flipped variant
        cv.imwrite(os.path.join(TRAIN_IMAGES, stem + '_flipped_' + flip_type[i] + ".png"), flipped_image)

100%|████████████████████████████████████████████████████████████████████████████████| 270/270 [05:02<00:00,  1.12s/it]


In [13]:
# Generate the flipped groundtruths
label_names = os.listdir(BASE_TRAIN_GROUNDTRUTH)  # list the folder once and reuse it for the progress total
for img_name in tqdm(label_names, total=len(label_names)):
    src_path = os.path.join(BASE_TRAIN_GROUNDTRUTH, img_name)

    # Skip directories. The check needs the full path: a bare file name
    # would be resolved against the current working directory instead.
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be decoded
        tqdm.write('Skipping unreadable file: ' + src_path)
        continue

    # Keep only the first channel so the label is written back as a single-channel image
    image = image[:, :, 0]

    # splitext removes the extension whatever its length (img_name[:-4] assumed 3 characters)
    stem = os.path.splitext(img_name)[0]
    img_train_flips = get_flipped_images(image)
    for i, flipped_image in enumerate(img_train_flips[1:]): # Avoid original image
        # flip_type[i] supplies the file-name suffix for the i-th flipped variant
        cv.imwrite(os.path.join(TRAIN_GROUNDTRUTH, stem + '_flipped_' + flip_type[i] + ".png"), flipped_image)

100%|████████████████████████████████████████████████████████████████████████████████| 270/270 [00:23<00:00, 11.62it/s]


### Add Gaussian noise

In [26]:
# Re-execute helpers.py in this kernel so the noise helper used below is up to date.
%run helpers.py # To refresh the helpers file if there is an error

In [27]:
# Variance of the gaussian noise.
# Passed to noisy(...) as `var` and also embedded in the output file names,
# so changing it produces a new set of files instead of overwriting the old ones.
VARIANCE = 150

In [28]:
# Generate the noisy images
image_names = os.listdir(BASE_TRAIN_IMAGES)  # list the folder once and reuse it for the progress total
for img_name in tqdm(image_names, total=len(image_names)):
    src_path = os.path.join(BASE_TRAIN_IMAGES, img_name)

    # Skip directories, as the rotation and flip cells do
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be decoded
        tqdm.write('Skipping unreadable file: ' + src_path)
        continue

    img_train_noise = noisy('gauss', image, var=VARIANCE)

    # splitext removes the extension whatever its length (img_name[:-4] assumed 3 characters)
    stem = os.path.splitext(img_name)[0]
    cv.imwrite(os.path.join(TRAIN_IMAGES, stem + '_noise_' + 'gauss_var_' + str(VARIANCE) + ".png"), img_train_noise)

100%|██████████| 100/100 [00:06<00:00, 16.43it/s]


In [29]:
# Copy the groundtruths as they do not change
# (saved under the same suffix as the noisy images so each image keeps a same-named label)
label_names = os.listdir(BASE_TRAIN_GROUNDTRUTH)  # list the folder once and reuse it for the progress total
for img_name in tqdm(label_names, total=len(label_names)):
    src_path = os.path.join(BASE_TRAIN_GROUNDTRUTH, img_name)

    # Skip directories, as the rotation and flip cells do
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be decoded
        tqdm.write('Skipping unreadable file: ' + src_path)
        continue

    # Keep only the first channel so the label is written back as a single-channel image
    image = image[:, :, 0]

    # splitext removes the extension whatever its length (img_name[:-4] assumed 3 characters)
    stem = os.path.splitext(img_name)[0]
    cv.imwrite(os.path.join(TRAIN_GROUNDTRUTH, stem + '_noise_' + 'gauss_var_' + str(VARIANCE) + ".png"), image)

100%|██████████| 100/100 [00:00<00:00, 100.85it/s]


### Add salt and pepper noise

In [30]:
# Re-execute helpers.py in this kernel so the noise helper used below is up to date.
%run helpers.py # To refresh the helpers file if there is an error

In [31]:
# Ratio of pixels to be corrupted
# NOTE(review): in this notebook the value is only written into the output file
# names; the noisy('s&p', ...) call below receives var=VARIANCE instead.
# Confirm in helpers.py how the corruption ratio is actually configured.
CORRUPTION_RATIO = 0.01

In [32]:
# Generate the noisy images
image_names = os.listdir(BASE_TRAIN_IMAGES)  # list the folder once and reuse it for the progress total
for img_name in tqdm(image_names, total=len(image_names)):
    src_path = os.path.join(BASE_TRAIN_IMAGES, img_name)

    # Skip directories, as the rotation and flip cells do
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be decoded
        tqdm.write('Skipping unreadable file: ' + src_path)
        continue

    # NOTE(review): the file name advertises CORRUPTION_RATIO, but the helper is
    # called with var=VARIANCE. Left unchanged because the signature of noisy()
    # is not visible here — confirm which argument controls the s&p ratio.
    img_train_noise = noisy('s&p', image, var=VARIANCE)

    # splitext removes the extension whatever its length (img_name[:-4] assumed 3 characters)
    stem = os.path.splitext(img_name)[0]
    cv.imwrite(os.path.join(TRAIN_IMAGES, stem + '_noise_' + 's&p_corrupt_' + str(CORRUPTION_RATIO) + ".png"), img_train_noise)

100%|██████████| 100/100 [00:05<00:00, 19.21it/s]


In [33]:
# Copy the groundtruths as they do not change
# (saved under the same suffix as the noisy images so each image keeps a same-named label)
label_names = os.listdir(BASE_TRAIN_GROUNDTRUTH)  # list the folder once and reuse it for the progress total
for img_name in tqdm(label_names, total=len(label_names)):
    src_path = os.path.join(BASE_TRAIN_GROUNDTRUTH, img_name)

    # Skip directories, as the rotation and flip cells do
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be decoded
        tqdm.write('Skipping unreadable file: ' + src_path)
        continue

    # Keep only the first channel so the label is written back as a single-channel image
    image = image[:, :, 0]

    # splitext removes the extension whatever its length (img_name[:-4] assumed 3 characters)
    stem = os.path.splitext(img_name)[0]
    cv.imwrite(os.path.join(TRAIN_GROUNDTRUTH, stem + '_noise_' + 's&p_corrupt_' + str(CORRUPTION_RATIO) + ".png"), image)

100%|██████████| 100/100 [00:00<00:00, 108.57it/s]


### Add original images

In [34]:
# Get the original images
image_names = os.listdir(BASE_TRAIN_IMAGES)  # list the folder once and reuse it for the progress total
for img_name in tqdm(image_names, total=len(image_names)):
    src_path = os.path.join(BASE_TRAIN_IMAGES, img_name)

    # Skip directories, as the rotation and flip cells do
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be decoded
        tqdm.write('Skipping unreadable file: ' + src_path)
        continue

    # Two-argument join: no longer depends on TRAIN_IMAGES ending with a separator
    cv.imwrite(os.path.join(TRAIN_IMAGES, img_name), image)

100%|██████████| 100/100 [00:04<00:00, 24.06it/s]


In [35]:
# Get the original groundtruths
label_names = os.listdir(BASE_TRAIN_GROUNDTRUTH)  # list the folder once and reuse it for the progress total
for img_name in tqdm(label_names, total=len(label_names)):
    src_path = os.path.join(BASE_TRAIN_GROUNDTRUTH, img_name)

    # Skip directories, as the rotation and flip cells do
    if os.path.isdir(src_path):
        continue

    image = cv.imread(src_path)
    if image is None:
        # cv.imread returns None instead of raising when a file cannot be decoded
        tqdm.write('Skipping unreadable file: ' + src_path)
        continue

    # Keep only the first channel so the label is written back as a single-channel image
    image = image[:, :, 0]

    # Two-argument join: no longer depends on TRAIN_GROUNDTRUTH ending with a separator
    cv.imwrite(os.path.join(TRAIN_GROUNDTRUTH, img_name), image)

100%|██████████| 100/100 [00:00<00:00, 108.09it/s]


### Delete images

In [13]:
# Remove all images and groundtruths in the folders, do this whenever you want to try new augmentation sets
def _clear_directory(directory):
    """Delete every entry directly inside `directory`, keeping the folder itself.

    Files and symlinks are unlinked; sub-directories are removed recursively.
    A failure on one entry is printed and the remaining entries are still processed.
    """
    entries = os.listdir(directory)  # list once and reuse it for the progress total
    for filename in tqdm(entries, total=len(entries)):
        file_path = os.path.join(directory, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            # Only filesystem errors are treated as best-effort; programming
            # errors (e.g. a missing import) are no longer silently printed.
            print('Failed to delete %s. Reason: %s' % (file_path, e))


# Same clean-up for both output folders: images first, then groundtruths
for augmented_dir in (TRAIN_IMAGES, TRAIN_GROUNDTRUTH):
    _clear_directory(augmented_dir)

  0%|          | 0/976 [00:00<?, ?it/s]100%|██████████| 976/976 [00:03<00:00, 291.57it/s]
100%|██████████| 1400/1400 [00:04<00:00, 344.12it/s]
