### Data Augmentation

We need a folder containing the base training set and a second folder where the transformed images will be stored.

In [1]:
import os
import cv2
from tqdm import tqdm
from helpers import *

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf


from tensorflow.keras import layers

In [2]:
# Source dataset that the augmentation cells read from.
# BASE_TRAINING keeps its trailing slash, so sub-folders are appended
# without a leading "/" (avoids the doubled "base_training//images/").
BASE_TRAINING = 'base_training/'
BASE_TRAIN_IMAGES = BASE_TRAINING + 'images/'
BASE_TRAIN_GROUNDTRUTH = BASE_TRAINING + 'groundtruth/'

# Destination folders that the augmentation cells write to.
TRAINING = 'training'
TRAIN_IMAGES = TRAINING + '/images/'
TRAIN_GROUNDTRUTH = TRAINING + '/groundtruth/'

### Add 90°, 180° and 270° rotations

In [3]:
# Write rotated copies of every base image and every ground-truth mask into
# the augmented training folders. Images are processed first, then masks.
for source_dirname, target_dir, is_mask in (
    ('images', TRAIN_IMAGES, False),
    ('groundtruth', TRAIN_GROUNDTRUTH, True),
):
    # cv2.imwrite only reports failure when the folder is missing, so create it.
    os.makedirs(target_dir, exist_ok=True)
    for path, subdirs, files in os.walk(BASE_TRAINING):
        # Only handle directories whose last path component matches.
        if os.path.basename(path) != source_dirname:
            continue
        png_names = [name for name in files if name.endswith(".png")]  # PNG files only
        for file_name in tqdm(png_names, total=len(png_names)):
            source_path = os.path.join(path, file_name)
            image = cv2.imread(source_path, 1)
            if image is None:
                # imread signals an unreadable file by returning None.
                raise IOError("Could not read image: " + source_path)
            if is_mask:
                image = image[:, :, 0]  # masks are stored with a single channel
            rotations = get_rotations_0_90_180_270(image)
            stem = os.path.splitext(file_name)[0]
            # Index 0 is skipped (presumably the unrotated input - TODO confirm
            # against helpers); indices 1..3 are written.
            for rotation_idx in range(1, 4):
                target_path = os.path.join(target_dir, stem + '_rotation_' + str(rotation_idx) + ".png")
                if not cv2.imwrite(target_path, rotations[rotation_idx]):
                    raise IOError("Could not write image: " + target_path)

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:03<00:00, 33.27it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 233.65it/s]


### Add horizontally flipped, vertically flipped, and doubly flipped images

In [4]:
# Write flipped copies of every base image and every ground-truth mask into
# the augmented training folders. Images are processed first, then masks.
for source_dirname, target_dir, is_mask in (
    ('images', TRAIN_IMAGES, False),
    ('groundtruth', TRAIN_GROUNDTRUTH, True),
):
    # cv2.imwrite only reports failure when the folder is missing, so create it.
    os.makedirs(target_dir, exist_ok=True)
    for path, subdirs, files in os.walk(BASE_TRAINING):
        # Only handle directories whose last path component matches.
        if os.path.basename(path) != source_dirname:
            continue
        png_names = [name for name in files if name.endswith(".png")]  # PNG files only
        for file_name in tqdm(png_names, total=len(png_names)):
            source_path = os.path.join(path, file_name)
            image = cv2.imread(source_path, 1)
            if image is None:
                # imread signals an unreadable file by returning None.
                raise IOError("Could not read image: " + source_path)
            if is_mask:
                image = image[:, :, 0]  # masks are stored with a single channel
            flipped = get_flipped_images(image)
            stem = os.path.splitext(file_name)[0]
            # Index 0 is skipped (presumably the unflipped input - TODO confirm
            # against helpers); indices 1..3 are written.
            for flip_idx in range(1, 4):
                target_path = os.path.join(target_dir, stem + '_flipped_' + str(flip_idx) + ".png")
                if not cv2.imwrite(target_path, flipped[flip_idx]):
                    raise IOError("Could not write image: " + target_path)

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:02<00:00, 33.98it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 224.72it/s]


### Add noise

In [3]:
# Write a noisy copy of every base image, plus an unmodified copy of every
# ground-truth mask under the same '_noise_' suffix.
# NOTE(review): noisy() is presumably stochastic and no seed is set here, so
# re-running this cell may produce different images - confirm and seed if needed.
for source_dirname, target_dir, is_mask in (
    ('images', TRAIN_IMAGES, False),
    ('groundtruth', TRAIN_GROUNDTRUTH, True),
):
    # cv2.imwrite only reports failure when the folder is missing, so create it.
    os.makedirs(target_dir, exist_ok=True)
    for path, subdirs, files in os.walk(BASE_TRAINING):
        # Only handle directories whose last path component matches.
        if os.path.basename(path) != source_dirname:
            continue
        png_names = [name for name in files if name.endswith(".png")]  # PNG files only
        for file_name in tqdm(png_names, total=len(png_names)):
            source_path = os.path.join(path, file_name)
            image = cv2.imread(source_path, 1)
            if image is None:
                # imread signals an unreadable file by returning None.
                raise IOError("Could not read image: " + source_path)
            if is_mask:
                # Masks get no noise: only the first channel is kept.
                image = image[:, :, 0]
            else:
                image = noisy("s&p", image)
            stem = os.path.splitext(file_name)[0]
            # The trailing underscore in '_noise_' is kept so file names stay unchanged.
            target_path = os.path.join(target_dir, stem + '_noise_' + ".png")
            if not cv2.imwrite(target_path, image):
                raise IOError("Could not write image: " + target_path)

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:04<00:00, 22.79it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 692.94it/s]


### Add original images

In [5]:
# Copy every base image and every ground-truth mask, under its original file
# name, into the augmented training folders. Files are re-encoded through
# OpenCV rather than copied byte-for-byte; masks are reduced to one channel.
for source_dirname, target_dir, is_mask in (
    ('images', TRAIN_IMAGES, False),
    ('groundtruth', TRAIN_GROUNDTRUTH, True),
):
    # cv2.imwrite only reports failure when the folder is missing, so create it.
    os.makedirs(target_dir, exist_ok=True)
    for path, subdirs, files in os.walk(BASE_TRAINING):
        # Only handle directories whose last path component matches.
        if os.path.basename(path) != source_dirname:
            continue
        png_names = [name for name in files if name.endswith(".png")]  # PNG files only
        for file_name in tqdm(png_names, total=len(png_names)):
            source_path = os.path.join(path, file_name)
            image = cv2.imread(source_path, 1)
            if image is None:
                # imread signals an unreadable file by returning None.
                raise IOError("Could not read image: " + source_path)
            if is_mask:
                image = image[:, :, 0]  # masks are stored with a single channel
            target_path = os.path.join(target_dir, file_name)
            if not cv2.imwrite(target_path, image):
                raise IOError("Could not write image: " + target_path)

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 84.60it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 588.23it/s]
