# **Augmentor**

<font size = 4>Data augmentation can improve training progress by amplifying differences in the dataset. This can be useful if the available dataset is small since, in this case, it is possible that a network could quickly learn every example in the dataset (overfitting), without augmentation. Augmentation is not necessary for training and if your training dataset is large you should disable it.


---

<font size = 4>*Disclaimer*:

<font size = 4>This notebook is part of the *Zero-Cost Deep-Learning to Enhance Microscopy* project (https://github.com/HenriquesLab/DeepLearning_Collab/wiki). It was jointly developed by the Jacquemet (https://cellmig.org/) and Henriques (https://henriqueslab.github.io/) laboratories.

<font size = 4>[Augmentor](https://github.com/mdbloice/Augmentor) was described in the following article:

<font size = 4>Marcus D Bloice, Peter M Roth, Andreas Holzinger, Biomedical image augmentation using Augmentor, Bioinformatics, https://doi.org/10.1093/bioinformatics/btz259

<font size = 4>**Please also cite this original paper when using or developing this notebook.**

# **2. Install Augmentor and Dependencies**
---


In [None]:
# Version string of this notebook and the name of the tool it drives.
Notebook_version, Network = '1.13', 'Augmentor'

import Augmentor
import os

# ------- Common variables to all ZeroCostDL4Mic notebooks -------
import numpy as np
from matplotlib import pyplot as plt
import urllib
import os, random
import shutil 
import zipfile
from tifffile import imread, imsave
import time
import sys
from pathlib import Path
import pandas as pd
import csv
from glob import glob
from scipy import signal
from scipy import ndimage
from skimage import io
from sklearn.linear_model import LinearRegression
from skimage.util import img_as_uint
import matplotlib as mpl
from skimage.metrics import structural_similarity
from skimage.metrics import peak_signal_noise_ratio as psnr
from astropy.visualization import simple_norm
from skimage import img_as_float32
from skimage.util import img_as_ubyte
from tqdm import tqdm 

class bcolors:
  """Namespace holding the escape sequence used to highlight warning messages."""

  # ESC[31m escape sequence, prepended to text printed as a warning.
  WARNING = '\033[31m'

import warnings

# Install a blanket "ignore" filter so that no warning is shown in the
# notebook output from this point on.
warnings.filterwarnings(action="ignore")

# **3. Data augmentation**
---
<font size = 4>





In [None]:
# ------------------------------------------------------------------
# Data augmentation: user settings
# ------------------------------------------------------------------

# Folder holding the training images, and (when Matching_Training_target
# is True) the folder holding the matching target/mask images.
Training_source = "/home/meyer/code-project/HistoQuant/HistoQuant/nuc_seg/train/source"
Matching_Training_target = True
Training_target = "/home/meyer/code-project/HistoQuant/HistoQuant/nuc_seg/train/mask"

# Optional random crop; Crop_size is used as both the crop width and height.
Random_Crop = False
Crop_size = 1024

# Factor by which the original dataset is multiplied.
Multiply_dataset_by = 4

# Root folder under which the augmented dataset is written.
Saving_path = "/home/meyer/code-project/HistoQuant/HistoQuant/nuc_seg/train_aug"

# For each image manipulation below, choose the probability with which it is
# applied (1 = always used; 0 = disabled).

# Mirror and rotate images
rotate_90_degrees = 0.5
rotate_270_degrees = 0.5
flip_left_right = 0.5
flip_top_bottom = 0.5

# Random image zoom
random_zoom = 0
random_zoom_magnification = 0

# Random image distortion
random_distortion = 0

# Image shearing and skewing
image_shear = 0
max_image_shear = 1
skew_image = 0
skew_image_magnitude = 0


# Count the entries of the training source folder; the number of augmented
# images to generate is that count times the requested multiplication factor.
list_files = os.listdir(Training_source)
Nb_files = len(list_files)

Nb_augmented_files = (Nb_files * Multiply_dataset_by)

# Output layout under Saving_path:
#   Augmented_Folder : scratch folder that receives the raw pipeline output
#   source           : final augmented training images
#   mask             : final augmented targets (only with matching targets)
Augmented_folder = Saving_path+"/Augmented_Folder"
Training_source_augmented = Saving_path+"/source"
folders_to_reset = [Augmented_folder, Training_source_augmented]
input_folders = [Training_source]

if Matching_Training_target:
  Training_target_augmented = Saving_path+"/mask"
  folders_to_reset.append(Training_target_augmented)
  input_folders.append(Training_target)

# Every output folder is wiped before being recreated. Refuse to continue if
# that would delete the input data, e.g. when Saving_path is the parent of
# Training_source so that Saving_path/source IS the training source folder.
for output_folder in folders_to_reset:
  resolved_output = os.path.realpath(output_folder)
  for input_folder in input_folders:
    resolved_input = os.path.realpath(input_folder)
    if resolved_input == resolved_output or resolved_input.startswith(resolved_output + os.sep):
      raise ValueError(
          "Saving_path would overwrite the input folder '" + input_folder +
          "' (output folder '" + output_folder + "'). Please choose a different Saving_path.")

# Start from empty output folders: remove leftovers from a previous run.
for output_folder in folders_to_reset:
  if os.path.exists(output_folder):
    shutil.rmtree(output_folder)
  os.makedirs(output_folder)


# Here we generate the augmented images.
# The pipeline reads the images from Training_source and writes its output
# into Augmented_folder.
p = Augmentor.Pipeline(Training_source, Augmented_folder)

# Register the matching target images so that they are passed through the
# pipeline together with the source images.
if Matching_Training_target:
  p.ground_truth(Training_target)

# Add the augmentation operations. Each operation is only added when its
# probability is non-zero (0 = disabled).
if Random_Crop:
  p.crop_by_size(probability=1, width=Crop_size, height=Crop_size, centre=False)

if rotate_90_degrees != 0:
  p.rotate90(probability=rotate_90_degrees)

if rotate_270_degrees != 0:
  p.rotate270(probability=rotate_270_degrees)

if flip_left_right != 0:
  p.flip_left_right(probability=flip_left_right)

if flip_top_bottom != 0:
  p.flip_top_bottom(probability=flip_top_bottom)

if random_zoom != 0:
  p.zoom_random(probability=random_zoom, percentage_area=random_zoom_magnification)

if random_distortion != 0:
  p.random_distortion(probability=random_distortion, grid_width=4, grid_height=4, magnitude=8)

if image_shear != 0:
  # Use the user-selected maximum shear for both directions; this setting was
  # previously ignored in favour of a hard-coded value of 20.
  p.shear(probability=image_shear, max_shear_left=max_image_shear, max_shear_right=max_image_shear)

if skew_image != 0:
  p.skew(probability=skew_image, magnitude=skew_image_magnitude)

# Run the pipeline to produce the requested number of augmented images.
p.sample(int(Nb_augmented_files))

print(int(Nb_augmented_files),"images generated")

# Here we sort through the images and copy them into the augmented training
# source and target folders.

# Output files of the matching targets carry this prefix; it is removed when
# the file is copied into the augmented target folder.
groundtruth_prefix = "_groundtruth_(1)_"

augmented_files = os.listdir(Augmented_folder)

for f in augmented_files:
  generated_file = os.path.join(Augmented_folder, f)
  if f.startswith(groundtruth_prefix):
    shortname_noprefix = f[len(groundtruth_prefix):]
    shutil.copyfile(generated_file, os.path.join(Training_target_augmented, shortname_noprefix))
  else:
    shutil.copyfile(generated_file, os.path.join(Training_source_augmented, f))

# Strip the '_original' marker from the augmented source file names.
# Full paths are used instead of os.chdir() so the working directory of the
# process is left untouched and a relative Saving_path keeps working.
# NOTE(review): only the first occurrence is removed, assuming the pipeline
# inserts a single '_original' marker ahead of the original file name, so that
# a file whose own name contains '_original' still matches its target name.
# Confirm against Augmentor's output naming.
for filename in os.listdir(Training_source_augmented):
  os.rename(
      os.path.join(Training_source_augmented, filename),
      os.path.join(Training_source_augmented, filename.replace('_original', '', 1)))

# Here we clean up the extra files
shutil.rmtree(Augmented_folder)
