<a href="https://colab.research.google.com/github/robert-pineau/CIND-860-Capstone/blob/main/CIND860_augment_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import random
import glob
import re
import numpy as np
import albumentations as albu

In [None]:
#This utility takes only the original "train" images, both with cancer, and without, and creates random augments of each, expanding the original image into 32 flavours.
#
#The possible augments are as follows:
#
#a) Horizontal Flips
#b) Vertical Flips
#c) Apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
#d) RandomToneCurve (to highlight areas from bright to dark)
#e) RandomBrightnessContrast adjustment.


#Other planned augments that have not been tried yet:
#
#RandomGamma adjustment
#HueSaturation adjustment
#ColorJitter adjustment
#InvertImg (blacks to white, and whites to black)
#Solarize (like InvertImg, but only for pixels above a certain threshold)


In [None]:
# Augmentation pipeline applied to every generated copy:
#   1. Horizontal flip and vertical flip, each applied independently with p=0.5.
#   2. CLAHE with p=1, i.e. it runs on every augment (clip_limit given as the range 1-10).
#   3. With p=0.5, exactly one of RandomToneCurve / RandomBrightnessContrast.
#      Inside OneOf the children's p values act as relative weights, so the two
#      equal values give each child the same chance of being the one selected.
transform = albu.Compose([
    albu.HorizontalFlip(p=0.5),
    albu.VerticalFlip(p=0.5),
    albu.CLAHE(clip_limit=(1, 10), p=1),
    albu.OneOf([
        albu.RandomToneCurve(scale=0.3, p=0.5),
        # always_apply=False is the default and the argument is deprecated in
        # recent albumentations releases, so it is omitted here.
        albu.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.2),
            contrast_limit=(-0.4, 0.5),
            brightness_by_max=True,
            p=0.5,
        ),
    ], p=0.5),
])


#This method uses the above transformations to create the augments.
def aug_fn(image):
    """Return one randomly augmented version of ``image``.

    The module-level ``transform`` pipeline is called with the image passed by
    keyword, and the entry stored under the "image" key of its result is
    returned.
    """
    augmented = transform(image=image)
    return augmented["image"]


In [None]:
# Root directory of the image database; the originals are read from, and the
# augments written to, its "train" subdirectory. Set the CIND860_IMAGE_DIR
# environment variable to point at another location without editing the notebook.
image_dir = os.environ.get("CIND860_IMAGE_DIR", "/mnt/wd/CIND860/database/square_cc_images")

In [None]:
# Augment only the original images in the "train" directory, i.e. files whose
# name has the exact form <patient_id>_<image_id>.png. Augments written by this
# cell carry an extra "_<n>" suffix, so they are skipped if the cell is re-run.

# Number of augments written per original; together with the original itself
# this gives 32 images per original.
AUGMENTS_PER_IMAGE = 31

cnn_use = "train"

image_list = glob.glob(f"{image_dir}/{cnn_use}/*.png")
random.shuffle(image_list)

# Number of PNG files present before any augment is written.
count = len(image_list)

# Matched against the bare file name (not the whole path) so the check does not
# depend on the path separator and rejects names with anything after ".png".
original_name = re.compile(r"(\d+)_(\d+)\.png")

for png_name in image_list:
    # Only considering original images.
    result = original_name.fullmatch(os.path.basename(png_name))
    if not result:
        continue

    patient_id = int(result.group(1))
    image_id = int(result.group(2))

    # cv2.imread signals an unreadable file by returning None rather than
    # raising; skip such files instead of crashing inside the pipeline.
    img = cv2.imread(png_name)
    if img is None:
        print(f"WARNING: could not read {png_name}; skipping")
        continue

    for j in range(1, AUGMENTS_PER_IMAGE + 1):
        # Each call draws a fresh random augment of the original.
        img2 = aug_fn(img)
        # The augment keeps the original's ids and adds the augment number as an
        # extra trailing field; the original file is left in place unchanged.
        out_name = f"{image_dir}/{cnn_use}/{patient_id}_{image_id}_{j}.png"
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(out_name, img2):
            print(f"WARNING: failed to write {out_name}")

    print(f"Patient ID:***{patient_id}*** Image ID:***{image_id}***")

# Count what is actually on disk, so the total stays correct when the cell is
# re-run (existing augments are overwritten) or when a read/write failed.
image_cnt = len(glob.glob(f"{image_dir}/{cnn_use}/*.png"))

print(f"FOUND ***{count}*** Images")
print(f"Now Total of ***{image_cnt}*** Images")
