# Generate augmented images for Class 2
### Class 2: {'CMC': 0, 'Ceramic': 1, 'MMC': 2, 'Metal': 3, 'PMC': 4, 'Polymer': 5}
### Target: 200 images per class after augmentation

# Install/check TensorFlow 2.x

In [None]:
# These four behaviours are already the default on Python 3; the import only matters on Python 2.
from __future__ import absolute_import, division, print_function, unicode_literals
# Upgrade pip itself before the package installs in the following cells.
!pip install --upgrade pip

In [None]:
pip install gast==0.3.3

In [None]:
# Install the pinned GPU build of TensorFlow, then report what the runtime actually loaded.
!pip install -q tensorflow-gpu==2.4.1  #(optional, the default tensorflow1.15 (The final version of TensorFlow 1.x.) is installed, you can upgrade to tensorflow2.0)
import tensorflow as tf
# An empty list here means TensorFlow does not see any GPU device.
print("GPU Available: ", tf.config.list_physical_devices('GPU'))
print("Tensorflow Version: ", tf.__version__)
print("Keras Version: ", tf.keras.__version__)

# Load Data

In [None]:
import warnings
warnings.filterwarnings('ignore')

import math
import os
import random
import tempfile

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab as pl
import seaborn as sns
import sklearn
from scipy import ndarray
from sklearn.metrics import confusion_matrix

# image processing library
import skimage as ski
from skimage import exposure
from skimage import io
from skimage import transform
from skimage import util

In [None]:
# Mount Google Drive at /content/gdrive (Colab-only API).
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
!pip install patool

Collecting patool
[?25l  Downloading https://files.pythonhosted.org/packages/43/94/52243ddff508780dd2d8110964320ab4851134a55ab102285b46e740f76a/patool-1.12-py2.py3-none-any.whl (77kB)
[K     |████▎                           | 10kB 12.8MB/s eta 0:00:01[K     |████████▌                       | 20kB 16.8MB/s eta 0:00:01[K     |████████████▊                   | 30kB 11.4MB/s eta 0:00:01[K     |█████████████████               | 40kB 10.7MB/s eta 0:00:01[K     |█████████████████████▏          | 51kB 7.6MB/s eta 0:00:01[K     |█████████████████████████▍      | 61kB 7.7MB/s eta 0:00:01[K     |█████████████████████████████▋  | 71kB 8.3MB/s eta 0:00:01[K     |████████████████████████████████| 81kB 4.2MB/s 
[?25hInstalling collected packages: patool
Successfully installed patool-1.12


In [None]:
import patoolib
# Dataset archive on the mounted shared drive.
zip_file_path = "/content/gdrive/Shareddrives/[CMPE295B] Using CNNs for Material Classification/3. Image Dataset/Material_images_2nd_labels_REDO.zip"
# Extract into the relative directory '_Images', i.e. relative to the current working directory.
patoolib.extract_archive(zip_file_path, outdir='_Images')

patool: Extracting /content/gdrive/Shareddrives/[CMPE295B] Using CNNs for Material Classification/3. Image Dataset/Material_images_2nd_labels_REDO.zip ...
patool: running /usr/bin/7z x -o_Images -- "/content/gdrive/Shareddrives/[CMPE295B] Using CNNs for Material Classification/3. Image Dataset/Material_images_2nd_labels_REDO.zip"
patool: ... /content/gdrive/Shareddrives/[CMPE295B] Using CNNs for Material Classification/3. Image Dataset/Material_images_2nd_labels_REDO.zip extracted to `_Images'.


'_Images'

In [None]:
# Root of the extracted dataset.
# NOTE(review): assumes the working directory was /content when the archive was
# extracted to the relative path '_Images' - confirm if the notebook is run elsewhere.
base_dir = "/content/_Images"

* Split the base directory into new train, validation, and test directories so that data augmentation is applied only to the training set

In [None]:
!pip install split-folders

Collecting split-folders
  Downloading https://files.pythonhosted.org/packages/b8/5f/3c2b2f7ea5e047c8cdc3bb00ae582c5438fcdbbedcc23b3cc1c2c7aae642/split_folders-0.4.3-py3-none-any.whl
Installing collected packages: split-folders
Successfully installed split-folders-0.4.3


In [None]:
#https://pypi.org/project/split-folders/
import splitfolders

# Source: the extracted dataset. Destination: a new directory that receives the split copies.
input_dir = base_dir 
split_base_dir = '/content/_Images_split'

# Split '_Images' folder into train, validation and test dataset with a ratio.
# 80% train / 10% val / 10% test; the fixed seed keeps the shuffle reproducible.
splitfolders.ratio(input_dir, output=split_base_dir, seed=1337, ratio=(.8, .1, .1))

Copying files: 221 files [00:00, 350.02 files/s]


In [None]:
# Optional alternative split: a fixed number of images per class for val/test
# (8 validation and 8 test images each), with the remainder going to train.
#
# This split gets its own output directory. Writing it into `split_base_dir`
# on top of the ratio split merges both splits in one tree, so images that are
# val/test in one split are also train images in the other (train/val leakage).
fixed_split_base_dir = split_base_dir + '_fixed'
splitfolders.fixed(input_dir, output=fixed_split_base_dir, seed=1337, fixed=(8, 8), oversample=False, group_prefix=None)

Copying files: 221 files [00:00, 351.44 files/s]


# Data preprocessing

In [None]:
# Point at the train subset of the split dataset; the augmentation cells work inside this directory.
new_base_dir = os.path.join(split_base_dir, 'train')
new_base_dir

'/content/_Images_split/train'

### A. With Data Augmentation for training dataset

In [None]:
# Input resolution and batch size shared by all three generators.
# VGG16 / VGG19 expect 224x224 inputs; InceptionV3 / Xception expect 299x299.
IMAGE_SIZE = 224
BATCH_SIZE = 64

# The generator only rescales pixel values by 1/255; no random augmentation is configured here.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)


def _flow_from_split(subset):
    """Return a directory iterator over `<split_base_dir>/<subset>` using the shared image and batch size."""
    return datagen.flow_from_directory(
        os.path.join(split_base_dir, subset),
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
        batch_size=BATCH_SIZE,
    )


# One generator per split, created in train / val / test order.
train_generator = _flow_from_split('train')
val_generator = _flow_from_split('val')
test_generator = _flow_from_split('test')

In [None]:
# Pull one batch from the training generator and report the array shapes.
image_batch_train, label_batch_train = next(iter(train_generator))
print("Image batch shape: ", image_batch_train.shape)
print("Label batch shape: ", label_batch_train.shape)

In [None]:
# Invert the generator's class-name -> index mapping so an index can be turned back into a class name.
labels = {index: name for name, index in train_generator.class_indices.items()}
labels

In [None]:
# Preview the first nine images of the training batch in a 3x3 grid, titled with their class name.
plt.figure(figsize=(10,10))
for cell in range(1, 10):
  sample = cell - 1
  plt.subplot(3, 3, cell)
  plt.imshow(image_batch_train[sample])
  # The position of the largest label entry is used as the class index.
  plt.title(labels[np.argmax(label_batch_train[sample])])
  plt.axis("off")

#### 1) Example of Augmented Images

In [None]:
# Index (within the training batch) of the image used for the augmentation demo.
IMAGE_NUM = 0
# Add a leading axis of size 1 in front of the selected image.
image = tf.expand_dims(image_batch_train[IMAGE_NUM], 0)

In [None]:
# Demo augmentation pipeline: random horizontal/vertical flip followed by a random rotation (factor 0.2).
_preprocessing = tf.keras.layers.experimental.preprocessing
data_aug = tf.keras.Sequential([
    _preprocessing.RandomFlip("horizontal_and_vertical"),
    _preprocessing.RandomRotation(0.2),
])

In [None]:
# Show the unmodified demo image for comparison with the augmented versions.
plt.imshow(image_batch_train[IMAGE_NUM])
plt.title("input image")
plt.axis("off")
plt.show()

In [None]:
# Run the demo image through the augmentation pipeline nine times and show each result in a 3x3 grid.
print("augmented images")
plt.figure(figsize=(10, 10))
for slot in range(1, 10):
  augmented_image = data_aug(image)
  plt.subplot(3, 3, slot)
  # Index 0 drops the leading batch axis again.
  plt.imshow(augmented_image[0])
  plt.axis("off")
plt.show()

#### 2) Generate Augmented Images

In [None]:
def flip_horizontal(image):
    """Return `image` with its second axis (columns) reversed, i.e. mirrored left-to-right."""
    reversed_columns = slice(None, None, -1)
    return image[:, reversed_columns]

def flip_vertical(image):
    """Return `image` with its first axis (rows) reversed, i.e. mirrored top-to-bottom."""
    reversed_rows = slice(None, None, -1)
    return image[reversed_rows, :]

def sigmoid_correction(image):
    """Return `image` after scikit-image's sigmoid contrast adjustment, using the library defaults."""
    # Use the explicitly imported `exposure` submodule: `ski.exposure` only resolves
    # when some other import happens to have loaded `skimage.exposure` already.
    return exposure.adjust_sigmoid(image)

def random_gamma_bright(image):
    """Return a brightened copy of `image` via gamma correction (gamma=0.4, gain=0.9).

    Despite the name, the parameters are fixed; nothing here is random.
    """
    # Use the explicitly imported `exposure` submodule: `ski.exposure` only resolves
    # when some other import happens to have loaded `skimage.exposure` already.
    return exposure.adjust_gamma(image, gamma=0.4, gain=0.9)

def random_gamma_dark(image):
    """Return a darkened copy of `image` via gamma correction (gamma=1.5, gain=0.9).

    Despite the name, the parameters are fixed; nothing here is random.
    """
    # Use the explicitly imported `exposure` submodule: `ski.exposure` only resolves
    # when some other import happens to have loaded `skimage.exposure` already.
    return exposure.adjust_gamma(image, gamma=1.5, gain=0.9)

def crop(image, margin=100):
    """Return the centre of `image` with `margin` pixels removed from every side.

    Parameters
    ----------
    image : array-like with a `.shape` and 2-D slicing (rows, columns, ...)
    margin : int, default 100
        Pixels to cut from each edge.

    If an axis is not longer than `2 * margin`, the fixed margin would leave
    nothing (an empty array that cannot be saved as an image). For such an axis
    a quarter of its length is removed from each side instead, so the result
    keeps the central half. Images larger than `2 * margin` on both axes are
    cropped exactly as before.
    """
    height, width = image.shape[0], image.shape[1]
    row_margin = margin if height > 2 * margin else height // 4
    col_margin = margin if width > 2 * margin else width // 4
    return image[row_margin:height - row_margin, col_margin:width - col_margin]

def shear(image):
    """Return `image` warped by an affine shear of -0.5, wrapping pixels around the image borders.

    Uses bilinear interpolation (order=1) and keeps the input's value range.
    """
    shear_transform = ski.transform.AffineTransform(shear=-0.5)
    warp_options = dict(order=1, preserve_range=True, mode='wrap')
    return ski.transform.warp(image, shear_transform, **warp_options)

In [None]:
# Registry of available augmentations: name -> function taking an image array and returning the augmented array.
augument_method = dict(
    horizontal_flip=flip_horizontal,
    vertical_flip=flip_vertical,
    sigmoid=sigmoid_correction,
    gamma_bright=random_gamma_bright,
    gamma_dark=random_gamma_dark,
    crop=crop,
    shear=shear,
)

In [None]:
# Count the files in each class folder before applying data augmentation.
num_files_indir = {}
for root, dirs, files in os.walk(new_base_dir):
  if root != new_base_dir:
    # Key by the path relative to the train directory instead of stripping a
    # hard-coded prefix, so the cell keeps working if the split directory moves.
    new_root = os.path.relpath(root, new_base_dir)
    num_files_indir[new_root] = len(files)
print('Number of images in each class: ', num_files_indir)

# Largest class and its size. This is informational only: the augmentation
# cell sets its own fixed target for MAX_NUM_IMAGES.
max_key = max(num_files_indir, key=num_files_indir.get)
MAX_NUM_IMAGES = num_files_indir[max_key]
print('Maximum number of class is \"{}\" with {} files.'.format(max_key, MAX_NUM_IMAGES))

Number of images in each class:  {'MMC': 28, 'Polymer': 30, 'PMC': 21, 'Ceramic': 17, 'Metal': 66, 'CMC': 13}
Maximum number of class is "Metal" with 66 files.


In [None]:
# Work from the train directory; each entry found there is treated as one class.
os.chdir(new_base_dir)
list_class = os.listdir(os.curdir)
list_class

['MMC', 'Polymer', 'PMC', 'Ceramic', 'Metal', 'CMC']

In [None]:
# Augment every class folder of the train split up to a fixed number of images.
#
# For each class, every source image receives up to round(target / n) augmented
# copies (each made with a randomly chosen method) until the folder holds
# MAX_NUM_IMAGES files. Copies are saved next to the source as 'ai_<k><name>'.
os.chdir(new_base_dir)
print(os.getcwd())

# Target number of images per class after augmentation (originals + augmented copies).
MAX_NUM_IMAGES = 200

for class_name in list_class:
    # Absolute paths are used throughout so the loop does not depend on the
    # current working directory.
    class_dir = os.path.join(new_base_dir, class_name)
    if not os.path.isdir(class_dir):
        # Stray files next to the class folders are not classes.
        continue

    list_images = os.listdir(class_dir)
    number_of_images = len(list_images)
    if number_of_images == 0:
        # Nothing to augment from; also avoids dividing by zero below.
        continue

    # Define how many augumented images of original image to be added
    NUM_OF_AUG_IMAGES = MAX_NUM_IMAGES / number_of_images
    aug_per_image = round(NUM_OF_AUG_IMAGES)
    print('Number of augmented images: ', aug_per_image)

    # Number of augmented images still needed to reach the target for this class.
    THRESHOLD = MAX_NUM_IMAGES - number_of_images
    threshold_count = 0

    if NUM_OF_AUG_IMAGES <= 1:
        # The class already has at least the target number of images.
        continue

    for file_name in list_images:
        # The stop condition is evaluated per class from the counters. A flag
        # carried over from the previous class could otherwise skip the whole
        # next class when the target was reached on a class's last image.
        if threshold_count >= THRESHOLD:
            break

        print(file_name)

        # Read the source once; the augmentation functions return new arrays
        # or views and do not modify their input.
        image_to_augument = ski.io.imread(os.path.join(class_dir, file_name))

        for i in range(1, aug_per_image + 1):
            if threshold_count >= THRESHOLD:
                break

            key = random.choice(list(augument_method))
            print(key)
            augumented_image = augument_method[key](image_to_augument)

            # write image to the disk
            new_file_path = os.path.join(class_dir, 'ai_%s' % (i) + file_name)
            io.imsave(new_file_path, augumented_image.astype("uint8"))

            # increase threshold count
            threshold_count += 1

/content/_Images_split/train
Number of augmented images:  7
SLM_MMC_33.png
horizontal_flip
gamma_dark
crop
crop
sigmoid
gamma_bright
gamma_bright
SLM_MMC_49.png
shear
crop
shear
sigmoid
crop
horizontal_flip
shear
SLM_MMC_83.png
sigmoid
gamma_dark
horizontal_flip
gamma_bright
vertical_flip
vertical_flip
shear
SLM_MMC_41.png
shear
sigmoid
crop
crop
gamma_dark
gamma_bright
gamma_dark
SLM_MMC_48.png
sigmoid
shear
vertical_flip
gamma_dark
horizontal_flip
crop
gamma_dark
SLM_MMC_22.png
gamma_dark
gamma_dark
shear
horizontal_flip
horizontal_flip
gamma_bright
crop
SLM_MMC_1.png
gamma_dark
horizontal_flip
crop
horizontal_flip
crop
vertical_flip
vertical_flip
SLM_MMC_20.png
horizontal_flip
sigmoid
vertical_flip
gamma_bright
crop
shear
gamma_dark
SLM_MMC_142.png
sigmoid
horizontal_flip
shear
gamma_dark
sigmoid
gamma_bright
gamma_dark
SLM_MMC_30.png
shear
vertical_flip
gamma_dark
horizontal_flip
shear
gamma_dark
crop
SLM_MMC_19.png
crop
sigmoid
shear
vertical_flip
gamma_dark
vertical_flip
gamma_da

In [None]:
# Check the number of images in each class folder after data augmentation.
new_num_files_indir = {}
for root, dirs, files in os.walk(new_base_dir):
  if root != new_base_dir:
    # Key by the path relative to the train directory instead of stripping a
    # hard-coded prefix, so the cell keeps working if the split directory moves.
    new_root = os.path.relpath(root, new_base_dir)
    new_num_files_indir[new_root] = len(files)
print('Number of augmented images in each class: ', new_num_files_indir)

Number of augmented images in each class:  {'MMC': 200, 'Polymer': 200, 'PMC': 200, 'Ceramic': 200, 'Metal': 200, 'CMC': 200}


#### 3) Export images in zip (Optional)

In [None]:
# zip augmented images
!zip -r /content/AUG_Material_images_2nd_labels_REDO_200.zip /content/_Images_split

  adding: content/_Images_split/ (stored 0%)
  adding: content/_Images_split/val/ (stored 0%)
  adding: content/_Images_split/val/MMC/ (stored 0%)
  adding: content/_Images_split/val/MMC/SLM_MMC_22.png (deflated 1%)
  adding: content/_Images_split/val/MMC/SLM_MMC_30.png (deflated 1%)
  adding: content/_Images_split/val/MMC/SLM_MMC_19.png (deflated 1%)
  adding: content/_Images_split/val/MMC/SLM_MMC_63.png (deflated 1%)
  adding: content/_Images_split/val/MMC/SLM_MMC_7.png (deflated 1%)
  adding: content/_Images_split/val/MMC/SLM_MMC_50.png (deflated 1%)
  adding: content/_Images_split/val/MMC/SLM_MMC_28.png (deflated 1%)
  adding: content/_Images_split/val/MMC/SLM_MMC_8.png (deflated 1%)
  adding: content/_Images_split/val/MMC/SLM_MMC_144.png (deflated 1%)
  adding: content/_Images_split/val/MMC/SLM_MMC_76.png (deflated 1%)
  adding: content/_Images_split/val/MMC/SLM_MMC_55.png (deflated 1%)
  adding: content/_Images_split/val/Polymer/ (stored 0%)
  adding: content/_Images_split/val/Po

In [None]:
from google.colab import files
# Download the zipped augmented images (Colab only); uncomment to use.
# The path must match the archive created by the zip cell.
#files.download("/content/AUG_Material_images_2nd_labels_REDO_200.zip")