# Notebook 2 - Offline Data Augmentation

### 1. Setup constants

In [1]:
from constants import *

# Side length in pixels; used for both dimensions of the generator's target_size
IMAGE_SIZE = 224
# Per-class image total to reach: existing images plus newly generated ones
NUM_AUG_IMAGES_WANTED = 6000

### 2. Imports and notebook setup

In [2]:
# Echo the value of every expression statement in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Printing with markdown
from IPython.display import Markdown, display
def printmd(string):
    """Render ``string`` as Markdown in the cell output.

    Args:
        string: Markdown-formatted text to display.
    """
    rendered = Markdown(string)
    display(rendered)

In [3]:
# Default imports
import os
import random
import shutil
from send2trash import send2trash
import numpy as np
import pandas as pd

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow import set_random_seed

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### 3. Augment data offline and save to directory

In [4]:
# Report how many images each class folder of the train set currently holds
for cls in BINARY_CLASSES:
    class_dir = os.path.join(TRAIN_PATH, cls)
    n_images = len(os.listdir(class_dir))
    print(cls, n_images)

other 6248
mel 3122


In [5]:
# Seed NumPy and TensorFlow before generating any augmented images
np.random.seed(100)
set_random_seed(100)

# Only the non-'other' class(es) are augmented: 'other' is the majority class
# (see the per-class counts printed by the previous cell) and is left as is.
aug_classes = [x for x in BINARY_CLASSES if x != 'other']

# Temporary layout required by flow_from_directory: a root directory holding
# one sub-directory that contains the images of the class being augmented.
aug_dir = 'aug_dir'
img_dir = os.path.join(aug_dir, 'img_dir')

batch_size = 32

# The transform configuration does not depend on the class, so build it once.
datagen = ImageDataGenerator(rotation_range=180,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.1,
                             horizontal_flip=True,
                             vertical_flip=True,
                             fill_mode='constant',
                             cval=0,
                             brightness_range=(0.9, 1.1)
                            )

for cls in aug_classes:
    # Augmented images are written next to the originals of the same class
    save_path = os.path.join(TRAIN_PATH, cls)
    images = os.listdir(save_path)

    # Number of full batches needed to reach about NUM_AUG_IMAGES_WANTED images
    num_batches = int(np.ceil((NUM_AUG_IMAGES_WANTED - len(images)) / batch_size))
    if num_batches <= 0:
        # Class already holds enough images (e.g. the cell is being re-run):
        # nothing would be generated, so skip the copy entirely.
        continue

    # A previous run that failed part-way may have left the temp dir behind;
    # clear it so the directory creation below cannot raise FileExistsError.
    if os.path.exists(aug_dir):
        send2trash(aug_dir)
    os.makedirs(img_dir)

    try:
        for img in images:
            origin = os.path.join(TRAIN_PATH, cls, img)
            destiny = os.path.join(img_dir, img)
            # Assigned to a throwaway name so the returned path is not echoed
            # (the notebook sets ast_node_interactivity = "all").
            _ = shutil.copyfile(origin, destiny)

        aug_datagen = datagen.flow_from_directory(aug_dir,
                                                  save_to_dir=save_path,
                                                  save_format=IMG_FORMAT,
                                                  target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                                  batch_size=batch_size)

        # Drawing a batch is what writes the augmented files to save_path;
        # the returned arrays themselves are not needed.
        for _step in range(num_batches):
            _ = next(aug_datagen)
    finally:
        # Always remove the temporary copy of the raw images, even on failure
        send2trash(aug_dir)

Found 3122 images belonging to 1 classes.


In [6]:
# Re-count the images per class after augmentation. The count is taken from
# TRAIN_PATH, the same root the augmented files were saved under and the same
# one used by the pre-augmentation check, so both counts are comparable.
for cls in BINARY_CLASSES:
    print(cls, len(os.listdir(os.path.join(TRAIN_PATH, cls))))

other 6248
mel 6002
