# Notebook 2 - Offline Data Augmentation

### 1. Setup constants

In [1]:
from constants import *

# Minimum number of images each non-melanoma class should have in the train
# set; classes below this count are topped up to exactly this many files.
NUM_AUG_IMAGES_WANTED = 1000

# NOTE(review): presumably the image side length in pixels; it is not
# referenced in the cells visible in this notebook section - confirm usage.
IMAGE_SIZE = 224

### 2. Imports and notebook setup

In [2]:
# Set up multiple outputs for cells: with "all", IPython displays the value of
# every top-level expression statement in a cell, not only the last one.
# Consequence: calls whose return value should not be echoed are assigned to
# `_` in later cells (e.g. `_ = shutil.copyfile(...)`).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Printing with markdown
from IPython.display import Markdown, display
def printmd(string):
    """Render ``string`` as Markdown in the cell output.

    The text is wrapped in an IPython ``Markdown`` object and handed to
    ``display``; nothing is returned.
    """
    rendered = Markdown(string)
    display(rendered)

In [3]:
# Default imports
import os
import random
import shutil
from send2trash import send2trash
import numpy as np
import pandas as pd

### 3. Augment data offline and save to directory

In [4]:
# Report how many non-hidden files each class has in the train set
for cls in CLASSES_2019:
    cls_dir = os.path.join(TRAIN_PATH, cls)
    n_images = sum(1 for fname in os.listdir(cls_dir) if not fname.startswith('.'))
    print(cls, n_images)

ak 616
bcc 2369
bkl 1804
df 163
nv 3500
vasc 185
mel 3207
scc 452


In [5]:
###########################################################
# Augmentation for classes other than melanoma
###########################################################
# "Augmentation" here is oversampling by duplication: existing image files are
# copied under new names (no pixel-level transform is applied) until every
# non-melanoma class with fewer than NUM_AUG_IMAGES_WANTED images reaches
# exactly that count. Copies are named '<k>_<original name>'.
#
# Once a class has reached the target, the size check below skips it, so
# re-running this cell after a completed run does nothing.
np.random.seed(100)

aug_classes = [x for x in CLASSES_2019 if x != 'mel']

for cls in aug_classes:
    cls_dir = os.path.join(TRAIN_PATH, cls)
    images = [x for x in os.listdir(cls_dir) if x[0] != '.']

    # Nothing to duplicate in an empty folder (and the division below would
    # raise ZeroDivisionError); classes already at/above the target are left as is.
    if not images or len(images) >= NUM_AUG_IMAGES_WANTED:
        continue

    # target = full_sets * len(images) + remainder. One full set already
    # exists (the originals), so only `full_sets - 1` complete extra passes
    # are needed, plus `remainder` individually sampled images.
    full_sets, remainder = divmod(NUM_AUG_IMAGES_WANTED, len(images))
    multiple = full_sets - 1

    # replace=False: no image is picked twice for the partial pass.
    extra_imgs = np.random.choice(images, size=remainder, replace=False)

    # Complete passes: copies '1_<img>' ... '<multiple>_<img>' of every image.
    for img in images:
        origin = os.path.join(cls_dir, img)
        for i in range(multiple):
            destiny = os.path.join(cls_dir, str(i + 1) + '_' + img)
            # `_ =` keeps the returned path out of the cell output.
            _ = shutil.copyfile(origin, destiny)

    # Partial pass: one more copy, with the next prefix, of the sampled images.
    for img in extra_imgs:
        origin = os.path.join(cls_dir, img)
        destiny = os.path.join(cls_dir, str(multiple + 1) + '_' + img)
        _ = shutil.copyfile(origin, destiny)

In [6]:
# Check examples of each class after oversampling. The count is read from
# TRAIN_PATH - the same directory the copies were written to - so this check
# cannot drift to a different folder if the path constants change.
for cls in CLASSES_2019:
    print(cls, len([x for x in os.listdir(os.path.join(TRAIN_PATH, cls)) if x[0] != '.']))

ak 1000
bcc 2369
bkl 1804
df 1000
nv 3500
vasc 1000
mel 3207
scc 1000


In [7]:
# Create the folder that will hold all non-melanoma images. exist_ok=True
# keeps the cell re-runnable: a plain os.mkdir raises FileExistsError when
# the folder is already there.
os.makedirs(os.path.join(TRAIN_PATH, 'other'), exist_ok=True)

In [8]:
# Pool the images of every non-melanoma class into the single 'other' folder.
# Files keep their names, so a name shared by two classes would be overwritten.
other_dir = os.path.join(TRAIN_PATH, 'other')
for cls in aug_classes:
    cls_dir = os.path.join(TRAIN_PATH, cls)
    images = [fname for fname in os.listdir(cls_dir) if fname[0] != '.']
    for fname in images:
        # `_ =` keeps the returned path out of the cell output.
        _ = shutil.copyfile(os.path.join(cls_dir, fname), os.path.join(other_dir, fname))

In [9]:
# Send the per-class folders to the trash now that their images live in
# 'other'. Two safeguards:
#   * a folder that no longer exists is skipped, so the cell can be re-run;
#   * a folder is only trashed when every one of its images is present in
#     'other', so an incomplete copy step cannot silently lose data.
other_dir = os.path.join(TRAIN_PATH, 'other')
for cls in aug_classes:
    cls_dir = os.path.join(TRAIN_PATH, cls)
    if not os.path.isdir(cls_dir):
        continue
    missing = [x for x in os.listdir(cls_dir)
               if x[0] != '.' and not os.path.exists(os.path.join(other_dir, x))]
    if missing:
        raise RuntimeError(
            "{} image(s) of class '{}' are missing from '{}'; folder not trashed".format(
                len(missing), cls, other_dir))
    send2trash(cls_dir)

In [10]:
# Total number of non-hidden files pooled into the 'other' class
other_dir = os.path.join(TRAIN_PATH, 'other')
len([fname for fname in os.listdir(other_dir) if not fname.startswith('.')])

11673