## Importing all the required modules

In [52]:
from __future__ import division, print_function
import os
import click
import numpy as np
from PIL import Image, ImageFilter
from tqdm import tqdm
import data1
from data1 import STD, MEAN, BALANCE_WEIGHTS, U, EV, no_augmentation_params
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical


## Initializing arrays for different classes

In [53]:
# One independent list of image arrays per label level (0-4); filled by main().
(image_class_0,
 image_class_1,
 image_class_2,
 image_class_3,
 image_class_4) = ([] for _ in range(5))

# Per-level row counts; start at zero and are overwritten once the CSV is read.
image_labels_0 = image_labels_1 = image_labels_2 = image_labels_3 = image_labels_4 = 0


## Read csv file to get all the labels

In [54]:
# Label table indexed by its first column; main() looks images up by file stem.
true_labels = pd.read_csv("temp.csv", index_col=0)
col_name = "level"

# Number of CSV rows at each level, obtained by summing a boolean mask.
_levels = true_labels[col_name]
image_labels_0 = int((_levels == 0).sum())
image_labels_1 = int((_levels == 1).sum())
image_labels_2 = int((_levels == 2).sum())
image_labels_3 = int((_levels == 3).sum())
image_labels_4 = int((_levels == 4).sum())


In [55]:
# Integer label vectors: one entry equal to the level for every CSV row of that level.
label0 = [0] * image_labels_0
label1 = [1] * image_labels_1
label2 = [2] * image_labels_2
label3 = [3] * image_labels_3
label4 = [4] * image_labels_4


#### Method for getting the coordinates of the bounding box
Code - https://github.com/sveitser/

In [56]:
def square_bbox(img):
    """Return a horizontally centred, full-height crop box for ``img``.

    Args:
        img: Any object exposing ``size`` as a ``(width, height)`` pair.

    Returns:
        ``(left, upper, right, lower)``. For images wider than tall the box
        trims equal margins from the left and right; for images that are
        square or taller than wide the box is the whole image.
    """
    width, height = img.size
    # Floor-divided excess width per side; negative for portrait images.
    margin = (width - height) // 2
    return (max(margin, 0), 0, min(width - margin, width), height)

## Blur the image and convert it to required size

In [57]:
def convert(fname, crop_size):
    """Crop an image file to its foreground and resize it to a square.

    A blurred copy of the image is used only to locate the foreground; the
    crop and resize are applied to the unblurred image.

    Args:
        fname: Path of the image file to open.
        crop_size: Edge length, in pixels, of the returned square image.

    Returns:
        A PIL image of size ``crop_size`` x ``crop_size``.
    """
    img = Image.open(fname)
    pixels = np.array(img.filter(ImageFilter.BLUR))
    height, width, _ = pixels.shape

    bbox = None
    # Only clearly landscape images get the foreground search; the 1.2
    # aspect threshold comes from the original author's analysis of the input.
    if width > 1.2 * height:
        # Brightest per-channel value in a thin strip at each side edge is
        # taken as the background level.
        left_peak = pixels[:, : width//32, :].max(axis=(0, 1)).astype(int)
        right_peak = pixels[:, -width//32:, :].max(axis=(0, 1)).astype(int)
        background_peak = np.maximum(left_peak, right_peak)
        # Foreground = anything more than 10 levels above that background.
        mask = (pixels > background_peak + 10).astype(np.uint8)
        bbox = Image.fromarray(mask).getbbox()

        if bbox is None:
            print('bbox none for {} (???)'.format(fname))
        else:
            left, upper, right, lower = bbox
            too_narrow = right - left < 0.8 * height
            too_short = lower - upper < 0.8 * height
            if too_narrow or too_short:
                print("box too small")
                bbox = None

    # Fall back to a centred full-height crop when no usable box was found.
    if bbox is None:
        bbox = square_bbox(img)
    return img.crop(bbox).resize([crop_size, crop_size])
       

In [58]:
#def save(img, fname):
#    img.save(fname, quality=97)

In [59]:
#def get_convert_fname(fname, directory, convert_directory):
#    return fname.replace(directory, convert_directory)

### Appending the image arrays to the corresponding image classes

In [60]:
def main(directory, crop_size):
    """Convert every image under ``directory`` and file it by label level.

    Walks ``directory`` recursively, converts each ``.jpeg`` / ``.tiff`` file
    with ``convert`` and appends the resulting array to the module-level
    list for its level (``image_class_0`` .. ``image_class_4``). The level is
    read from ``true_labels`` using the file name without its extension as
    the index key; any level other than 0-3 goes to ``image_class_4``.

    Args:
        directory: Root folder to search. A trailing separator is optional.
        crop_size: Edge length passed through to ``convert``.

    Raises:
        KeyError: If an image's file stem is not present in ``true_labels``.
    """
    # Keep the directory each file was found in, so images inside
    # sub-folders are opened from their real location.
    paths = sorted(
        os.path.join(dirpath, fname)
        for dirpath, _dirnames, fnames in os.walk(directory)
        for fname in fnames
        if fname.endswith(('.jpeg', '.tiff'))
    )

    # Levels 0-3 have dedicated lists; everything else falls through to 4.
    buckets = {
        0: image_class_0,
        1: image_class_1,
        2: image_class_2,
        3: image_class_3,
    }

    for path in paths:
        img = convert(path, crop_size)
        stem = os.path.splitext(os.path.basename(path))[0]
        level = true_labels.loc[stem, col_name]
        buckets.get(level, image_class_4).append(np.array(img))

Call the main method and provide the required crop_size

In [61]:
# Convert every image found under "try/" to 512x512 and sort the arrays into
# the image_class_* lists. The 512 here has to agree with the (0, 512, 512, 3)
# shape final_X is created with, since the batches are concatenated onto it.
main("try/", 512)

In [62]:
#len(image_labels)
#labels = [to_categorical(y) for y in image_labels]
#labels
#np.array(image_class_0).shape

## Data augmentation done as a means to avoid overfitting
### Reason - Class imbalance

In [63]:
# Shared augmentation generator. The options that differ from "off" are
# listed first; the rest are spelled out explicitly as disabled.
x = ImageDataGenerator(
    # --- active geometric augmentation ---
    rotation_range=30,
    width_shift_range=0.1,
    zoom_range=[0.9,1.1],
    horizontal_flip=True,
    fill_mode='nearest',
    cval=0.,
    # --- geometric options left off ---
    height_shift_range=0.,
    shear_range=0.,
    vertical_flip=False,
    channel_shift_range=0.,
    # --- normalisation / whitening left off ---
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    zca_epsilon=1e-6,
    rescale=None,
    preprocessing_function=None,
)

In [64]:
# Replace each integer label vector with its 5-column one-hot encoding.
# The tuple of inputs is built before any name is rebound.
label0, label1, label2, label3, label4 = (
    to_categorical(int_labels, num_classes=5)
    for int_labels in (label0, label1, label2, label3, label4)
)

#### Initializing rounds argument and batch_size for data augmentation using keras


In [65]:
# `rounds` value handed to x.fit() for levels 0, 1, 2, 3 and 4 respectively.
round0, round1, round2, round3, round4 = 1, 4, 2, 6, 8
# Number of label levels the augmentation loop iterates over.
total_loop_count = 5

In [66]:
# Images per generated batch, and the per-class batch counter (set in the loop).
batch_size, no_batches = 64, 0


## Final numpy arrays consisting of images after data_augmentation

In [67]:
# Zero-length accumulators: augmented batches are concatenated onto these
# along axis 0, so only the trailing dimensions matter here.
final_X = np.zeros((0, 512, 512, 3))
final_Y = np.zeros((0, 5))

In [68]:
def augment(no_batches, X_train_aug):
    """Draw ``no_batches`` batches and append them to ``final_X`` / ``final_Y``.

    Each batch is fetched with a single ``next()`` call and unpacked into its
    images and labels, so every image stays paired with the labels that were
    produced alongside it and both accumulators grow by the same number of
    rows.

    Args:
        no_batches: How many batches to draw from the iterator.
        X_train_aug: Object whose ``next()`` returns an ``(images, labels)``
            pair of array-likes.

    Side effects:
        Rebinds the module-level ``final_X`` and ``final_Y`` to new arrays
        with the drawn batches concatenated along axis 0. Nothing is changed
        when ``no_batches`` is 0.
    """
    global final_X, final_Y
    image_batches = []
    label_batches = []
    for _ in range(no_batches):
        # One call per iteration: calling next() separately for images and
        # labels would advance the iterator twice and mix different batches.
        batch_images, batch_labels = X_train_aug.next()
        image_batches.append(np.array(batch_images))
        label_batches.append(np.array(batch_labels))

    # Concatenate once at the end instead of re-copying the accumulator on
    # every iteration.
    if image_batches:
        final_X = np.concatenate([final_X] + image_batches, axis=0)
        final_Y = np.concatenate([final_Y] + label_batches, axis=0)
    

### Code for data augmentation

In [69]:
# One (images, one-hot labels, fit rounds) entry per label level, in level
# order. Every level goes through exactly the same steps below.
class_inputs = [
    (image_class_0, label0, round0),
    (image_class_1, label1, round1),
    (image_class_2, label2, round2),
    (image_class_3, label3, round3),
    (image_class_4, label4, round4),
]

for class_images, class_labels, fit_rounds in class_inputs[:total_loop_count]:
    # A level with no images has nothing to augment, and an empty array is
    # not a valid 4-D input for the generator, so skip it.
    if len(class_images) == 0:
        continue

    class_array = np.array(class_images)

    # NOTE(review): every featurewise/ZCA option on `x` is disabled and fit()
    # is called without augment=True, so this call does not appear to change
    # what flow() yields and `rounds` does not oversample the class - confirm
    # whether per-class oversampling was the intent.
    x.fit(class_array, rounds=fit_rounds)

    # Enough batches to cover every image of this level once (ceiling division).
    no_batches = (len(class_images) + batch_size - 1) // batch_size

    X_train_aug = x.flow(class_array, class_labels, seed=0, batch_size=batch_size)
    augment(no_batches, X_train_aug)

In [70]:
# Write the accumulated images and one-hot labels to .npy files in the
# current working directory.
np.save(file="X_DR1.npy", arr=final_X)
np.save(file="Y_DR1.npy", arr=final_Y)