# Trying different image dropout implementations for TensorFlow

**Todo**
* Benchmark the performance of each implementation
* Replace the for-loop implementation of Hide-and-Seek dropout with a vectorized one

The code is primarily adapted from Chris Deotte's great [post](https://www.kaggle.com/c/siim-isic-melanoma-classification/discussion/169721) on TPU/GPU augmentation.


In [None]:
from matplotlib import pyplot as plt
import math, os, re, gc
import numpy as np, pandas as pd
from sklearn.metrics import classification_report, accuracy_score

import tensorflow as tf
import tensorflow.keras.backend as K

import pandas as pd
import random

In [None]:
# Choose a tf.distribute strategy: use a TPU when one can be resolved,
# otherwise fall back to TensorFlow's default strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # A ValueError from the lines above is treated as "no TPU available".
    tpu = None

if not tpu:
    # Default distribution strategy in TensorFlow; works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# Load the Fashion-MNIST training CSV into a DataFrame; later cells read its
# "label" column as the target and use the remaining columns as features.
train_mnist = pd.read_csv("../input/fashionmnist/fashion-mnist_train.csv")

### Dropout Code

In [None]:
def random_dropout(image, p = 0.5, s_l = 0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, seed=None):
    """Zero out one random rectangle of ``image`` (random-erasing style dropout).

    With probability ``p`` a single rectangle is blanked. Its area is a uniform
    fraction in ``[s_l, s_h)`` of the image area, its height/width ratio is
    uniform in ``[r_1, r_2)``, its top-left corner is drawn uniformly inside
    the image, and it is clipped at the image border.

    Args:
        image: rank-3 tensor whose static shape is unpacked as
            (height, width, depth); any numeric dtype.
        p: probability of applying the dropout.
        s_l: lower bound of the erased area, as a fraction of the image area.
        s_h: upper bound of the erased area, as a fraction of the image area.
        r_1: lower bound of the rectangle's height/width ratio.
        r_2: upper bound of the rectangle's height/width ratio.
        seed: accepted for interface compatibility; currently not used.

    Returns:
        A tensor with the same shape and dtype as ``image``.
    """
    probability = tf.cast(tf.random.uniform([], 0, 1) < p, tf.int32)
    if (probability == 0): return image

    img_height, img_width, _ = image.shape

    # Sample the rectangle's area (in pixels) and aspect ratio, then derive
    # integer side lengths such that height / width == r.
    s = tf.random.uniform(shape=[], minval=s_l, maxval=s_h, dtype=tf.float32) * img_height * img_width
    r = tf.random.uniform(shape=[], minval=r_1, maxval=r_2, dtype=tf.float32)
    width = tf.cast(tf.math.sqrt(s / r), tf.int32)
    height = tf.cast(tf.math.sqrt(s * r), tf.int32)

    # Top-left corner; both values are >= 0 by construction.
    left = tf.random.uniform(shape=[], minval=0, maxval=img_width, dtype=tf.int32)
    top = tf.random.uniform(shape=[], minval=0, maxval=img_height, dtype=tf.int32)

    # Clip the far edges to the image border.
    ya, yb = top, tf.math.minimum(img_height, top + height)
    xa, xb = left, tf.math.minimum(img_width, left + width)

    # Build the erase mask by comparing pixel coordinates instead of slicing and
    # concatenating: this keeps the static (height, width, depth) shape, so other
    # dropouts can be chained afterwards, and it works for any image dtype
    # rather than only int64.
    rows = tf.range(img_height)[:, tf.newaxis]
    cols = tf.range(img_width)[tf.newaxis, :]
    in_rows = tf.logical_and(rows >= ya, rows < yb)
    in_cols = tf.logical_and(cols >= xa, cols < xb)
    erased = tf.logical_and(in_rows, in_cols)

    keep = tf.cast(tf.logical_not(erased), image.dtype)
    return image * keep[..., tf.newaxis]

def has_dropout(image, p=0.5, hide_p=0.25, grid_sizes=(3, 6, 8)):
    """Hide-and-Seek dropout: zero out random cells of a regular grid.

    With probability ``p`` the image is divided into square cells whose side is
    picked at random from ``grid_sizes``; each cell is then independently
    zeroed with probability ``hide_p``. Cells on the bottom/right border may be
    smaller when the image size is not a multiple of the grid size.

    Args:
        image: rank-3 tensor whose static shape is unpacked as
            (height, width, depth); any numeric dtype.
        p: probability of applying the dropout at all.
        hide_p: probability of hiding each individual grid cell.
        grid_sizes: sequence of candidate cell side lengths (positive ints).

    Returns:
        A tensor with the same shape and dtype as ``image``.
    """
    probability = tf.cast(tf.random.uniform([], 0, 1) < p, tf.int32)

    # Plain-Python check first: an empty candidate list cannot be sampled from.
    if len(grid_sizes) == 0: return image
    if (probability == 0)|(hide_p == 0): return image

    img_height, img_width, _ = image.shape

    # Draw the grid size with tf.random so it is re-sampled for every image even
    # when this function is traced into a graph (e.g. by Dataset.map); Python's
    # `random` would be evaluated only once, at trace time.
    choice = tf.random.uniform([], 0, len(grid_sizes), dtype=tf.int32)
    grid_size = tf.gather(tf.constant(list(grid_sizes), dtype=tf.int32), choice)

    # Grid-cell index of every pixel row / column.
    row_cell = tf.range(img_height) // grid_size
    col_cell = tf.range(img_width) // grid_size

    # One uniform draw per potential cell. Since grid_size >= 1 there are at
    # most height x width cells, so a statically shaped noise table of that
    # size is always large enough; unused entries are simply ignored.
    cell_noise = tf.random.uniform([img_height, img_width], 0, 1)

    # Expand the per-cell draw to per-pixel resolution: rows index the height
    # axis and columns the width axis, so non-square images are handled too.
    pixel_noise = tf.gather(tf.gather(cell_noise, row_cell, axis=0), col_cell, axis=1)

    # A cell is hidden when its draw falls below hide_p.
    keep = tf.cast(pixel_noise >= hide_p, image.dtype)
    return image * keep[..., tf.newaxis]

def coarse_dropout(image, p=0.5, count=8, size = 0.15):
    """Coarse dropout: zero out ``count`` random square holes in ``image``.

    With probability ``p``, ``count`` squares are blanked. Each square is
    centred on a uniformly drawn pixel and has a side of roughly
    ``size * image_width`` pixels (it extends ``int(size * width) // 2`` pixels
    to each side of the centre); squares are clipped at the image border and
    may overlap.

    Args:
        image: rank-3 tensor whose static shape is unpacked as
            (height, width, depth); any numeric dtype.
        p: probability of applying the dropout.
        count: number of holes to cut.
        size: hole side length as a fraction of the image width.

    Returns:
        A tensor with the same shape and dtype as ``image``.
    """
    probability = tf.cast(tf.random.uniform([], 0, 1) < p, tf.int32)
    img_height, img_width, _ = image.shape

    if (probability == 0)|(count == 0)|(size == 0): return image

    # Half side length of every hole; identical for all holes, so compute once.
    half = tf.cast(size * img_width, tf.int32) // 2

    rows = tf.range(img_height)[:, tf.newaxis]
    cols = tf.range(img_width)[tf.newaxis, :]

    # Accumulate one boolean keep-mask over all holes and apply it once at the
    # end. Unlike slicing and concatenating, this preserves the static shape of
    # the image and does not assume an int64 image.
    keep = tf.ones([img_height, img_width], dtype=tf.bool)

    for _ in range(count):

        # Hole centre.
        x = tf.cast(tf.random.uniform([], 0, img_width), tf.int32)
        y = tf.cast(tf.random.uniform([], 0, img_height), tf.int32)

        # Hole bounds, clipped to the image.
        ya = tf.math.maximum(0, y - half)
        yb = tf.math.minimum(img_height, y + half)
        xa = tf.math.maximum(0, x - half)
        xb = tf.math.minimum(img_width, x + half)

        in_rows = tf.logical_and(rows >= ya, rows < yb)
        in_cols = tf.logical_and(cols >= xa, cols < xb)
        hole = tf.logical_and(in_rows, in_cols)
        keep = tf.logical_and(keep, tf.logical_not(hole))

    return image * tf.cast(keep, image.dtype)[..., tf.newaxis]

def gridmask_dropout(image, p = 0.5, d1=6, d2=12, ratio=0.5):
    """GridMask dropout: zero out a regular lattice of squares in ``image``.

    With probability ``p`` a lattice with period ``d`` (uniform integer in
    ``[d1, d2)``) is laid over the image. Inside every period, a square of side
    ``int(d * ratio + 0.5)`` is zeroed. The lattice is shifted by random
    offsets in ``[0, d)`` along each axis.

    Args:
        image: rank-3 tensor whose static shape is unpacked as
            (height, width, depth); any numeric dtype.
        p: probability of applying the dropout.
        d1: inclusive lower bound of the lattice period, in pixels.
        d2: exclusive upper bound of the lattice period, in pixels.
        ratio: side of each zeroed square as a fraction of the period.

    Returns:
        A tensor with the same shape and dtype as ``image``.
    """
    probability = tf.cast(tf.random.uniform([], 0, 1) < p, tf.int32)
    if (probability == 0): return image

    img_height, img_width, _ = image.shape

    # The lattice is defined on a square canvas whose side is the image
    # diagonal rounded up to an even number; the image is the centre crop of
    # that canvas.
    hh = int(np.ceil(np.sqrt((img_height**2)+(img_width**2))))
    hh = hh+1 if hh%2==1 else hh

    d = tf.random.uniform(shape=[], minval=d1, maxval=d2, dtype=tf.int32)
    hole = tf.cast(tf.cast(d, tf.float32)*ratio+0.5, tf.int32)

    st_h = tf.random.uniform(shape=[], minval=0, maxval=d, dtype=tf.int32)
    st_w = tf.random.uniform(shape=[], minval=0, maxval=d, dtype=tf.int32)

    # Canvas coordinates of every image row / column (centre crop offsets).
    canvas_rows = tf.range(img_height) + (hh - img_height)//2
    canvas_cols = tf.range(img_width) + (hh - img_width)//2

    # A canvas row belongs to a masked stripe when its position inside the
    # period, measured from the random offset, is below the hole size; same for
    # columns. floormod keeps the result non-negative for coordinates that lie
    # before the offset. Rows and columns are evaluated independently, so no
    # stripe is lost near the border because of the other axis.
    row_hit = tf.math.floormod(canvas_rows - st_h, d) < hole
    col_hit = tf.math.floormod(canvas_cols - st_w, d) < hole

    # Only the intersections of row stripes and column stripes are zeroed.
    masked = tf.logical_and(row_hit[:, tf.newaxis], col_hit[tf.newaxis, :])

    # The trailing axis of size 1 broadcasts over any number of channels.
    keep = tf.cast(tf.logical_not(masked), image.dtype)
    return image * keep[..., tf.newaxis]

### Helper Code

In [None]:
def image_reshape(target, image):
    """Reshape ``image`` to a 28x28 single-channel image; ``target`` passes through.

    Returns:
        The pair ``(target, reshaped_image)``.
    """
    return target, tf.reshape(image, (28,28, 1))

def get_dataset(targets, features, augment=False, batch_size=32, gridmask= False, coarse= False, rand_dropout= False, HaS = False):
    """Build a shuffled, batched ``tf.data.Dataset`` of ``(target, image)`` pairs.

    Every feature row is reshaped by ``image_reshape`` and the selected dropout
    augmentations are then applied (each with ``p=1``, i.e. to every image).

    Args:
        targets: array-like of labels, one per example.
        features: array-like of flattened images, one row per example.
        augment: accepted for interface compatibility; currently not used.
        batch_size: number of examples per batch.
        gridmask: apply ``gridmask_dropout``.
        coarse: apply ``coarse_dropout``.
        rand_dropout: apply ``random_dropout``.
        HaS: apply ``has_dropout`` (Hide-and-Seek).

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``(targets, images)``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((tf.constant(targets), tf.constant(features)))
    dataset = dataset.map(image_reshape)

    # Cache only the deterministic part of the pipeline. Caching after the
    # random dropouts would freeze the augmented images produced by the first
    # pass and replay them unchanged on every later iteration.
    dataset = dataset.cache().shuffle(2000)

    if gridmask:
        dataset = dataset.map(lambda target, image: (target, gridmask_dropout(image, p=1)))

    if coarse:
        dataset = dataset.map(lambda target, image: (target, coarse_dropout(image, p=1)))

    if rand_dropout:
        dataset = dataset.map(lambda target, image: (target, random_dropout(image, p=1)))

    if HaS:
        dataset = dataset.map(lambda target, image: (target, has_dropout(image, p=1)))

    # Prefetch last so that whole batches are prepared ahead of the consumer.
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return dataset

def show_batch_images(data):
    """Plot up to ten images from the first batch of ``data`` in a 2x5 grid.

    Args:
        data: dataset yielding ``(labels, images)`` batches; only the first
            batch is read and only its images are displayed.
    """
    row, col = 2, 5

    # The labels of the batch are not displayed, so they are not converted.
    for _, images in data.take(1):
        plt.figure(figsize=(24,10))
        for idx, image in enumerate(images.numpy()[:row * col]):
            plt.subplot(row, col, idx+1)
            plt.imshow(image)
        plt.show()

# MNIST Raw

In [None]:
%%time
# Split the loaded frame into the label vector and the feature matrix
# (every column except "label"; image_reshape later turns each row into 28x28x1).
labels_mnist = train_mnist["label"].values
train_data_mnist = train_mnist.drop(columns=["label"]).values
# Baseline: no dropout flag is set, so the images are shown unaugmented.
dataset_fashion_mnist = get_dataset(labels_mnist, train_data_mnist)
show_batch_images(dataset_fashion_mnist)

# MNIST Random Dropout

In [None]:
%%time
# Rebuild the dataset with random_dropout enabled and preview one batch.
dataset_fashion_mnist = get_dataset(labels_mnist, train_data_mnist, rand_dropout=True)
show_batch_images(dataset_fashion_mnist)

# MNIST HaS (Hide and Seek) Dropout

In [None]:
%%time
# Rebuild the dataset with Hide-and-Seek dropout (has_dropout) enabled and preview one batch.
dataset_fashion_mnist = get_dataset(labels_mnist, train_data_mnist, HaS=True)
show_batch_images(dataset_fashion_mnist)

# MNIST COARSE DROPOUT

In [None]:
%%time
# Rebuild the dataset with coarse_dropout enabled and preview one batch.
dataset_fashion_mnist = get_dataset(labels_mnist, train_data_mnist, coarse=True)
show_batch_images(dataset_fashion_mnist)

# MNIST GRIDMASK DROPOUT

In [None]:
%%time
# Rebuild the dataset with gridmask_dropout enabled and preview one batch.
dataset_fashion_mnist = get_dataset(labels_mnist, train_data_mnist, gridmask=True)
show_batch_images(dataset_fashion_mnist)