### Task Details

Trains a simple convnet on only the cat and dog classes of the CIFAR10 dataset.
The focus of this is to understand image augmentation well enough to do custom work.

So worry less about accuracy and more about adding augmentation to the existing method,
understanding it, explaining it, and, if it is significant enough, trying to merge it into Keras.

TASK DETAILS:
Make a copy of:
from keras.preprocessing.image import ImageDataGenerator

Add functionality to ImageDataGenerator() beyond what is offered. You have flexibility here; one idea would be to add image histogram modification methods such as this: http://scikit-image.org/docs/dev/auto_examples/color_exposure/plot_equalize.html

You are welcome to do anything you want related to ImageDataGenerator to make it better, but histogram modification could be a great starting point. With your work it helps to show before/after modifications in your notebook/blog. In the end this work is for you, to help give you visibility, so focus more on something that can be shared on LinkedIn rather than just a notebook. 

13 seconds per epoch on a 2 GHz Intel Core i5.

In [None]:
import keras
from keras import backend as K
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator

#----------------------------------------------------------

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Flatten

#----------------------------------------------------------

import warnings
import numpy as np

#----------------------------------------------------------

from skimage.io import imread
from skimage import exposure, color
from skimage import data, img_as_float

#----------------------------------------------------------

%matplotlib inline  
import matplotlib.pyplot as plt

### Load Data - CIFAR 

In [None]:
# Training hyper-parameters
epochs = 10
num_classes = 2
batch_size = 64

# Input image dimensions (rows, columns)
img_rows, img_cols = 32, 32

# Load the CIFAR10 train/test split
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Flat boolean masks selecting only cats (label 3) and dogs (label 5)
train_picks = ((y_train == 3) | (y_train == 5)).ravel()
test_picks = ((y_test == 3) | (y_test == 5)).ravel()

# Keep only the selected images
x_train = x_train[train_picks]
x_test = x_test[test_picks]

# Re-encode the selected labels as binary integers: dog -> 1, cat -> 0
y_train = (y_train[train_picks] == 5).astype(int)
y_test = (y_test[test_picks] == 5).astype(int)

### Preprocess - Image Samples

In [None]:
# Pick the per-sample shape that matches the backend's image data format ...
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 3)

# ... and reshape both image sets to (num_samples,) + input_shape
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)
  

In [None]:
# One-hot encode the flattened integer labels into num_classes columns
y_train = keras.utils.to_categorical(y_train.ravel(), num_classes)
y_test = keras.utils.to_categorical(y_test.ravel(), num_classes)

In [None]:
# Cast the image arrays to float32 and divide every pixel value by 255
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [None]:
# Report the training array shape and the number of samples in each split
print('x_train shape:', x_train.shape)
print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

### Define - ConvNet Architecture

In [None]:
# Small convnet: two 3x3 convolutions -> 2x2 max-pool -> dropout -> dense head
model = Sequential([
    Conv2D(4, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(8, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(16, activation='relu'),
    Dropout(0.5),
    # Output width follows num_classes so it stays in sync with the one-hot labels
    Dense(num_classes, activation='softmax'),
])

In [None]:
# Categorical cross-entropy loss, Adadelta optimizer with its default settings,
# and accuracy reported as the training/validation metric
model.compile(
    optimizer=keras.optimizers.Adadelta(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Function - Contrast Adjustment Of Image Using CLAHE Algorithm

In [None]:
def image_contrast_adjusment(img):
    """Adjust the contrast of an RGB image with CLAHE on its HSV value channel.

    The image is converted to HSV, the value (brightness) channel is passed
    through adaptive histogram equalization, and the result is converted
    back to RGB. Hue and saturation are left as computed by the conversion.

    Args:
        img: Image array indexed as [row, column, channel] with RGB in the
            last axis. (Presumably float pixels in [0, 1], as produced by
            the normalization step of this notebook - confirm for other
            callers.)

    Returns:
        A new RGB image array produced by ``color.hsv2rgb``.
    """
    # Work in HSV so that only brightness is equalized
    hsv = color.rgb2hsv(img_as_float(img))
    value_channel = hsv[:, :, 2]

    # Run CLAHE on the value channel; warnings emitted by scikit-image
    # during equalization are deliberately silenced
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        equalized = exposure.equalize_adapthist(value_channel, clip_limit=0.03, nbins=48)

    # Put the equalized brightness back and return to RGB
    hsv[:, :, 2] = equalized
    return color.hsv2rgb(hsv)

### Train Model

In [None]:
# Toggle: True trains on randomly transformed, CLAHE-adjusted images;
# False trains directly on the normalized images.
augmentation = True

if augmentation:

    # Random geometric augmentation; preprocessing_function is applied by Keras
    # to every generated image after the random transforms.
    datagen = ImageDataGenerator(
            zoom_range=0.2,
            shear_range=0.2,
            rotation_range=40,
            fill_mode='nearest',
            horizontal_flip=True,
            preprocessing_function=image_contrast_adjusment)

    # NOTE: datagen.fit() is intentionally not called. It only computes the
    # statistics used by featurewise_center / featurewise_std_normalization /
    # zca_whitening, and none of those options are enabled here.

    # Every training image goes through the CLAHE contrast adjustment, so the
    # validation images get the same deterministic adjustment (without the
    # random transforms). Validating on unadjusted images would measure the
    # model on a different input distribution than it was trained on.
    x_val = np.array([image_contrast_adjusment(img) for img in x_test],
                     dtype=x_test.dtype)

    print("Running augmented training now, with augmentation")
    history = model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=x_train.shape[0] // batch_size,
                    epochs=epochs,
                    validation_data=(x_val, y_test))
else:
    print("Running regular training, no augmentation")
    history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))

### Visualize - Accuracy Of Convnet Model

In [None]:
# Keras < 2.3 records the metric as 'acc'/'val_acc'; Keras >= 2.3 records it under
# the name passed to compile(), i.e. 'accuracy'/'val_accuracy'. Support both so the
# plot does not fail with a KeyError depending on the installed version.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Per-epoch validation and training accuracy
plt.plot(history.epoch, history.history['val_' + acc_key], '-o', label='validation')
plt.plot(history.epoch, history.history[acc_key], '-o', label='training')

plt.grid(True)
plt.legend(loc=0)

plt.xlabel('epochs')
plt.ylabel('accuracy')