# Preamble

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set(style="white")

# Allows for interactive shell - outputs all non variable statements
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
np.set_printoptions(precision=4, linewidth=100)

from matplotlib import pyplot as plt

from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input, decode_predictions
import numpy as np

model = VGG16(weights='imagenet', include_top=True)

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
import os
import shutil
from glob import glob

# Seed numpy's global random generator.
np.random.seed(10)

# Dataset folders, all located under <current working directory>/dataset.
current_dir = os.getcwd()
DATASET_DIR = os.path.join(current_dir, 'dataset')
TRAIN_DIR, TEST_DIR, CROSSVALID_DIR, SAMPLE_DIR = (
    os.path.join(DATASET_DIR, sub)
    for sub in ('train', 'test', 'cross_valid', 'sample')
)

# Folder for saved model weights, next to the dataset folder.
WEIGHTS_DIR = os.path.join(current_dir, 'weights')

# Use Keras Vgg16 to get the predictions

* Download the dataset in the current directory.
```
kg download -c 'dogs-vs-cats-redux-kernels-edition'
```
* Inspect the data
* Prepare a single image
* Feed it into pretrained vgg16

## Inspect the data

Graph the image

In [None]:
# Unzip a single file to test on the pretrained model
!unzip -oj "test.zip" "test/1.jpg" -d "/tmp/cats_dogs"

# Load the image
img_path = '/tmp/cats_dogs/1.jpg'
img = image.load_img(img_path, target_size=(224, 224))

# Plot the single image
f = plt.figure(figsize=(10, 5))
sp = f.add_subplot(1, 1, 1) ## (rows, cols, index)
sp.axis('On')
sp.set_title(img_path, fontsize=16)
plt.imshow(img)

## Predict using Keras Vgg16

In [None]:
# Convert the loaded image to an array, add a leading batch axis, and
# classify it with the pretrained model.
# NOTE(review): preprocess_input is not applied to x in this cell -- presumably
# kept to compare against a preprocessed prediction; confirm this is intended.
x = np.expand_dims(image.img_to_array(img), axis=0)
preds = model.predict(x)
decode_predictions(preds)

In [None]:
# Convert the loaded image to an array, add a leading batch axis, run it
# through VGG16's preprocess_input, then classify and decode the result.
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
preds = model.predict(x)
decode_predictions(preds)

# Kaggle Competition

## Synopsis

1. Prepare dataset
    1. Download the dataset
    1. Unzip training and test dataset
    1. Create the training, validation, sample batch dataset
    1. Create the labels
1. Model preparation
    1. Finetune the keras model
       1. Pop the last layer, freeze all layers, add a softmax layer and update set of classes
    1. Fit the keras model
       1. Train the updated keras model
    1. Save and load the model after a couple of epochs
1. Perform predictions
1. Debug
   1. View the confusion matrix
   1. Visual Inspection
       1. Inspect correct labels
       1. Inspect incorrect labels
       1. Inspect correct labels with high probability
       1. Inspect incorrect labels with high probability
       1. Inspect correct labels with medium probability
1. Kaggle Submission
    1. Prepare csv file
    1. Submit

## Model preparation

### Finetune the keras model
* Pop the last layer, freeze all layers, add a softmax layer and update set of classes

In [3]:
from keras.layers.core import Dense
from keras.models import Model
from keras.optimizers import Adam

ttl_outputs = 2       # number of units in the new softmax layer
learning_rate = 0.01  # learning rate passed to Adam

base_model = VGG16(weights='imagenet', include_top=True)

# Freeze every pretrained layer. The Dense layer created below is not part of
# base_model.layers, so it stays trainable.
for layer in base_model.layers:
    layer.trainable = False

# "Pop" the ImageNet classifier: branch off the output of the layer that feeds
# the final 1000-way softmax instead of stacking the new softmax on top of the
# old one (base_model.output is that old softmax).
inputs = base_model.input
penultimate_output = base_model.layers[-2].output
outputs = Dense(ttl_outputs, activation='softmax')(penultimate_output)

model = Model(inputs=inputs, outputs=outputs)

model.compile(optimizer=Adam(lr=learning_rate),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

### Fit and save the keras model

* Train the updated keras model with the new data for a couple of epochs

In [None]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Training settings and the sample-set folders the generators read from.
batch_size = 4
epochs = 3
train_dir = '{}/train'.format(SAMPLE_DIR)
crossvalid_dir = '{}/cross_valid'.format(SAMPLE_DIR)


def _count_files(root):
    """Return the number of files found anywhere under `root`."""
    return sum(len(files) for _, _, files in os.walk(root))


# Sample counts, used to derive the number of steps per epoch.
nb_train_samples = _count_files(train_dir)
nb_validation_samples = _count_files(crossvalid_dir)

def process_img(img_np):
    """Apply VGG16's `preprocess_input` to a single image array.

    Intended as the `preprocessing_function` of an `ImageDataGenerator`, which
    hands over one image at a time and expects an array of the same shape back.

    Args:
        img_np: numpy array holding one image, without a batch axis.

    Returns:
        The preprocessed image, with the same shape as `img_np`.
    """
    # preprocess_input is called on a one-image batch, as the original code did
    # (presumably because it expects a batch axis -- TODO confirm for the
    # installed Keras version).
    batch = np.expand_dims(img_np, axis=0)
    batch = preprocess_input(batch)
    # Strip the batch axis again so the result matches the input shape.
    return batch[0]

# Single generator configuration shared by the training and validation
# streams: channels-last images, each passed through process_img.
datagen = ImageDataGenerator(
    preprocessing_function=process_img,
    data_format='channels_last')

# Both streams use the same image size, batch size and one-hot labels.
flow_options = dict(
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
)
train_generator = datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = datagen.flow_from_directory(crossvalid_dir, **flow_options)

# One pass over the training samples, validated against the cross-validation
# samples; step counts are the sample counts floor-divided by the batch size.
model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=1,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

In [None]:
# Class names ordered by the integer index the generator assigned to them,
# so classes[i] is the name of class i.
class_indices = train_generator.class_indices
classes = sorted(class_indices, key=class_indices.get)

train_generator.class_indices
classes

# Create the weights folder once, before training starts, rather than on
# every iteration of the loop.
os.makedirs(WEIGHTS_DIR, exist_ok=True)

# Train one epoch at a time and save a weights file after each epoch.
for epoch in range(epochs):
    model.fit_generator(
        train_generator,
        steps_per_epoch=nb_train_samples // batch_size,
        epochs=1,
        validation_data=validation_generator,
        validation_steps=nb_validation_samples // batch_size)

    model.save_weights(os.path.join(WEIGHTS_DIR, 'intial_sample_run_{}.h5'.format(epoch)))

## Perform predictions

In [None]:
# Restore the weights saved after the third pass (index 2) of the training loop.
model.load_weights(os.path.join(WEIGHTS_DIR, 'intial_sample_run_2.h5'))

def get_data_as_np(path, batch_size=5):
    """Load all images under `path` into a single numpy array.

    Images are read through the notebook-level `datagen` (so they receive its
    preprocessing), resized to 224x224, unshuffled and without labels.

    Args:
        path: directory handed to `datagen.flow_from_directory`.
        batch_size: number of images read per batch while loading. It does not
            change the returned array, only how it is assembled.

    Returns:
        The concatenation of every batch produced for `path`.
    """
    batches = datagen.flow_from_directory(
        path,
        target_size=(224, 224),
        batch_size=batch_size,  # honour the argument (was hard-coded to 10)
        class_mode=None,
        shuffle=False
    )
    # len(batches) is the number of batches in one pass over the directory.
    return np.concatenate([batches.next() for _ in range(len(batches))])

model.predict(get_data_as_np(crossvalid_dir, 5), batch_size=5)


In [None]:

# Predict over a directory with a generator instead of loading it into memory.
# NOTE(review): this cell previously called self.get_batches / self.model with
# an undefined `path`, none of which exist at notebook scope. It now uses the
# notebook's own datagen and model, and reads from crossvalid_dir -- confirm
# that this is the directory intended here.
test_batches = datagen.flow_from_directory(
    crossvalid_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

# Enough steps to cover every sample once, including a final partial batch.
preds = model.predict_generator(
    test_batches,
    steps=int(np.ceil(test_batches.samples / float(batch_size))))

preds[1:4]
preds.shape