**Transfer Learning**

Use transfer learning with a pre-trained VGG16 network to classify images as cat or dog.

**Inspiration**

* https://www.kaggle.com/jeffd23/catdognet-keras-convnet-starter/notebook
* https://www.kaggle.com/inoryy/api-for-keras-models-pre-trained-angles/notebook
* https://gist.github.com/carlos-aguayo/7786f6150ee6702a7416d5dd4e925fb9
* https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

In [None]:
import os
import numpy as np
import os, cv2
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
import matplotlib.pyplot as plt
%matplotlib inline 

In [None]:
# Print the contents of the input root and of the competition folder.
for _input_dir in ("../input", "../input/c/dogs-vs-cats-redux-kernels-edition"):
    print(os.listdir(_input_dir))

In [None]:
# Extract the train and test archives into the current working directory,
# appending unzip's stdout to /dev/null to keep the cell output quiet.
# NOTE(review): if the files already exist (cell re-run), unzip may prompt
# before overwriting - confirm, and consider passing `-o` or `-n`.
!unzip "../input/c/dogs-vs-cats-redux-kernels-edition/train.zip" >> /dev/null
!unzip "../input/c/dogs-vs-cats-redux-kernels-edition/test.zip" >> /dev/null

In [None]:
# Location of the VGG16 weights file that is handed to VGG16(weights=...).
vgg16_weights = (
    "../input/vgg16/"
    "vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5"
)

In [None]:
from keras.applications import VGG16

In [None]:
# Target image size. ROWS/COLS shape the numpy image arrays; img_width/img_height
# are used when building the VGG16 input. Both pairs describe the same image, so
# the second pair is derived from the first to keep them from drifting apart.
ROWS, COLS = 150, 150
img_width, img_height = COLS, ROWS
CHANNELS = 3

In [None]:
# VGG16 without its top (fully connected) layers, loaded from the local weights
# file. input_shape is given as (height, width, channels) so it matches the
# (ROWS, COLS, CHANNELS) layout of the image arrays built later.
vgg16_model = VGG16(include_top=False,
                    weights=vgg16_weights,
                    input_shape=(img_height, img_width, CHANNELS))
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
vgg16_model.summary()

In [None]:
# Image folders relative to the working directory.
# Alternative locations under the input mount:
#   '../input/c/dogs-vs-cats-redux-kernels-edition/train/'
#   '../input/c/dogs-vs-cats-redux-kernels-edition/test/'
TRAIN_DIR, TEST_DIR = 'train/', 'test/'

In [None]:
# os.listdir(TRAIN_DIR) returns bare file names, something like:
# ['cat.9491.jpg', 'cat.11613.jpg', 'cat.11841.jpg', 'dog.423.jpg', 'cat.11501.jpg', ...]

# Scan the training directory once so that the three lists below are built from
# the same snapshot (and in the same order) instead of three separate listings.
_train_files = os.listdir(TRAIN_DIR)

train_images = [TRAIN_DIR + name for name in _train_files]  # use this for full dataset
train_dogs = [TRAIN_DIR + name for name in _train_files if 'dog' in name]
train_cats = [TRAIN_DIR + name for name in _train_files if 'cat' in name]

test_images = [TEST_DIR + name for name in os.listdir(TEST_DIR)]

print("Total training images: {:,}".format(len(train_images)))
print("Total test images: {:,}".format(len(test_images)))

In [None]:
# Work on a subset to limit memory use on Kaggle Kernels: the first 1000 dog
# paths followed by the first 1000 cat paths, and the first 25 test paths.
train_images = [*train_dogs[:1000], *train_cats[:1000]]
test_images = test_images[:25]

In [None]:
# One label per entry of train_images, in the same order: 1 = dog, 0 = anything else.
# Only the file name is inspected (e.g. 'dog.423.jpg'), so a directory component
# that happens to contain "dog" cannot turn every image into a dog.
labels = [1 if os.path.basename(path).startswith('dog') else 0
          for path in train_images]

print ("len labels: {:,}".format(len(labels)))
print ("number of dogs: {:,}".format(sum(labels)))

In [None]:
%%time 

def read_image(file_path):
    """Load an image with OpenCV and resize it to ROWS x COLS.

    Args:
        file_path: path of the image file to read.

    Returns:
        The image as read by cv2.imread with cv2.IMREAD_COLOR, resized with
        bicubic interpolation to ROWS rows and COLS columns.

    Raises:
        IOError: if OpenCV could not read the file (cv2.imread returned None).
    """
    img = cv2.imread(file_path, cv2.IMREAD_COLOR) #cv2.IMREAD_GRAYSCALE
    if img is None:
        # Fail with the offending path instead of an opaque error from cv2.resize.
        raise IOError("Could not read image: {}".format(file_path))
    # cv2.resize expects dsize as (width, height), i.e. (COLS, ROWS).
    return cv2.resize(img, (COLS, ROWS), interpolation=cv2.INTER_CUBIC)

def prep_data(images):
    """Load every path in `images` into a single uint8 array.

    Each path is read with read_image and stored at the matching index of an
    array shaped (len(images), ROWS, COLS, CHANNELS). A progress line is
    printed for every 250th image, starting with index 0.
    """
    total = len(images)
    batch = np.empty((total, ROWS, COLS, CHANNELS), dtype=np.uint8)

    for position, path in enumerate(images):
        batch[position] = read_image(path)
        if position % 250 == 0:
            print('Processed {} of {}'.format(position, total))

    return batch

# Materialise the selected training and test images as arrays.
train = prep_data(images=train_images)
test = prep_data(images=test_images)

In [None]:
# Report the shapes of both image arrays, one per line.
print("Train shape: {}".format(train.shape),
      "Test shape: {}".format(test.shape),
      sep="\n")

In [None]:
def show_cats_and_dogs(idx):
    """Display the idx-th cat and idx-th dog training images side by side.

    Args:
        idx: index into both train_cats and train_dogs.
    """
    cat = read_image(train_cats[idx])
    dog = read_image(train_dogs[idx])
    pair = np.concatenate((cat, dog), axis=1)
    # Create the figure first: axis("off") acts on the current axes, so calling
    # it before plt.figure() would hide the axes of a different, empty figure.
    plt.figure(figsize=(10,5))
    # The images are converted from BGR to RGB for matplotlib.
    plt.imshow(cv2.cvtColor(pair, cv2.COLOR_BGR2RGB))
    plt.axis("off")
    plt.show()
    
# Preview the first five cat/dog pairs.
for idx in range(5):
    show_cats_and_dogs(idx)

In [None]:
# Scale pixel values by 1/255 and iterate over the training array without
# shuffling, so batches come out in the same order as `train`.
datagen = ImageDataGenerator(rescale=1. / 255)
generator = datagen.flow(x=train, y=labels, shuffle=False)

In [None]:
%%time

# Run the rescaled training images through the VGG16 base once and keep its
# outputs as input features for the small classifier trained below.
# Model.predict accepts the generator directly; predict_generator is deprecated.
bottleneck_features_train = vgg16_model.predict(generator)

In [None]:
nb_train_samples = bottleneck_features_train.shape[0]
epochs = 50
batch_size = 16

train_data = bottleneck_features_train
# Targets for the classifier head use 1 = cat, 0 = dog, because the prediction
# cells report an output >= 0.5 as "Cat". That is the inverse of `labels`
# (1 = dog). Deriving the targets from `labels` keeps each one aligned with its
# image, rather than assuming a balanced "all dogs, then all cats" ordering.
train_labels = 1 - np.asarray(labels)
if len(train_labels) != nb_train_samples:
    raise ValueError(
        "Got {} labels for {} bottleneck feature rows".format(
            len(train_labels), nb_train_samples))

In [None]:
# Small classifier head trained on the bottleneck features: flatten, one hidden
# dense layer with dropout, and a single sigmoid unit for the binary decision.
model = Sequential()
model.add(Flatten(input_shape=train_data.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit an extra "None" line).
model.summary()

In [None]:
%%time

# Train the classifier head on the precomputed features; verbose=2 logs one line per epoch.
model.fit(x=train_data, y=train_labels, batch_size=batch_size, epochs=epochs, verbose=2)

In [None]:
# Chain the trained classifier head onto the VGG16 base so that a single model
# maps an input image to the final sigmoid output.
classifier_output = model(vgg16_model.output)
complete_model = Model(inputs=vgg16_model.input, outputs=classifier_output)

In [None]:
%%time

# The training features were computed from pixels rescaled by 1/255, so the
# uint8 test images get the same scaling before they are fed to the model.
predictions = complete_model.predict(test.astype("float32") * (1. / 255))

In [None]:
# Show up to the first 10 test images with the model's verdict. The bound is
# capped at the number of predictions so a smaller test subset does not raise
# an IndexError.
for i in range(min(10, len(predictions))):
    cat_probability = predictions[i, 0]
    if cat_probability >= 0.5:
        print('I am {:.2%} sure this is a Cat'.format(cat_probability))
    else:
        print('I am {:.2%} sure this is a Dog'.format(1 - cat_probability))

    # Test images are converted from BGR to RGB for matplotlib.
    plt.imshow(cv2.cvtColor(test[i], cv2.COLOR_BGR2RGB))
    plt.axis("off")
    plt.show()

In [None]:
from skimage import io

img_url = 'https://yt3.ggpht.com/ytc/AKedOLRvxGYSdEHqu0X4EYcJ2kq7BttRKBNpfwdHJf3FSg=s900-c-k-c0x00ffffff-no-rj'

# skimage loads images in RGB order; depending on the file it may also return a
# grayscale (2-D) or RGBA (4-channel) array, so normalise to 3-channel RGB first.
img = io.imread(img_url)
if img.ndim == 2:
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
elif img.shape[2] == 4:
    img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
# cv2.resize expects dsize as (width, height), i.e. (COLS, ROWS).
img = cv2.resize(img, (COLS, ROWS), interpolation=cv2.INTER_CUBIC)
img = img / 255.0
img = img.reshape(1,ROWS,COLS,3)

# The training and test images were read with cv2.imread (BGR channel order),
# so reverse the channel axis before predicting; `img` stays RGB for display.
bgr_batch = np.ascontiguousarray(img[..., ::-1])

prediction = complete_model.predict(bgr_batch)[0][0]
if prediction >= 0.5:
    print('I am {:.2%} sure this is a Cat'.format(prediction))
else: 
    print('I am {:.2%} sure this is a Dog'.format(1-prediction))

# No colormap argument: matplotlib ignores cmap for RGB image data.
plt.imshow(img[0])
plt.axis("off")
plt.show()