# CNN Transfer Learning on Cats-Dogs Classification


### VGG16 trained on ImageNet data is used as a pre-trained model from which to extract features.  The features are then saved and passed through a neural network with a ReLU hidden layer to classify cats vs. dogs.

#### Adapted from fchollet/classifier_from_little_data_script_2.py (https://gist.github.com/fchollet/f35fbc80e066a49d65f1688a7e99f069) and blog https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

In [None]:
import keras

In [None]:
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dropout, Flatten, Dense
from keras import backend as K
from keras import applications
import numpy as np

In [None]:
# Import TensorFlow.  The two print statements below are template
# placeholders that must be completed before this cell will run.
import tensorflow as tf
print <<<FILL-IN>>>
print <<<FILL-IN>>>

In [None]:
# Set logging level: restrict TensorFlow's log output to the ERROR threshold
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

In [None]:
# Set random generator seeds (fixed values, presumably so runs are repeatable)
# Seed NumPy's global random number generator with 1
from numpy.random import seed
seed(1)

# Seed TensorFlow's random number generator with 2
from tensorflow import set_random_seed
set_random_seed(2)

### Set dimensions, number, and location of images

In [None]:
# Image dimensions: every image is resized to this size by the directory
# generators in save_features() (passed there as target_size)
img_width, img_height = <<<FILL-IN>>>

# Location of images (the generators read pictures from sub-folders of these
# directories)
train_data_dir = <<<FILL-IN>>>
validation_data_dir = <<<FILL-IN>>>

# Build the image tensor shape for the backend's channel ordering:
# the 3 colour channels go first for 'channels_first', last otherwise
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)
    
print (input_shape)

# Number of images
# NOTE: the label construction later in this notebook assumes each count is
# split evenly between the two classes
nb_train_samples = <<<FILL-IN>>>
nb_validation_samples = <<<FILL-IN>>>

# Batch size (used both for feature extraction and for training the top model)
batch_size = 16

### Method to extract features from pre-trained network and save features

In [None]:
def _extract_features(model, datagen, data_dir, num_samples):
    """Run the images under ``data_dir`` through ``model`` and return its outputs.

    Args:
        model: Network whose outputs are used as features.
        datagen: ``ImageDataGenerator`` used to read and scale the images.
        data_dir: Directory whose sub-folders contain the images.
        num_samples: Number of images to extract features for.

    Returns:
        The model outputs for the first ``num_samples`` images, in the order
        the generator presents them.
    """
    # Generator that reads pictures found in sub-folders of data_dir and
    # indefinitely generates batches of (scaled) image data.
    # NOTE(review): Keras documents target_size as (height, width); the order
    # used here is harmless for square images -- confirm for non-square ones.
    generator = datagen.flow_from_directory(
        data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,        # Yield only batches of data, no labels
        shuffle=False)          # Keep directory order so labels can be rebuilt by position

    # Round the number of batches UP: flooring it silently drops the trailing
    # partial batch whenever num_samples is not a multiple of batch_size,
    # leaving fewer feature rows than the labels created from num_samples.
    steps = (num_samples + batch_size - 1) // batch_size

    # predict_generator returns the model output for the batches yielded by
    # the generator
    features = model.predict_generator(generator, steps)

    # The generator loops indefinitely, so the rounded-up step count can
    # yield extra rows; keep exactly num_samples.
    return features[:num_samples]


def save_features():
    """Extract VGG16 features for the train and validation images and save them.

    Reads the module-level settings ``train_data_dir``, ``validation_data_dir``,
    ``nb_train_samples``, ``nb_validation_samples``, ``img_width``,
    ``img_height`` and ``batch_size``.  Writes ``features_train.npy`` and
    ``features_validation.npy`` to the current directory, then prints the
    architecture of the feature-extraction network.
    """
    # Scale pixel values in image
    datagen = ImageDataGenerator(rescale=1. / 255)

    # Load the VGG16 network's imagenet weights, not including the last fully
    # connected layers.
    model = applications.VGG16(include_top=False, weights='imagenet')

    # Save model outputs (i.e., features) for training data as numpy array
    features_train = _extract_features(
        model, datagen, train_data_dir, nb_train_samples)
    np.save('features_train.npy', features_train)

    # Save model outputs (i.e., features) for validation data
    features_validation = _extract_features(
        model, datagen, validation_data_dir, nb_validation_samples)
    np.save('features_validation.npy', features_validation)

    # Print out model architecture
    model.summary()

### Call method to extract and save features from pre-trained network

In [None]:
<<<FILL-IN>>>

### Load saved features

In [None]:
# Load saved features for train data
train_data = <<<FILL-IN>>>
    
# Create labels for train data.  Images were generated in order, so creating labels is easy:
# the first half of the rows gets label 0 and the second half label 1.
# NOTE: this assumes both classes have the same number of images and that
# train_data holds exactly nb_train_samples rows.
train_labels = np.array([0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2)) 

# Load saved features for validation data
validation_data = <<<FILL-IN>>> 
    
# Create labels for validation data (same first-half 0 / second-half 1 layout)
validation_labels = np.array([0] * (nb_validation_samples // 2) + [1] * (nb_validation_samples // 2)) 
    
# Show the label array shapes
print (train_labels.shape, validation_labels.shape)

### Create top model

In [None]:
# Classifier head that sits on top of the saved features:
# flatten -> 256-unit ReLU layer -> 50% dropout -> single sigmoid output
top_model = Sequential([
    Flatten(input_shape=train_data.shape[1:]),  # 3D feature maps -> 1D feature vectors
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Compile for binary classification, tracking accuracy during training
top_model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
print(top_model)
top_model.summary()


### Train top model

In [None]:
# Set up model: number of training epochs
epochs = <<<FILL-IN>>>

# Train model, keeping track of history
# NOTE(review): History is imported but not referenced in the visible code;
# the object returned by fit() is what gets stored in hist.
from keras.callbacks import History
hist = top_model.fit(train_data, train_labels,
                 epochs=epochs,
                 batch_size=batch_size,
                 validation_data=(validation_data, validation_labels))

### Save model and weights

In [None]:
# Save model & weights to HDF5 file
# (top_model_file is the base path; the suffixes below are appended to it)
top_model_file = <<<FILL-IN>>> 
top_model.save(top_model_file + '.h5')

# Save model to JSON file & weights to HDF5 file
# (the to_json() output goes to <base>.json, the weights to <base>-wts.h5)
top_model_json = top_model.to_json()
with open(top_model_file + '.json','w') as json_file:
    json_file.write(top_model_json)
top_model.save_weights(top_model_file+'-wts.h5')

# Results on validation set
# (metrics_names is printed first to label the values evaluate() returns)
print (top_model.metrics_names)
results = top_model.evaluate (validation_data, validation_labels)
print (results)

### Load model again and test

In [None]:
# Reload the complete model from its .h5 file and evaluate it on the same
# validation features
top_model2 = keras.models.load_model(top_model_file+'.h5')
print (validation_labels.shape)

print (top_model2.metrics_names)
results = top_model2.evaluate(validation_data, validation_labels)
# Template placeholder: presumably meant to print the results computed above
print <<<FILL-IN>>>

### Print training history

In [None]:
# Print the history recorded by fit() for the training run above
print (hist.history)

### Plot accuracies

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

result = hist.history

# Older Keras releases record accuracy under 'acc'/'val_acc'; from Keras 2.3
# the history uses the metric name exactly as passed to compile()
# ('accuracy'/'val_accuracy').  Pick whichever key is present so the plot
# does not fail with a KeyError on either version.
acc_key = 'acc' if 'acc' in result else 'accuracy'

# summarize history for accuracy
plt.plot(result[acc_key])
plt.plot(result['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()