In [26]:
'''This script goes along the blog post
"Building powerful image classification models using very little data"
from blog.keras.io.
It uses data that can be downloaded at:
https://www.kaggle.com/c/dogs-vs-cats/data
In our setup, we:
- created a data/ folder
- created train/ and validation/ subfolders inside data/
- created cats/ and dogs/ subfolders inside train/ and validation/
- put the cat pictures index 0-999 in data/train/cats
- put the cat pictures index 1000-1400 in data/validation/cats
- put the dog pictures index 12500-13499 in data/train/dogs
- put the dog pictures index 13500-13900 in data/validation/dogs
So that we have 1000 training examples for each class, and 400 validation 
examples for each class.
In summary, this is our directory structure:
```
data/
    train/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
    validation/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
```
'''
import numpy as np
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications

train_data_dir = '../data/train'
validation_data_dir = '../data/validation'

train_features_path = 'VGG16_exp1_train_features.npy'
train_labels_path = 'VGG16_exp1_train_labels.npy'
validation_features_path = 'VGG16_exp1_validation_features.npy'
validation_labels_path = 'VGG16_exp1_validation_labels.npy'
weights_path = 'VGG16_exp1_top_model.h5'

# TODO: set properly
width, height = 150, 150
train_samples = 1152
validation_samples = 288
batch_size = 4
epochs = 20

# build the VGG16 network
# VGG16 = applications.VGG16(include_top=False, weights='imagenet')

In [9]:
def generate_train_features():
    datagen = ImageDataGenerator(rescale=1. / 255)

    generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(width, height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    train_features = VGG16.predict_generator(generator,
                                             train_samples // batch_size)
    np.save(open(train_features_path, 'w'), train_features)

In [5]:
def generate_train_labels():
    datagen = ImageDataGenerator(rescale=1. / 255)
    generator = datagen.flow_from_directory(
        train_data_dir,
        batch_size=batch_size * (nb_train_samples // batch_size),
        class_mode='categorical',
        shuffle=False)
    train_labels = generator.next()[1]
    np.save(open(train_labels_path, 'w'), train_labels)

In [10]:
def generate_validation_features():
    generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(width, height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    validation_features = VGG16.predict_generator(
        generator, validation_samples // batch_size)
    np.save(open(validation_features_path, 'w'), validation_features)

In [7]:
def generate_validation_labels():
    generator = datagen.flow_from_directory(
        validation_data_dir,
        batch_size=batch_size * (nb_validation_samples // batch_size),
        class_mode='categorical',
        shuffle=False)
    validation_labels = generator.next()[1]
    np.save(open(validation_labels_path, 'w'), validation_labels)

In [None]:
def train_top_model():
    train_features = np.load(open(train_features_path)).reshape((1152, 4, 512))
    train_labels = np.load(open(train_labels_path))

    validation_features = np.load(open(validation_features_path)).reshape(
        (288, 4, 512))
    validation_labels = np.load(open(validation_labels_path))

    print train_features.shape
    print train_labels.shape
    print validation_features.shape
    print validation_labels.shape

    model = Sequential()
    model.add(Flatten(input_shape=train_features.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(21, activation='softmax'))

    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    model.fit(train_features,
              train_labels,
              batch_size=batch_size,
              nb_epoch=epochs,
              validation_data=(validation_features, validation_labels))
    model.save_weights(weights_path)


# generate_train_features()
# generate_train_labels()
# generate_validation_features()
# generate_validation_labels()

train_top_model()

# revisar esto para hacer checkpoints
# http://stackoverflow.com/questions/35074549/how-to-load-a-model-from-an-hdf5-file-in-keras