In [3]:
'''This script goes along the blog post
"Building powerful image classification models using very little data"
from blog.keras.io.
It uses data that can be downloaded at:
https://www.kaggle.com/c/dogs-vs-cats/data
In our setup, we:
- created a data/ folder
- created train/ and validation/ subfolders inside data/
- created cats/ and dogs/ subfolders inside train/ and validation/
- put the cat pictures index 0-999 in data/train/cats
- put the cat pictures index 1000-1400 in data/validation/cats
- put the dogs pictures index 12500-13499 in data/train/dogs
- put the dog pictures index 13500-13900 in data/validation/dogs
So that we have 1000 training examples for each class, and 400 validation 
examples for each class.
In summary, this is our directory structure:
```
data/
    train/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
    validation/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
```
'''
import numpy as np
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications

# dimensions of our images.
# TODO: set properly
img_width, img_height = 150, 150

train_data_dir = '../data/train'
validation_data_dir = '../data/validation'

top_train_features_path = 'VGG16_exp1_bottleneck_features_train.npy'
top_train_labels_path = 'VGG16_exp1_bottleneck_labels_train.npy'
top_validation_features_path = 'VGG16_exp1_bottleneck_features_validation.npy'
top_validation_labels_path = 'VGG16_exp1_bottleneck_labels_validation.npy'
top_model_weights_path = 'VGG16_exp1_bottleneck_fc_model.h5'

nb_train_samples = 1152
nb_validation_samples = 288
batch_size = 4
epochs = 10


# build the VGG16 network
# model = applications.VGG16(include_top=False, weights='imagenet')

datagen = ImageDataGenerator(rescale=1. / 255)

In [89]:
# generate train features

generator = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)
bottleneck_features_train = model.predict_generator(
    generator, nb_train_samples // batch_size)
np.save(
    open(top_train_features_path, 'w'),
    bottleneck_features_train)

# train_features = np.load(open(top_train_features_path))

In [200]:
# generate train labels

generator = datagen.flow_from_directory(
    train_data_dir,
    batch_size=nb_train_samples - nb_train_samples % batch_size,
    class_mode='categorical',
    shuffle=False)
bottleneck_labels_train = generator.next()[1]
np.save(
    open(top_train_labels_path, 'w'),
    bottleneck_labels_train)

# train_labels = np.load(open(top_train_labels_path))

Found 1152 images belonging to 21 classes.


In [92]:
# generate validation features

generator = datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)
bottleneck_features_validation = model.predict_generator(
    generator, nb_validation_samples // batch_size)
np.save(
    open(top_validation_features_path, 'w'),
    bottleneck_features_validation)

# validation_features = np.load(open(top_validation_features_path))

Found 288 images belonging to 21 classes.


In [13]:
# generate validation labels

generator = datagen.flow_from_directory(
    validation_data_dir,
    batch_size=nb_validation_samples - nb_validation_samples % batch_size,
    class_mode='categorical',
    shuffle=False)
bottleneck_labels_validation = generator.next()[1]
np.save(
    open(top_validation_labels_path, 'w'),
    bottleneck_labels_validation)

# validation_labels = np.load(open(top_validation_labels_path))

Found 288 images belonging to 21 classes.


array([], dtype=float64)

In [21]:
# train the model

train_data = np.load(open(top_train_features_path)).reshape((1152, 4, 512))
train_labels = np.load(open(top_train_labels_path))#.reshape((288, 4, 21))

validation_data = np.load(open(top_validation_features_path)).reshape((288, 4, 512))
validation_labels = np.load(open(top_validation_labels_path))#.reshape((72, 4, 21))

print train_data.shape
print train_labels.shape
print validation_data.shape
print validation_labels.shape

model = Sequential()
model.add(Flatten(input_shape=train_data.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(21, activation='softmax'))

model.compile(
    optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

model.fit(train_data,
          train_labels,
          batch_size=batch_size,
          validation_data=(validation_data,validation_labels))
model.save_weights(top_model_weights_path)

(288, 4, 4, 512)
(288, 4, 21)
(72, 4, 4, 512)
(72, 4, 21)


Exception: Error when checking model target: expected dense_18 to have 2 dimensions, but got array with shape (288, 4, 21)

In [16]:
train_labels = np.load(open(top_train_labels_path)).reshape((288, 4, 21))
train_labels

array([[[ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       [[ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.],
        [ 1.,  0.,  0., ...,  0.,  0.,  0.]],

       ..., 
       [[ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.]],

       [[ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.,  0.,  1.]],

       [[ 0.,  0.,  0., ...,  0.,  0.,  1.],
        [ 0.,  0.,  0., ...,  0.

In [17]:
train_labels = np.load(open(top_train_labels_path))
train_labels

array([[ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  1.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.]], dtype=float32)