In [10]:
'''This script goes along the blog post
"Building powerful image classification models using very little data"
from blog.keras.io.
It uses data that can be downloaded at:
https://www.kaggle.com/c/dogs-vs-cats/data
In our setup, we:
- created a data/ folder
- created train/ and validation/ subfolders inside data/
- created cats/ and dogs/ subfolders inside train/ and validation/
- put the cat pictures index 0-999 in data/train/cats
- put the cat pictures index 1000-1400 in data/validation/cats
- put the dogs pictures index 12500-13499 in data/train/dogs
- put the dog pictures index 13500-13900 in data/validation/dogs
So that we have 1000 training examples for each class, and 400 validation examples for each class.
In summary, this is our directory structure:
```
data/
    train/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
    validation/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
```
'''
import tensorflow as tf
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D
from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.python.keras import backend as K


# dimensions of our images.
img_width, img_height = 150, 150

train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 30 #50
batch_size = 16

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size)

model.save_weights('first_try.h5')


Found 2000 images belonging to 2 classes.
Found 802 images belonging to 2 classes.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [11]:
score = model.evaluate_generator(validation_generator, nb_validation_samples // batch_size)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.5434517912737286
Test accuracy: 0.7379134860050891


In [12]:
model.save('kerasModel.h5')

In [13]:
del model

In [14]:
from tensorflow.python.keras.models import load_model
model2 = load_model('kerasModel.h5')

In [15]:
import os
from tensorflow.python.keras.preprocessing.image import img_to_array, load_img
import numpy as np

def predict():
    print(os.listdir('data/test/'))
    for image_file in os.listdir('data/test/'):
        x = img_to_array(load_img('data/test/' + image_file, False, target_size=(img_width, img_height)))
        #x = x // 255
        x = np.expand_dims(x, axis = 0)
        prediction = model2.predict_classes(x)
        probability = model2.predict_proba(x)
        print('the image: {} has prediction class:{} and prediction probability: {}'.format(image_file, prediction, probability))
        
predict()

['11.jpg', '9.jpg', '7.jpg', '17.jpg', '6.jpg', '19.jpg', '2.jpg', '14.jpg', '10.jpg', '13.jpg', '20.jpg', '3.jpg', '12.jpg', '15.jpg', '8.jpg', '4.jpg', '18.jpg', '16.jpg', '1.jpg', '5.jpg']
the image: 11.jpg has prediction class:[[0]] and prediction probability: [[0.]]
the image: 9.jpg has prediction class:[[0]] and prediction probability: [[0.]]
the image: 7.jpg has prediction class:[[0]] and prediction probability: [[0.]]
the image: 17.jpg has prediction class:[[1]] and prediction probability: [[1.]]
the image: 6.jpg has prediction class:[[1]] and prediction probability: [[1.]]
the image: 19.jpg has prediction class:[[1]] and prediction probability: [[1.]]
the image: 2.jpg has prediction class:[[1]] and prediction probability: [[1.]]
the image: 14.jpg has prediction class:[[1]] and prediction probability: [[1.]]
the image: 10.jpg has prediction class:[[0]] and prediction probability: [[0.]]
the image: 13.jpg has prediction class:[[1]] and prediction probability: [[1.]]
the image: 2

In [16]:
print(train_generator.class_indices)

{'cats': 0, 'dogs': 1}


In [17]:
print(validation_generator.class_indices)

{'cats': 0, 'dogs': 1}


In [18]:
print(type(train_generator))

<class 'tensorflow.python.keras._impl.keras.preprocessing.image.DirectoryIterator'>
