In [1]:
from numpy.random import seed
seed(42)
from tensorflow import set_random_seed
set_random_seed(42)
import os, shutil
import keras
keras.__version__

# run this every now and then to clear the state
from keras import backend as K
K.clear_session()

Using TensorFlow backend.


In [2]:
# The path to the directory where the original dataset was uncompressed
file_name = original_dataset_dir = os.path.join('output', 'train')
base_dir =  os.path.join('output', 'smaller_set')
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
train_house_dir = os.path.join(train_dir, 'house')
train_not_house_dir = os.path.join(train_dir, 'not_house')
validation_house_dir = os.path.join(validation_dir, 'house')
validation_not_house_dir = os.path.join(validation_dir, 'not_house')
test_house_dir = os.path.join(test_dir, 'house')
test_not_house_dir = os.path.join(test_dir, 'not_house')

generate = True

if generate:
    # The directory where we will store our smaller dataset
    os.mkdir(base_dir)

    # Directories for our training, validation and test splits
    os.mkdir(train_dir)
    os.mkdir(validation_dir)
    os.mkdir(test_dir)

    # Directory with our training cat pictures
    os.mkdir(train_house_dir)

    # Directory with our training dog pictures
    os.mkdir(train_not_house_dir)

    # Directory with our validation cat pictures
    os.mkdir(validation_house_dir)

    # Directory with our validation dog pictures
    os.mkdir(validation_not_house_dir)

    # Directory with our validation cat pictures
    os.mkdir(test_house_dir)

    # Directory with our validation dog pictures
    os.mkdir(test_not_house_dir)

    # Copy first 1000 cat images to train_cats_dir
    fnames = ['house.{}.jpg'.format(i) for i in range(200)]
    for fname in fnames:
        src = os.path.join(original_dataset_dir, fname)
        dst = os.path.join(train_house_dir, fname)
        shutil.copyfile(src, dst)

    # Copy next 500 cat images to validation_cats_dir
    fnames = ['house.{}.jpg'.format(i) for i in range(200, 300)]
    for fname in fnames:
        src = os.path.join(original_dataset_dir, fname)
        dst = os.path.join(validation_house_dir, fname)
        shutil.copyfile(src, dst)

    # Copy next 500 cat images to test_cats_dir
    fnames = ['house.{}.jpg'.format(i) for i in range(300, 400)]
    for fname in fnames:
        src = os.path.join(original_dataset_dir, fname)
        dst = os.path.join(test_house_dir, fname)
        shutil.copyfile(src, dst)

    # Copy first 1000 dog images to train_dogs_dir
    fnames = ['not_house.{}.jpg'.format(i) for i in range(200)]
    for fname in fnames:
        src = os.path.join(original_dataset_dir, fname)
        dst = os.path.join(train_not_house_dir, fname)
        shutil.copyfile(src, dst)

    # Copy next 500 dog images to validation_dogs_dir
    fnames = ['not_house.{}.jpg'.format(i) for i in range(200, 300)]
    for fname in fnames:
        src = os.path.join(original_dataset_dir, fname)
        dst = os.path.join(validation_not_house_dir, fname)
        shutil.copyfile(src, dst)

    # Copy next 500 dog images to test_dogs_dir
    fnames = ['not_house.{}.jpg'.format(i) for i in range(300, 400)]
    for fname in fnames:
        src = os.path.join(original_dataset_dir, fname)
        dst = os.path.join(test_not_house_dir, fname)
        shutil.copyfile(src, dst)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'output\\smaller_set'

In [None]:
print('total validation not house images:', len(os.listdir(validation_not_house_dir)))
print('total validation house images:', len(os.listdir(validation_house_dir)))
print('total training not house images:', len(os.listdir(train_not_house_dir)))
print('total training house images:', len(os.listdir(train_house_dir)))
print('total test house images:', len(os.listdir(test_house_dir)))
print('total test not_house images:', len(os.listdir(test_not_house_dir)))

In [None]:
from keras import layers
from keras import models

model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

In [None]:
model.summary()

In [None]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(model, show_shapes=True, show_layer_names=True).create(prog='dot', format='svg'))

In [None]:
from keras import optimizers

model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        # This is the target directory
        train_dir,
        # All images will be resized to 150x150
        target_size=(150, 150),
        batch_size=20,
        # Since we use binary_crossentropy loss, we need binary labels
        class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')

In [None]:
for data_batch, labels_batch in train_generator:
    print('data batch shape:', data_batch.shape)
    print('labels batch shape:', labels_batch.shape)
    break

In [None]:
history = model.fit_generator(
      train_generator,
      steps_per_epoch=100,
      epochs=30,
      validation_data=validation_generator,
      validation_steps=50)

In [None]:
save_model = True
if save_model == True:
    model.save('house_not_house_small_2.h5')
    import pickle
    with open('/model/train_history_2.pkl', 'wb') as history_file:
        pickle.dump(history.history, history_file)



In [None]:
from keras.models import load_model
file_name = os.path.join('data', 'h5', 'house_not_house_small_2.h5')
model = load_model(file_name)

with open('/model/train_history_2.pkl', 'rb') as history_file:
    history_pickle = pickle.load(history_file)



In [None]:
import matplotlib.pyplot as plt

acc = history_pickle['acc']
val_acc = history_pickle['val_acc']
loss = history_pickle['loss']
val_loss = history_pickle['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [None]:
datagen = ImageDataGenerator(
      rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')

In [None]:
# This is module with image preprocessing utilities
from keras.preprocessing import image

fnames = [os.path.join(train_house_dir, fname) for fname in os.listdir(train_house_dir)]

# We pick one image to "augment"
img_path = fnames[3]

# Read the image and resize it
img = image.load_img(img_path, target_size=(150, 150))

# Convert it to a Numpy array with shape (150, 150, 3)
x = image.img_to_array(img)

# Reshape it to (1, 150, 150, 3)
x = x.reshape((1,) + x.shape)

# The .flow() command below generates batches of randomly transformed images.
# It will loop indefinitely, so we need to `break` the loop at some point!
i = 0
for batch in datagen.flow(x, batch_size=1):
    plt.figure(i)
    imgplot = plt.imshow(image.array_to_img(batch[0]))
    i += 1
    if i % 4 == 0:
        break

plt.show()

In [None]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu',
                        input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])

In [None]:
test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')

test_loss, test_acc = model.evaluate_generator(test_generator, steps=50)
print('test acc:', test_acc)

In [None]:
image_path = os.path.join("web_images", "house.0.jpg")
predict(image_path)