In [1]:
import numpy as np
from keras.applications import vgg16
from keras.models import Model, Sequential
from keras.layers import Flatten, Dense, Dropout, Input, InputLayer
import keras
import glob
import shutil
import os
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import LabelEncoder
from keras import optimizers

Using TensorFlow backend.


In [2]:
files = glob.glob('dogs-vs-cats/train/*')

cat_files = [fn for fn in files if 'cat' in fn]
dog_files = [fn for fn in files if 'dog' in fn]
len(cat_files), len(dog_files)

(25000, 25000)

In [3]:
cat_train = np.random.choice(cat_files, size=1500, replace=False)
dog_train = np.random.choice(dog_files, size=1500, replace=False)
cat_files = list(set(cat_files) - set(cat_train))
dog_files = list(set(dog_files) - set(dog_train))

cat_val = np.random.choice(cat_files, size=500, replace=False)
dog_val = np.random.choice(dog_files, size=500, replace=False)
cat_files = list(set(cat_files) - set(cat_val))
dog_files = list(set(dog_files) - set(dog_val))

cat_test = np.random.choice(cat_files, size=500, replace=False)
dog_test = np.random.choice(dog_files, size=500, replace=False)

print('Cat datasets:', cat_train.shape, cat_val.shape, cat_test.shape)
print('Dog datasets:', dog_train.shape, dog_val.shape, dog_test.shape)

Cat datasets: (1500,) (500,) (500,)
Dog datasets: (1500,) (500,) (500,)


In [4]:
train_dir = 'training_data'
val_dir = 'validation_data'
test_dir = 'test_data'

train_files = np.concatenate([cat_train, dog_train])
validate_files = np.concatenate([cat_val, dog_val])
test_files = np.concatenate([cat_test, dog_test])

os.mkdir(train_dir) if not os.path.isdir(train_dir) else None
os.mkdir(val_dir) if not os.path.isdir(val_dir) else None
os.mkdir(test_dir) if not os.path.isdir(test_dir) else None

for fn in train_files:
    shutil.copy(fn, train_dir)

for fn in validate_files:
    shutil.copy(fn, val_dir)
    
for fn in test_files:
    shutil.copy(fn, test_dir)

In [5]:
IMG_DIM = (150, 150)

train_files = glob.glob('training_data/*')
train_imgs = [img_to_array(load_img(img, target_size=IMG_DIM)) for img in train_files]
train_imgs = np.array(train_imgs)
train_labels = [fn.split('\\')[1].split('.')[0].strip() for fn in train_files]

validation_files = glob.glob('validation_data/*')
validation_imgs = [img_to_array(load_img(img, target_size=IMG_DIM)) for img in validation_files]
validation_imgs = np.array(validation_imgs)
validation_labels = [fn.split('\\')[1].split('.')[0].strip() for fn in validation_files]

print('Train dataset shape:', train_imgs.shape, '\tValidation dataset shape:', validation_imgs.shape)

train_imgs_scaled = train_imgs.astype('float32')
validation_imgs_scaled  = validation_imgs.astype('float32')
train_imgs_scaled /= 255
validation_imgs_scaled /= 255

Train dataset shape: (2918, 150, 150, 3) 	Validation dataset shape: (992, 150, 150, 3)


In [6]:
le = LabelEncoder()
le.fit(train_labels)
train_labels_enc = le.transform(train_labels)
validation_labels_enc = le.transform(validation_labels)

print(train_labels[1430:1450],train_labels_enc[1430:1450])

['dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog'] [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [7]:
input_shape = (150, 150, 3)

In [8]:
vgg = vgg16.VGG16(include_top=False, weights='imagenet', input_shape=input_shape)
output = vgg.layers[-1].output
output = keras.layers.Flatten()(output)
vgg_model = Model(vgg.input, output)
vgg_model.trainable = False
for layer in vgg_model.layers:
    layer.trainable = False
for layer in vgg_model.layers:
    print(layer, layer.name, layer.trainable)



<keras.engine.input_layer.InputLayer object at 0x0000019A07E924E0> input_1 False
<keras.layers.convolutional.Conv2D object at 0x0000019A07AB8CF8> block1_conv1 False
<keras.layers.convolutional.Conv2D object at 0x0000019A07AFABE0> block1_conv2 False
<keras.layers.pooling.MaxPooling2D object at 0x0000019A69821A20> block1_pool False
<keras.layers.convolutional.Conv2D object at 0x0000019A698357F0> block2_conv1 False
<keras.layers.convolutional.Conv2D object at 0x0000019A6983AFD0> block2_conv2 False
<keras.layers.pooling.MaxPooling2D object at 0x0000019A6983DD68> block2_pool False
<keras.layers.convolutional.Conv2D object at 0x0000019A698449E8> block3_conv1 False
<keras.layers.convolutional.Conv2D object at 0x0000019A698514A8> block3_conv2 False
<keras.layers.convolutional.Conv2D object at 0x0000019A69851390> block3_conv3 False
<keras.layers.pooling.MaxPooling2D object at 0x0000019A6985D978> block3_pool False
<keras.layers.convolutional.Conv2D object at 0x0000019A69864A58> block4_conv1 Fa

In [None]:
def get_bottleneck_features(model, input_imgs):
    features = model.predict(input_imgs, verbose=0)
    return features
    
train_features_vgg = get_bottleneck_features(vgg_model, train_imgs_scaled)
validation_features_vgg = get_bottleneck_features(vgg_model, validation_imgs_scaled)

In [None]:
print('Train Bottleneck Features:', train_features_vgg.shape, 
      '\tValidation Bottleneck Features:', validation_features_vgg.shape)

In [None]:
input_shape = vgg_model.output_shape[1]

model = Sequential()
model.add(InputLayer(input_shape=(input_shape,)))
model.add(Dense(512, activation='relu', input_dim=input_shape))
model.add(Dropout(0.3))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['accuracy'])

model.summary()

In [None]:
batch_size = 100
epochs = 30
model.fit(x=train_features_vgg, y=train_labels_enc, validation_data=(validation_features_vgg, validation_labels_enc),
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1)
model.save('cats_vs_dogs_transfer_learning.h5')