In [2]:
import keras
from keras import layers, models
from keras.applications import VGG16
from keras import optimizers
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from matplotlib import pyplot as plt
import os

Using TensorFlow backend.


In [3]:
# ---- Experiment configuration ----
# Side length (in pixels) used for the network input and for resizing images,
# plus the batch size and number of training epochs.
IMAGE_SIZE, BATCH_SIZE, NUM_EPOCHS = 150, 20, 30

# Image counts per dataset split; used to derive steps per epoch / evaluation.
TOTAL_TRAIN_IMAGES, TOTAL_VALID_IMAGES, TOTAL_TEST_IMAGES = 2000, 1000, 1000

# Project layout: everything lives under base_dir.
base_dir = 'drive/workspace/Cloud_Service/Google_Colab/DogVsCat_Kaggle'
model_dir = os.path.join(base_dir, 'model_checkpoint/vgg16_aug_30epochs')
dataset_dir = os.path.join(base_dir, 'Dataset')

# One sub-directory of dataset_dir per split.
train_dir, validation_dir, test_dir = (
    os.path.join(dataset_dir, split_name)
    for split_name in ('train', 'valid', 'test')
)

# Sanity-check the workspace before building the model.
# The checkpoint callback and the final model.save() both write into
# model_dir, so make sure it exists up front; on a fresh workspace the
# os.listdir(model_dir) call below would otherwise raise.
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

print(len(os.listdir(train_dir)))  # number of entries in the training directory
print(len(os.listdir(model_dir)))  # number of files already present in model_dir

2
0


In [4]:
# Load the VGG16 convolutional base with ImageNet weights, without the
# original classifier head (include_top=False).
vgg_input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
conv_base = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=vgg_input_shape,
)
conv_base.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584   

In [5]:
# Full model: the VGG16 convolutional base followed by a small dense head
# ending in a single sigmoid unit.
classifier_stack = [
    conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
]
model = models.Sequential(classifier_stack)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 4, 4, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               2097408   
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 16,812,353
Trainable params: 16,812,353
Non-trainable params: 0
_________________________________________________________________


In [6]:
# Freeze the pretrained VGG16 base so that only the dense head is trained;
# the summary printed below reports the VGG16 parameters as non-trainable.
conv_base.trainable = False

rmsprop = optimizers.RMSprop(lr=2e-5)
model.compile(
    optimizer=rmsprop,
    loss='binary_crossentropy',
    metrics=['acc'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 4, 4, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               2097408   
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 16,812,353
Trainable params: 2,097,665
Non-trainable params: 14,714,688
_________________________________________________________________


In [8]:
# Image pipelines. Only the training images are augmented; validation and
# test images are just rescaled.
augmentation_options = dict(
    rotation_range=40,
    height_shift_range=0.2,
    width_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# All three directory iterators share the same image size, batch size and
# label mode.
flow_options = dict(
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = validation_datagen.flow_from_directory(validation_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)

Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [0]:
# Checkpoint callback: monitors validation accuracy (mode='max') and, with
# save_best_only=True, keeps only improving models. The file name template
# carries the epoch number and the validation accuracy.
checkpoint_file_name = 'valid-acc-improvement-{epoch:02d}-{val_acc:.2f}.hdf5'
checkpoint_path = os.path.join(model_dir, checkpoint_file_name)
val_acc_checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [0]:
# Train the model. Step counts are derived from the split sizes and the
# batch size defined in the configuration cell.
train_steps = TOTAL_TRAIN_IMAGES // BATCH_SIZE
valid_steps = TOTAL_VALID_IMAGES // BATCH_SIZE

history = model.fit_generator(
    train_generator,
    epochs=NUM_EPOCHS,
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=valid_steps,
    callbacks=[val_acc_checkpoint],
)

Epoch 1/30

Epoch 00001: val_acc improved from -inf to 0.83800, saving model to drive/workspace/Cloud_Service/Google_Colab/DogVsCat_Kaggle/model_checkpoint/vgg16_aug_30epochs/valid-acc-improvement-01-0.84.hdf5
Epoch 2/30

Epoch 00002: val_acc improved from 0.83800 to 0.86000, saving model to drive/workspace/Cloud_Service/Google_Colab/DogVsCat_Kaggle/model_checkpoint/vgg16_aug_30epochs/valid-acc-improvement-02-0.86.hdf5
Epoch 3/30


Epoch 00003: val_acc improved from 0.86000 to 0.86000, saving model to drive/workspace/Cloud_Service/Google_Colab/DogVsCat_Kaggle/model_checkpoint/vgg16_aug_30epochs/valid-acc-improvement-03-0.86.hdf5
Epoch 4/30

Epoch 00004: val_acc improved from 0.86000 to 0.88300, saving model to drive/workspace/Cloud_Service/Google_Colab/DogVsCat_Kaggle/model_checkpoint/vgg16_aug_30epochs/valid-acc-improvement-04-0.88.hdf5
Epoch 5/30


Epoch 00005: val_acc did not improve from 0.88300
Epoch 6/30

Epoch 00006: val_acc did not improve from 0.88300
Epoch 7/30

Epoch 00007: val_acc improved from 0.88300 to 0.88800, saving model to drive/workspace/Cloud_Service/Google_Colab/DogVsCat_Kaggle/model_checkpoint/vgg16_aug_30epochs/valid-acc-improvement-07-0.89.hdf5
Epoch 8/30


Epoch 00008: val_acc improved from 0.88800 to 0.89000, saving model to drive/workspace/Cloud_Service/Google_Colab/DogVsCat_Kaggle/model_checkpoint/vgg16_aug_30epochs/valid-acc-improvement-08-0.89.hdf5
Epoch 9/30

Epoch 00009: val_acc improved from 0.89000 to 0.89300, saving model to drive/workspace/Cloud_Service/Google_Colab/DogVsCat_Kaggle/model_checkpoint/vgg16_aug_30epochs/valid-acc-improvement-09-0.89.hdf5
Epoch 10/30


Epoch 00010: val_acc did not improve from 0.89300
Epoch 11/30

Epoch 00011: val_acc did not improve from 0.89300
Epoch 12/30

Epoch 00012: val_acc improved from 0.89300 to 0.90100, saving model to drive/workspace/Cloud_Service/Google_Colab/DogVsCat_Kaggle/model_checkpoint/vgg16_aug_30epochs/valid-acc-improvement-12-0.90.hdf5
Epoch 13/30


Epoch 00013: val_acc improved from 0.90100 to 0.90300, saving model to drive/workspace/Cloud_Service/Google_Colab/DogVsCat_Kaggle/model_checkpoint/vgg16_aug_30epochs/valid-acc-improvement-13-0.90.hdf5
Epoch 14/30

In [0]:
# Persist the model as it stands after the final training epoch.
last_model_path = os.path.join(model_dir, 'last_model_vgg16_aug.h5')
model.save(last_model_path)

In [0]:
# plot history
# Per-epoch metrics recorded by fit_generator.
loss = history.history['loss']
val_loss = history.history['val_loss']
acc = history.history['acc']
val_acc = history.history['val_acc']


def _plot_history_curve(train_values, valid_values, metric_label, title, file_name):
    """Plot a training/validation metric per epoch and save it into model_dir.

    train_values / valid_values: per-epoch values of the metric.
    metric_label: legend label of the training curve; the validation curve
        is labelled 'val_' + metric_label.
    title: figure title.
    file_name: name of the image file written into model_dir.

    Returns the matplotlib figure.
    """
    # Epochs are numbered from 1, matching the "Epoch k/N" training log.
    epochs = range(1, len(train_values) + 1)

    figure, ax = plt.subplots()
    ax.plot(epochs, train_values, label=metric_label)
    ax.plot(epochs, valid_values, label='val_' + metric_label)
    ax.set_title(title)
    ax.set_xlabel('epoch')
    ax.set_ylabel(metric_label)
    ax.legend()

    figure.savefig(os.path.join(model_dir, file_name))
    return figure


fig = _plot_history_curve(
    acc, val_acc, 'acc',
    'Training and validation accuracy', 'Training_Validation_Accuracy.jpg')
fig = _plot_history_curve(
    loss, val_loss, 'loss',
    'Training and validation loss', 'Training_Validation_Loss.jpg')

plt.show()

In [0]:
# Evaluate the in-memory model (state after the final epoch) on the test data.
test_steps = TOTAL_TEST_IMAGES // BATCH_SIZE
scores = model.evaluate_generator(test_generator, steps=test_steps)
print(scores)


In [0]:
# load best validation model
# The checkpoint callback expands '{epoch:02d}-{val_acc:.2f}' in the file
# name, so the saved files look like 'valid-acc-improvement-07-0.89.hdf5';
# a file literally named 'valid-acc-improvement-.hdf5' is never written.
# Locate the checkpoints that actually exist instead of hard-coding a name.
checkpoint_prefix = 'valid-acc-improvement-'
checkpoint_suffix = '.hdf5'


def _checkpoint_epoch(file_name):
    """Return the epoch number embedded in a checkpoint file name, or -1."""
    epoch_text = file_name[len(checkpoint_prefix):].split('-', 1)[0]
    return int(epoch_text) if epoch_text.isdigit() else -1


saved_checkpoints = [
    file_name for file_name in os.listdir(model_dir)
    if file_name.startswith(checkpoint_prefix) and file_name.endswith(checkpoint_suffix)
]
if not saved_checkpoints:
    raise IOError('No validation checkpoint found in {}'.format(model_dir))

# The callback uses save_best_only=True, so a file is written only when
# val_acc improves; the checkpoint of the latest epoch is therefore the best
# one of the run.
# NOTE(review): assumes model_dir only holds checkpoints from a single
# training run -- confirm before reusing the directory.
best_val_model_path = os.path.join(
    model_dir, max(saved_checkpoints, key=_checkpoint_epoch))
model2 = models.load_model(best_val_model_path)

In [0]:
# Evaluate the reloaded best-validation checkpoint on the same test data.
test_steps = TOTAL_TEST_IMAGES // BATCH_SIZE
scores2 = model2.evaluate_generator(test_generator, steps=test_steps)
print(scores2)