In [1]:
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [2]:
import glob 
import numpy as np 
import os 
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img 
np.random.seed(42) 

import matplotlib.pyplot as plt 
%matplotlib inline 

Using TensorFlow backend.


In [3]:
# Locations of the training, validation and test image folders
# (relative to the notebook's working directory).
train_dir, val_dir, test_dir = (
    'data/training_data',
    'data/validation_data',
    'data/test_data',
)

In [4]:
IMG_DIM = (150, 150)


def label_from_path(path):
    """Return the class label encoded in an image file name.

    The label is the part of the base file name before the first dot
    (assumes names such as ``cat.0.jpg`` -- TODO confirm against the dataset).
    Backslashes are normalised to forward slashes first, so paths returned by
    ``glob`` are handled the same way on Windows and on POSIX systems; the
    previous ``split('\\')[1]`` raised IndexError when no backslash was present.
    """
    file_name = os.path.basename(path.replace('\\', '/'))
    return file_name.split('.')[0].strip()


def load_image_folder(pattern, target_size=IMG_DIM):
    """Load every file matching ``pattern`` as an image array.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the image files.
    target_size : tuple of int
        Size every image is resized to while loading.

    Returns
    -------
    (files, images, labels)
        The sorted list of matched paths, a NumPy array stacking the loaded
        images, and the list of text labels parsed from the file names.
    """
    # glob returns matches in arbitrary order; sort so that positions in the
    # arrays are reproducible from run to run.
    files = sorted(glob.glob(pattern))
    images = np.array([img_to_array(load_img(path, target_size=target_size))
                       for path in files])
    labels = [label_from_path(path) for path in files]
    return files, images, labels


train_files, train_imgs, train_labels = load_image_folder(
    'data/training_data/*')
validation_files, validation_imgs, validation_labels = load_image_folder(
    'data/validation_data/*')

print('Train dataset shape:', train_imgs.shape,
      'Validation dataset shape:', validation_imgs.shape)

Train dataset shape: (3000, 150, 150, 3) Validation dataset shape: (1000, 150, 150, 3)


#### Set the basic configuration parameters and encode the text class labels as numeric values (otherwise, Keras will throw an error):

In [5]:
from sklearn.preprocessing import LabelEncoder

# Training configuration: batch size, number of classes, number of epochs,
# and the shape of one input image (150 x 150 pixels, 3 channels).
batch_size, num_classes, epochs = 30, 2, 30
input_shape = (150, 150, 3)

# Encode the text category labels as integers. The encoder is fitted on the
# training labels; the validation labels reuse the same mapping.
le = LabelEncoder()
train_labels_enc = le.fit_transform(train_labels)
validation_labels_enc = le.transform(validation_labels)

# Show a slice of the text labels next to their encoded values.
print(train_labels[1495:1505], train_labels_enc[1495:1505])

['cat', 'cat', 'cat', 'cat', 'cat', 'dog', 'dog', 'dog', 'dog', 'dog'] [0 0 0 0 0 1 1 1 1 1]


In [7]:
# data generators
# Training images are scaled by 1/255 and randomly augmented (zoom, rotation,
# horizontal/vertical shifts, shear and horizontal flips); pixels exposed by a
# transform are filled using the 'nearest' mode.
train_datagen = ImageDataGenerator(rescale=1./255, zoom_range=0.3,
                                   rotation_range=50,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest')

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

# Use the shared batch_size setting instead of repeating the literal 30, so
# the generator cannot drift out of sync with the configuration cell.
train_generator = train_datagen.flow(train_imgs, train_labels_enc,
                                     batch_size=batch_size)
val_generator = val_datagen.flow(validation_imgs,
                                 validation_labels_enc,
                                 batch_size=20)

Pretrained models are used in the following two popular ways when building new models or reusing them:

    Using a pretrained model as a feature extractor
    Fine-tuning the pretrained model


# Pretrained CNN model with fine-tuning

#### We will now leverage our VGG-16 model object stored in the vgg_model variable and unfreeze convolution blocks 4 and 5 while keeping the first three blocks frozen. 

In [12]:
from keras.applications import vgg16 
from keras.models import Model 
import keras 


# Load VGG-16 with ImageNet weights. include_top=False leaves out the original
# classifier, because a new classifier is stacked on top of this base later.
vgg = vgg16.VGG16(weights='imagenet', include_top=False,
                  input_shape=input_shape)

# Flatten the output of the last VGG layer and wrap the result as a model.
output = keras.layers.Flatten()(vgg.layers[-1].output)
vgg_model = Model(vgg.input, output)

# Freeze every layer that comes before the first layer named block4_conv1 or
# block5_conv1; that layer and all layers after it are left trainable.
vgg_model.trainable = True
set_trainable = False

for layer in vgg_model.layers:
    # The flag latches: once a marker layer is seen it stays True.
    set_trainable = set_trainable or layer.name in ('block5_conv1', 'block4_conv1')
    layer.trainable = set_trainable

#### The convolution and pooling layers in blocks 4 and 5 are now trainable. You can verify which layers are frozen and which are unfrozen using the following code:

In [11]:
import pandas as pd 
# Show full cell contents instead of truncating long values.
# None is the supported "no limit" value; the old -1 sentinel is deprecated
# and rejected by current pandas, so it is only used as a fallback for
# legacy installations that do not accept None.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

# One row per layer of vgg_model: the layer object, its name, and whether it
# is currently trainable.
layer_columns = ['Layer Type', 'Layer Name', 'Layer Trainable']
layers = [(lyr, lyr.name, lyr.trainable) for lyr in vgg_model.layers]
pd.DataFrame(layers, columns=layer_columns)

Unnamed: 0,Layer Type,Layer Name,Layer Trainable
0,<keras.engine.input_layer.InputLayer object at 0x000002750A819AC8>,input_1,False
1,<keras.layers.convolutional.Conv2D object at 0x000002750A819E48>,block1_conv1,False
2,<keras.layers.convolutional.Conv2D object at 0x000002750A822160>,block1_conv2,False
3,<keras.layers.pooling.MaxPooling2D object at 0x000002750A84B588>,block1_pool,False
4,<keras.layers.convolutional.Conv2D object at 0x000002750A84BA90>,block2_conv1,False
5,<keras.layers.convolutional.Conv2D object at 0x000002750A86B860>,block2_conv2,False
6,<keras.layers.pooling.MaxPooling2D object at 0x000002750A885550>,block2_pool,False
7,<keras.layers.convolutional.Conv2D object at 0x000002750A885F98>,block3_conv1,False
8,<keras.layers.convolutional.Conv2D object at 0x000002750A8BC978>,block3_conv2,False
9,<keras.layers.convolutional.Conv2D object at 0x000002750A8DAB38>,block3_conv3,False


### We can clearly see that the last two blocks are now trainable, which means the weights for these layers will also get updated with backpropagation in each epoch as we pass each batch of data. 

In [13]:
# List the weight variables of vgg_model that will be updated during training.
trainable_weights = vgg_model.trainable_weights
print("Trainable layers:", trainable_weights)

Trainable layers: [<tf.Variable 'block4_conv1_1/kernel:0' shape=(3, 3, 256, 512) dtype=float32_ref>, <tf.Variable 'block4_conv1_1/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'block4_conv2_1/kernel:0' shape=(3, 3, 512, 512) dtype=float32_ref>, <tf.Variable 'block4_conv2_1/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'block4_conv3_1/kernel:0' shape=(3, 3, 512, 512) dtype=float32_ref>, <tf.Variable 'block4_conv3_1/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'block5_conv1_1/kernel:0' shape=(3, 3, 512, 512) dtype=float32_ref>, <tf.Variable 'block5_conv1_1/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'block5_conv2_1/kernel:0' shape=(3, 3, 512, 512) dtype=float32_ref>, <tf.Variable 'block5_conv2_1/bias:0' shape=(512,) dtype=float32_ref>, <tf.Variable 'block5_conv3_1/kernel:0' shape=(3, 3, 512, 512) dtype=float32_ref>, <tf.Variable 'block5_conv3_1/bias:0' shape=(512,) dtype=float32_ref>]


#### We reduce the learning rate slightly since we don't want to get stuck at any local minimum, and we also do not want to suddenly update the weights of the trainable VGG-16 model layers by a big factor that might adversely affect the model

In [15]:
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, InputLayer 
from keras.models import Sequential 
from keras import optimizers 

# build model architecture
# The partially unfrozen VGG-16 base is followed by a small fully connected
# classifier that ends in a single sigmoid unit for the binary decision.
model = Sequential()

model.add(vgg_model)
# No input_dim here: this is not the first layer, so its input size is taken
# from the output of vgg_model. The previous input_dim=input_shape passed the
# 3-element image shape to an argument that expects a single integer.
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))

# A small learning rate keeps the updates to the unfrozen VGG layers small.
# NOTE(review): newer Keras releases name this argument `learning_rate`
# instead of `lr` -- confirm against the installed version before changing it.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-5),
              metrics=['accuracy'])

In [None]:
# Train on the augmented generator and validate after every epoch.
# The step counts are derived from the generators (number of batches per pass
# over the data) instead of the literals 100 and 50, which were only correct
# for 3000 training / 1000 validation images at batch sizes 30 / 20.
# NOTE(review): this run uses epochs=2 although the configuration cell defines
# epochs = 30 -- confirm which value is intended.
history = model.fit_generator(train_generator,
                              steps_per_epoch=len(train_generator),
                              epochs=2,
                              validation_data=val_generator,
                              validation_steps=len(val_generator),
                              verbose=1)

Epoch 1/2
Epoch 2/2

In [None]:
# Persist the fine-tuned model to an HDF5 file in the working directory.
model_path = 'cats_dogs_tlearn_finetune_img_aug_cnn.h5'
model.save(model_path)