# Transfer learning
In this exercise we are going to use a pretrained network (VGG-16) to classify a new dataset.

Learning Outcomes:
Loading & modifying keras pre-trained applications.
Use of Tensorflow Dataset API to setup dataset.
Use of built in loss functions.
Simple tensorflow Training Loop.

In [0]:
# Install tensorflow 2.0
!pip install -q tensorflow==2.0.0-rc
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 
import glob
import tensorflow_datasets as tfds
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.layers import Dense, Activation, Reshape
from tensorflow.keras import initializers
print(tf.__version__)

[K     |████████████████████████████████| 86.3MB 104kB/s 
[K     |████████████████████████████████| 4.3MB 33.1MB/s 
[K     |████████████████████████████████| 501kB 45.4MB/s 
[?25h2.0.0-rc0


Setup Parameters for Training.

In [0]:
# Spatial size (in pixels) that every image is resized to before it is fed
# to the network.
H_trg = 128
W_trg = 128
# Number of examples per mini-batch.
batch_size = 20
# Per-channel means in R, G, B order; subtracted from the pixels during
# preprocessing.
imagenet_rgb_values = [123.68, 116.779, 103.939]
# Human-readable class names used as confusion-matrix tick labels.
# target_names[i] must name integer label i of the dataset, so the list is
# kept in case-insensitive alphabetical order (cats and dogs interleaved)
# rather than grouped by species.
# NOTE(review): confirm against info.features['label'].names after loading
# the dataset.
target_names = [
    'Abyssinian',
    'American Bulldog',
    'American Pit Bull Terrier',
    'Basset hound',
    'Beagle',
    'Bengal',
    'Birman',
    'Bombay',
    'Boxer',
    'British Shorthair',
    'Chihuahua',
    'Egyptian Mau',
    'English Cocker Spaniel',
    'English Setter',
    'German Shorthaired',
    'Great Pyrenees',
    'Havanese',
    'Japanese Chin',
    'Keeshond',
    'Leonberger',
    'Maine Coon',
    'Miniature Pinscher',
    'Newfoundland',
    'Persian',
    'Pomeranian',
    'Pug',
    'Ragdoll',
    'Russian Blue',
    'Saint Bernard',
    'Samoyed',
    'Scottish Terrier',
    'Shiba Inu',
    'Siamese',
    'Sphynx',
    'Staffordshire Bull Terrier',
    'Wheaten Terrier',
    'Yorkshire Terrier',
]
# Number of passes over the training set.
EPOCHS = 20

Load the VGG front end. (Note that the parameters above set the target image size to 128x128, whereas the VGG-16 model input is nominally 224x224. Why is the change in image size OK when we are using the front end only? What implication does this have for the backend?)

In [0]:

# Load the ImageNet-pretrained VGG-16 without its dense classifier head and
# cut it at `layer_name`, giving a convolutional feature extractor.
vgg16_model = tf.keras.applications.VGG16(weights='imagenet', include_top=False)
layer_name = 'block5_conv3'
red_model = keras.Model(
    inputs=vgg16_model.input,
    outputs=vgg16_model.get_layer(layer_name).output,
    trainable=False,
)
# The constructor flag only marks the wrapper model itself. Assigning the
# attribute also pushes the flag down to every contained layer, which is what
# keeps the pretrained weights frozen when these layers are reused in
# another model.
red_model.trainable = False
red_model.summary()

Build new Backend Model. It is important to setup the model to correctly interface with the front end.

In [0]:
def op_stage(input_shape=(8, 8, 512), num_classes=37):
    """Build the trainable classifier head that sits on top of the front end.

    The feature map is flattened, passed through two ReLU dense layers
    (512 and 256 units) and a final dense layer with a softmax activation.

    Args:
        input_shape: Shape of one feature map, without the batch dimension.
            It must match the output of the front-end model.
        num_classes: Number of output classes.

    Returns:
        A `keras.Model` whose output has shape (batch, 1, num_classes);
        callers squeeze the singleton axis.
    """
    flat_size = 1
    for dim in input_shape:
        flat_size *= dim

    features = keras.Input(shape=input_shape)
    # Flatten to (1, flat_size) rather than (flat_size,) so that the output
    # keeps the (batch, 1, num_classes) shape downstream code expects.
    flattened = Reshape((1, flat_size))(features)
    dense_1 = Dense(
        512,
        activation="relu",
        name='dense_1',
        kernel_initializer=initializers.he_normal(),
        bias_initializer=initializers.he_normal(),
    )(flattened)
    dense_2 = Dense(
        256,
        activation="relu",
        name='dense_2',
        kernel_initializer=initializers.he_normal(),
        bias_initializer=initializers.he_normal(),
    )(dense_1)
    logits = Dense(
        num_classes,
        activation="linear",
        name='dense_3',
        kernel_initializer=initializers.he_normal(),
        bias_initializer=initializers.he_normal(),
    )(dense_2)
    probabilities = Activation('softmax')(logits)

    model = keras.Model(inputs=features, outputs=[probabilities], trainable=True)
    return model

Combine the pre-trained frontend and the new backend model. (Note that we only want to train the new backend part of the model during training. This can be achieved by setting layer.trainable = False on the frontend layers, as shown.)

In [0]:
# Build the new classifier head and chain it onto the pretrained front end.
be_model = op_stage()

comb_model = tf.keras.Model(inputs=red_model.input,
                            outputs=be_model(red_model.output))

# Freeze exactly the layers that belong to the pretrained front end. The
# layer objects are shared between red_model and comb_model, so this keeps
# working if `layer_name` (and therefore the number of front-end layers)
# changes, unlike a hard-coded layer index.
for layer in red_model.layers:
    layer.trainable = False

# Check number of Trainable parameters
comb_model.summary()

Use Dataset API to load dataset. Ensure to include augmentation.

Example 37 classes: https://www.tensorflow.org/datasets/catalog/oxford_iiit_pet

In [0]:

def resize(input_image, height, width):
  """Return `input_image` resized to (height, width), nearest-neighbour."""
  target_size = [height, width]
  return tf.image.resize(
      input_image,
      target_size,
      method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
  )

def random_jitter(input_image):
  """Randomly augment one training image.

  The image is resized slightly larger than the target size, randomly
  cropped back to (H_trg, W_trg, 3), and then each of a left-right flip, an
  up-down flip and a rotation by a random multiple of 90 degrees is applied
  independently with probability 0.25.

  Args:
    input_image: A single 3-channel image tensor (no batch dimension).

  Returns:
    The augmented image with spatial size (H_trg, W_trg).
  """
  # The resize target must be at least as large as the crop; oversize by 1/8
  # so the crop window has room to move.
  input_image = resize(input_image, H_trg + H_trg // 8, W_trg + W_trg // 8)
  input_image = tf.image.random_crop(input_image, size=[H_trg, W_trg, 3])

  # Random mirroring. A fresh draw is taken for every augmentation so that
  # the three decisions are independent of each other.
  choice = tf.random.uniform(())
  input_image = tf.cond(choice < 0.25,
                        lambda: tf.image.flip_left_right(input_image),
                        lambda: input_image)
  choice = tf.random.uniform(())
  # The gate above already supplies the randomness, so use the deterministic
  # flip here (same pattern as the left-right flip).
  input_image = tf.cond(choice < 0.25,
                        lambda: tf.image.flip_up_down(input_image),
                        lambda: input_image)
  choice = tf.random.uniform(())
  input_image = tf.cond(
      choice < 0.25,
      lambda: tf.image.rot90(
          input_image,
          tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)),
      lambda: input_image)
  return input_image
     
def resize_test(input_image):
    """Resize an image to the (H_trg, W_trg) network input size, no augmentation."""
    return resize(input_image, H_trg, W_trg)

def process_image(input_img):
    """Convert an RGB image to mean-centred float32 in B, G, R channel order.

    Each channel has its entry of `imagenet_rgb_values` subtracted, and the
    channels are then stacked in reverse (B, G, R) order.
    """
    input_img.set_shape([None, None, 3])
    pixels = tf.cast(input_img, tf.float32)
    # Index 0, 1, 2 are the R, G, B planes respectively.
    centred = [pixels[:, :, idx] - imagenet_rgb_values[idx] for idx in range(3)]
    red, green, blue = centred
    return tf.stack([blue, green, red], axis=2)


In [0]:
import tensorflow_datasets as tfds
# Fetch the Oxford-IIIT Pet dataset together with its metadata and show it.
src_data, info = tfds.load(
    "oxford_iiit_pet",
    with_info=True,
)
print(info)

In [0]:
# Sanity check: print the shapes of the first ten raw training images.
src_train_dataset = src_data["train"]
tr_img_dataset = src_train_dataset.map(
    lambda example: example['image']
)
for img in tr_img_dataset.take(10):
    print(np.shape(img))

In [0]:
def _to_model_pair(example):
    """Map one raw example dict to a (preprocessed image, label) pair."""
    image = process_image(resize_test(example['image']))
    return image, example['label']


def _build_pipeline(split, shuffle_buffer=None):
    """Turn one raw split into batches of (image, label) pairs."""
    # Image and label are taken from the same record in a single map, so
    # each record is read once and the pairing cannot drift.
    dataset = split.map(_to_model_pair)
    if shuffle_buffer:
        dataset = dataset.shuffle(shuffle_buffer)
    return dataset.batch(batch_size)


def gen_datasets(src_data, shuffle_buffer=None):
    """Create the batched training and test datasets.

    Every image is resized with `resize_test` and normalised with
    `process_image`; batches contain `batch_size` examples.

    Args:
        src_data: Mapping with "train" and "test" datasets whose elements are
            dicts holding 'image' and 'label'.
        shuffle_buffer: Optional shuffle buffer size for the training split.
            When None or 0 (the default) the training data keeps its source
            order. The test split is never shuffled.

    Returns:
        A `(train_dataset, test_dataset)` tuple of batched (image, label)
        datasets.
    """
    train_dataset = _build_pipeline(src_data["train"], shuffle_buffer)
    test_dataset = _build_pipeline(src_data["test"])
    return train_dataset, test_dataset

In [0]:
# Disabled smoke test: the whole cell below is one string literal, so none of
# it runs -- including the gen_datasets() call that would assign tr_dataset
# and test_dataset. When enabled it runs a few training batches through
# comb_model, shows one image per batch and prints predicted vs. true labels.
'''
# Test Dataset works by displaying an image
tr_dataset,test_dataset = gen_datasets(src_data)
for img,labels in tr_dataset.take(6):
    model_output = comb_model(img)
    op_labels = tf.argmax(tf.squeeze(model_output),axis=-1)
    plt.figure(figsize=(6, 6))
    plt.imshow(img[1,:,:,:]/255.0)
    print(op_labels)
    print(labels)
    '''

Loss functions: 

Note that we can use a built-in loss function.

Secondly, the labels from the dataset must be converted into a one-hot representation.

In [0]:
# Semantic (integer) label -> one-hot vector for the loss function
def _semlabel2onehot(sem_label):
    """Encode integer class ids as one-hot vectors of depth 37."""
    return tf.one_hot(sem_label, depth=37)

# Shared built-in loss; expects one-hot targets and probability outputs.
loss_object = tf.keras.losses.CategoricalCrossentropy()


def total_loss(target_labels_one_hot, model_output):
  """Return the categorical cross-entropy between targets and model output."""
  return loss_object(y_true=target_labels_one_hot, y_pred=model_output)

Train step and train loop

In [0]:
# Optimiser that applies the gradients computed in train_step
model_optimizer = tf.keras.optimizers.Adadelta(
    learning_rate=1e-2,
    rho=0.95,
    epsilon=1e-07,
    name='Adadelta',
)

def train_step(input_image, target_labels):
  """Run one optimisation step on a single batch.

  Args:
    input_image: Batch of preprocessed images fed to `comb_model`.
    target_labels: Batch of integer class labels.

  Returns:
    The loss value for this batch.
  """
  # Convert input labels to one-hot format for the loss function. This does
  # not involve any trainable variable, so it is done outside the tape.
  target_labels_one_hot = _semlabel2onehot(target_labels)

  with tf.GradientTape() as model_tape:
    model_output = comb_model(input_image)
    # Drop only the singleton axis the model inserts after the batch axis;
    # an unrestricted squeeze would also remove the batch axis when a batch
    # holds a single example.
    model_loss = total_loss(target_labels_one_hot,
                            tf.squeeze(model_output, axis=1))

  trainable_vars = comb_model.trainable_variables
  model_gradients = model_tape.gradient(model_loss, trainable_vars)
  model_optimizer.apply_gradients(zip(model_gradients, trainable_vars))

  return model_loss


def train(tr_dataset, test_dataset, epochs):
  """Train for `epochs` passes over `tr_dataset`, logging every 50th batch.

  `test_dataset` is accepted but not used by this loop.
  """
  for epoch in range(epochs):
    print('Epoch Number = ',epoch)
    for counter, (input_image, target_labels) in enumerate(tr_dataset, start=1):
      model_loss = train_step(input_image, target_labels)
      if counter % 50 != 0:
        continue
      print('loss value= ',model_loss)
      print(counter)
  

In [0]:

# Build the input pipelines. The only other place they are created is a cell
# that is disabled as a string literal, so without this the names below are
# undefined.
tr_dataset, test_dataset = gen_datasets(src_data)

# Run training Loop
train(tr_dataset, test_dataset, EPOCHS)

Verify model and obtain accuracy with Confusion matrix

In [0]:
# Collect predicted and true labels over the test set.
pred_labels = []
true_labels = []
counter = 0
# take(1000) is an upper bound on the number of batches evaluated.
for input_image, target_labels in test_dataset.take(1000):
    model_output = comb_model(input_image)
    # Squeeze only the singleton axis after the batch axis so that a batch
    # holding a single example keeps its batch dimension.
    batch_pred_labels = tf.argmax(tf.squeeze(model_output, axis=1), axis=-1)
    # Store plain Python ints rather than scalar tensors.
    pred_labels.extend(batch_pred_labels.numpy().tolist())
    true_labels.extend(target_labels.numpy().tolist())
    counter += 1
# Number of batches evaluated.
print(counter)



In [0]:
#https://scikit-learn.org/0.16/auto_examples/model_selection/plot_confusion_matrix.html
def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as a colour-mapped image on the current figure.

    Tick labels on both axes come from the module-level `target_names`.

    Args:
        cm: 2-D confusion matrix; rows are true labels, columns predictions.
        title: Title of the plot.
        cmap: Matplotlib colour map used for the cells.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(target_names))
    plt.xticks(tick_marks, target_names, rotation=45)
    plt.yticks(tick_marks, target_names)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Lay out last so the axis labels are taken into account as well.
    plt.tight_layout()

from sklearn.metrics import confusion_matrix

# Pin the matrix to the full class list so that row/column i always lines up
# with target_names[i], even if some class never occurs in the labels.
cm = confusion_matrix(true_labels, pred_labels,
                      labels=np.arange(len(target_names)))
plt.figure(figsize=(15, 15))
plot_confusion_matrix(cm)

# Accuracy = correctly classified (diagonal) / all classified examples.
print('Test Set Accuracy = ', np.trace(cm) / np.sum(cm))