## **Model Tuning: Transfer Learning with VGG16**

This script is used for the following:
1. load day or night sample images
2. load and train model on images
3. export model evaluation metrics

In [1]:
# import necessary libraries
import os
import pickle
import pandas as pd
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K
import tensorflow.keras.models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Lambda
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.optimizers import RMSprop, SGD
from tensorflow.keras.applications import VGG16

## **Load image dataframe**

In [2]:
# source directory holding the pickled image dataframes
# (keep the trailing slash: the file name is appended with plain string concatenation)
path = '/home/ubuntu/michael/my_pickles/'

In [3]:
# choose which reference dataframe to load: comment/uncomment to switch
# between the day sample ('day_df.pkl') and the night sample ('night_df.pkl')
#infile = 'day_df.pkl'
infile = 'night_df.pkl'

In [4]:
# read the selected sample dataframe from its pickle file and display it
# NOTE(review): unpickling can execute arbitrary code; only load trusted files
sample_df = pd.read_pickle(f'{path}{infile}')
sample_df

Unnamed: 0,filename,label,day_phase,phase,filepath
52183,A9-HM470-ID12831_20200101_0331.jpg,Fog,0,Night,/home/ubuntu/michael/nl_images/images_data/A9-...
51511,A9-HM467-ID12834_20191231_2051.jpg,Fog,0,Night,/home/ubuntu/michael/nl_images/images_data/A9-...
42537,A9-HM467-ID12834_20200101_0101.jpg,Fog,0,Night,/home/ubuntu/michael/nl_images/images_data/A9-...
37402,A50-HM1867-ID11639_20191231_0651.jpg,Fog,20,Night,/home/ubuntu/michael/nl_images/images_data/A50...
1220,A50-HM1868-ID11520_20190322_0201.jpg,Fog,0,Night,/home/ubuntu/michael/nl_images/images_data/A50...
...,...,...,...,...,...
8855,A27-HM675-ID10959_20170913_0421.jpg,No Fog,20,Night,/home/ubuntu/michael/nl_images/images_data/A27...
3946,A9-HM470-ID12806_20190504_0011.jpg,No Fog,0,Night,/home/ubuntu/michael/nl_images/images_data/A9-...
30941,A50-HM1888-ID11528_20181009_1901.jpg,No Fog,0,Night,/home/ubuntu/michael/nl_images/images_data/A50...
8528,A4-HM52-ID11043_20170906_2250.jpg,No Fog,0,Night,/home/ubuntu/michael/nl_images/images_data/A4-...


## **Set source directories to day or night images**

In [None]:
# run this cell to train and validate on the day images only
train_dir, val_dir = (
    '/home/ubuntu/michael/day/train',
    '/home/ubuntu/michael/day/validate',
)

In [5]:
# run this cell to train and validate on the night images only
train_dir, val_dir = (
    '/home/ubuntu/michael/night/train',
    '/home/ubuntu/michael/night/validate',
)

## **Loading images with Keras**

In [6]:
# reset the global Keras state so models built in earlier runs do not
# accumulate in memory
tf.keras.backend.clear_session()

In [7]:
# spatial size the images are resized to, and the matching input tensor
# shape (height, width, 3 channels) for the network
img_size = (224, 224)
img_shape = img_size + (3,)

In [8]:
# augmenting generator: rescales pixels to 0-1 and applies random rotation,
# shifts, shear, zoom and horizontal flips
train_datagen = ImageDataGenerator(
    rescale = 1./255.,
    rotation_range = 40,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
)

In [9]:
# plain data generator: only scales pixel values into the range 0-1
datagen = ImageDataGenerator(
    rescale = 1.0 / 255.0,
)

In [10]:
# prepare training iterator
# The training images are drawn through the augmenting generator
# (train_datagen: rotation, shifts, shear, zoom, horizontal flip) so the
# configured augmentation is actually applied; the plain rescaling generator
# (datagen) is used for validation only.
# classes is passed explicitly so that nofog -> 0 and fog -> 1; with the
# default alphabetical ordering fog would be 0 since it precedes nofog.
train_it = train_datagen.flow_from_directory(
    directory = train_dir,
    classes = ['nofog', 'fog'],
    class_mode = 'binary',
    batch_size = 64,
    target_size = img_size,
)

Found 2326 images belonging to 2 classes.


In [11]:
# per-image label assignments of the training iterator [0 = nofog; 1 = fog]
# and the class-name -> index mapping it uses
class_names, class_indices = train_it.classes, train_it.class_indices

In [12]:
# prepare validation iterator (rescaling only)
# classes is passed explicitly so that nofog -> 0 and fog -> 1; with the
# default alphabetical ordering fog would be 0 since it precedes nofog
valid_it = datagen.flow_from_directory(
    directory = val_dir,
    classes = ['nofog', 'fog'],
    class_mode = 'binary',
    batch_size = 16,
    target_size = img_size,
)

Found 291 images belonging to 2 classes.


In [13]:
# per-image label assignments and class-name -> index mapping of the
# validation iterator (rebinds the names set from the training iterator)
class_names, class_indices = valid_it.classes, valid_it.class_indices

## **Load VGG16 model as convolutional base**

In [14]:
# load the VGG16 convolutional base
#   weights     - weight checkpoint from which the model is initialized
#   include_top - False drops the densely connected classifier on top of the
#                 network (by default it has 1000 classes but we only need 2)
#   input_shape - shape of the image tensors fed to the network (optional;
#                 if not passed, the network can process inputs of any size)
conv_base = VGG16(
    weights = 'imagenet',
    include_top = False,
    input_shape = img_shape,
)

In [15]:
# freeze base layers: mark the whole convolutional base as non-trainable so
# only the layers added on top of it are updated during training
conv_base.trainable = False

In [16]:
# print the architecture of the VGG16 convolutional base
conv_base.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

## **Add classifier on top of convolutional base**

In [17]:
# build the model: VGG16 convolutional base, flattened, followed by a small
# dense classifier with a single sigmoid output unit for binary classification
model = Sequential([
    conv_base,
    Flatten(),
    Dense(128, activation = 'relu', kernel_initializer = 'he_uniform'),
    Dense(1, activation = 'sigmoid', name = 'output'),
])

In [18]:
# print the architecture and parameter counts of the full model
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense (Dense)                (None, 128)               3211392   
_________________________________________________________________
output (Dense)               (None, 1)                 129       
Total params: 17,926,209
Trainable params: 3,211,521
Non-trainable params: 14,714,688
_________________________________________________________________


In [19]:
# compile model: SGD with momentum, binary cross-entropy loss, accuracy metric
opt = SGD(
    learning_rate = 0.001,
    momentum = 0.6,
)
model.compile(
    loss = 'binary_crossentropy',
    optimizer = opt,
    metrics = ['accuracy'],
)

In [20]:
# early stopping: terminate training once the validation loss has stopped
# improving for `patience` consecutive epochs
# NOTE(review): the fit call trains for only 3 epochs, so with patience=7
# this callback cannot trigger unless the number of epochs is raised
es = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss',
    mode = 'auto',
    min_delta = 0,
    patience = 7,
    verbose = 1,
)

In [21]:
# choose day or night output folder (comment/uncomment to switch);
# keep the trailing slash, file names are appended by string concatenation
outputFolder = '/home/ubuntu/michael/model_output/VGG16/'
#outputFolder = '/home/ubuntu/michael/model_output/VGG16/day/'
#outputFolder = '/home/ubuntu/michael/model_output/VGG16/night/'

# create the folder (and any missing parents); exist_ok avoids the
# check-then-create race and makes the cell safe to re-run
os.makedirs(outputFolder, exist_ok=True)

In [22]:
# define checkpoint path inside the output folder
checkpoint_path = f'{outputFolder}VGG16_model.hdf5'

# checkpoint callback: writes the model to checkpoint_path, keeping only the
# best one as measured by the validation loss
cp = ModelCheckpoint(
    checkpoint_path,
    monitor = 'val_loss',
    mode = 'auto',
    save_best_only = True,
)

In [23]:
# train the model on the training iterator and validate on the validation
# iterator after every epoch; early stopping and checkpointing run as callbacks
history = model.fit(
    x = train_it,
    steps_per_epoch = len(train_it),
    epochs = 3,
    validation_data = valid_it,
    validation_steps = len(valid_it),
    callbacks = [es, cp],
    verbose = 1,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [24]:
# evaluate the model on the full validation iterator
# NOTE(review): this scores the in-memory model (weights after the last
# epoch), which may differ from the best model saved by the checkpoint
score = model.evaluate(
    x = valid_it,
    steps = len(valid_it),
    verbose = 1,
)



In [25]:
# persist the validation score as a pickle in the output folder
score_file = f'{outputFolder}VGG16_score.pkl'
with open(score_file, 'wb') as score_out:
    pickle.dump(score, score_out)

## **Store data from history object in dataframe**

In [26]:
# pull the per-epoch loss and accuracy curves out of the history object
epoch_log = history.history
loss_values = epoch_log['loss']
acc_values = epoch_log['accuracy']
valLoss_values = epoch_log['val_loss']
valAccuracy_values = epoch_log['val_accuracy']

# collect the curves in a dataframe, one row per epoch
history_df = pd.DataFrame({
    'Training Loss': loss_values,
    'Training Accuracy': acc_values,
    'Validation Loss': valLoss_values,
    'Validation Accuracy': valAccuracy_values,
})

In [27]:
# persist the training history dataframe as a pickle in the output folder
history_file = f'{outputFolder}VGG16_history.pkl'
history_df.to_pickle(history_file)