#  Hyperparameter tuning, transfer learning, and fine-tuning


In [67]:
import tensorflow as tf
from tensorflow import keras

In [70]:
#pip install -q -U keras-tuner
import keras_tuner as kt
(img_train, label_train), (img_test, label_test) = keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [71]:
# Normalize pixel values between 0 and 1
img_train = img_train.astype('float32') / 255.0
img_test = img_test.astype('float32') / 255.0

In [72]:
def model_builder(hp):
  model = keras.Sequential()
  model.add(keras.layers.Flatten(input_shape=(28, 28)))

  # Tune the number of units in the first Dense layer
  # Choose an optimal value between 32-512
  num_hidden_layers = hp.Choice('num_hidden_layer', values=[1,2,3])
  dropout_rate = hp.Float('dropout_rate',min_value = 0.1,max_value= 0.5)

  hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
  for _ in range(0,num_hidden_layers):
    model.add(keras.layers.Dense(hp_units,activation = 'relu'))
    model.add(keras.layers.Dropout(dropout_rate))
    

  model.add(keras.layers.Dense(units=hp_units, activation='relu'))
  model.add(keras.layers.Dense(10))

  # Tune the learning rate for the optimizer
  # Choose an optimal value from 0.01, 0.001, or 0.0001
  hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

  model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])

  return model

In [73]:
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='intro_to_kt')

In [74]:
tuner.search_space_summary()

Search space summary
Default search space size: 4
num_hidden_layer (Choice)
{'default': 1, 'conditions': [], 'values': [1, 2, 3], 'ordered': True}
dropout_rate (Float)
{'default': 0.1, 'conditions': [], 'min_value': 0.1, 'max_value': 0.5, 'step': None, 'sampling': None}
units (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [75]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)

In [76]:
tuner.search(img_train, label_train, epochs=10, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")

Trial 30 Complete [00h 00m 14s]
val_accuracy: 0.8125

Best val_accuracy So Far: 0.887583315372467
Total elapsed time: 00h 07m 55s
INFO:tensorflow:Oracle triggered exit

The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is 480 and the optimal learning rate for the optimizer
is 0.001.



In [77]:
# Build the model with the optimal hyperparameters and train it on the data for 50 epochs
model = tuner.hypermodel.build(best_hps)
history = model.fit(img_train, label_train, epochs=50, validation_split=0.2)

val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Best epoch: 33


The best epoch for this model is 33.

In [78]:
hypermodel = tuner.hypermodel.build(best_hps)

# Retrain the model
hypermodel.fit(img_train, label_train, epochs=best_epoch, validation_split=0.2)

Epoch 1/33
Epoch 2/33
Epoch 3/33
Epoch 4/33
Epoch 5/33
Epoch 6/33
Epoch 7/33
Epoch 8/33
Epoch 9/33
Epoch 10/33
Epoch 11/33
Epoch 12/33
Epoch 13/33
Epoch 14/33
Epoch 15/33
Epoch 16/33
Epoch 17/33
Epoch 18/33
Epoch 19/33
Epoch 20/33
Epoch 21/33
Epoch 22/33
Epoch 23/33
Epoch 24/33
Epoch 25/33
Epoch 26/33
Epoch 27/33
Epoch 28/33
Epoch 29/33
Epoch 30/33
Epoch 31/33
Epoch 32/33
Epoch 33/33


<tensorflow.python.keras.callbacks.History at 0x1a31856d070>

In [79]:
eval_result = hypermodel.evaluate(img_test, label_test)
print("[test loss, test accuracy]:", eval_result)

[test loss, test accuracy]: [0.39926213026046753, 0.8842999935150146]


Here, the test accuracy of the best model is 0.88 and 

##  Transfer learning 


### For this part I have used `Mobilenet` network from keras applications as pretrained model and I have implemented my object and non-object images in this pretrained model.

In [102]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.applications.mobilenet import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential

import numpy as np
from glob import glob
import matplotlib.pyplot as plt



In [141]:
# re-size all the images to this
IMAGE_SIZE = [224, 224]

import splitfolders
object_dir = 'C:/Users/nabin/Desktop/DAB300/Images'
splitfolders.ratio(object_dir, output="C:/Users/nabin/Desktop/DAB300/Train-Image", seed=1337, ratio=(0.625, 0.1875, 0.1875),group_prefix=None)
train_dir = 'C:/Users/nabin/Desktop/DAB300/Train-Image/train'
val_dir = 'C:/Users/nabin/Desktop/DAB300/Train-Image/val'
test_dir = 'C:/Users/nabin/Desktop/DAB300/Train-Image/test'


Copying files: 160 files [00:00, 578.19 files/s]


In [142]:
# add preprocessing layer to the front of VGG
mnet = MobileNet(input_shape=IMAGE_SIZE + [3], weights='imagenet', include_top=False)

# don't train existing weights
for layer in mnet.layers:
  layer.trainable = False
  

  
  # useful for getting number of classes
folders = glob('C:/Users/nabin/Desktop/DAB300/Train-Image/train/*')
folders

['C:/Users/elnin/Desktop/DAB300/lab 4/Train-Image/train\\non-object',
 'C:/Users/elnin/Desktop/DAB300/lab 4/Train-Image/train\\object']

In [143]:
# our layers 
x = Flatten()(mnet.output)
prediction = Dense(len(folders), activation='softmax')(x)

In [144]:
# create a model object
model_frm3 = Model(inputs=mnet.input, outputs=prediction)

# view the structure of the model
model_frm3.summary()

Model: "functional_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 225, 225, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)     

In [145]:
# tell the model what cost and optimization method to use
model_frm3.compile(
  loss='categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

In [163]:
# load the same images that we used for lab4
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
image_size = [224,224]
batch_size = 32

train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='nearest')
test_datagen = ImageDataGenerator(rescale=1./255)


training_set = train_datagen.flow_from_directory('C:/Users/nabin/Desktop/DAB300/Train-Image/train',
                                                 target_size = (224, 224),
                                                 batch_size = 32,
                                                 class_mode = 'categorical')

test_set = test_datagen.flow_from_directory('C:/Users/nabin/Desktop/DAB300/Train-Image/test',
                                            target_size = (224, 224),
                                            batch_size = 32,
                                            class_mode = 'categorical')
val_dataset = test_datagen.flow_from_directory("C:/Users/nabin/Desktop/DAB300/Train-Image/val/",
                                          target_size=(224,224),
                                          batch_size =32,
                                          class_mode = 'binary')



Found 100 images belonging to 2 classes.
Found 30 images belonging to 2 classes.
Found 30 images belonging to 2 classes.


In [157]:
# fit the model
r = model_frm3.fit_generator(
  training_set,
  validation_data=test_set,
  epochs=5,
  steps_per_epoch=len(training_set),
  validation_steps=len(test_set)
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [158]:
score_aug, acc_aug = model_frm3.evaluate(test_set,
                            batch_size=batch_size)



### Here, the test accuracy is 74% 

## Fine-tuning



In [45]:
#Resizing size of images to 148 by 148 and set batch size to 32

img_height=224 
img_width=224
batch_size=32

train = ImageDataGenerator(rescale=1/255)
test = ImageDataGenerator(rescale=1/255)
val = ImageDataGenerator(rescale=1/255)
datagen = ImageDataGenerator(rescale=1./255) # Divide each pixel by 255 (rescale)

train_dataset = train.flow_from_directory("C:/Users/nabin/Desktop/DAB300/Train-Image/train/",
                                          target_size=(img_height,img_width),
                                          batch_size = batch_size,
                                          class_mode = 'binary')
                                         
test_dataset = test.flow_from_directory("C:/Users/nabin/Desktop/DAB300/Train-Image/test/",
                                          target_size=(img_height,img_width),
                                          batch_size =batch_size,
                                          class_mode = 'binary')
                                         
val_dataset = test.flow_from_directory("C:/Users/nabin/Desktop/DAB300/Train-Image/val/",
                                          target_size=(img_height,img_width),
                                          batch_size =batch_size,
                                          class_mode = 'binary')

Found 101 images belonging to 2 classes.
Found 31 images belonging to 2 classes.
Found 30 images belonging to 2 classes.


In [47]:
import tensorflow as tf
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input


In [48]:
# Create the base model from the pre-trained model in part 3
IMG_SIZE = (224,224)
IMG_SHAPE = IMG_SIZE + (3,)
base_model = model_frm3

In [52]:
image_batch, label_batch = next(iter(train_dataset))
feature_batch = base_model(image_batch)
print(feature_batch.shape)

(32, 7, 7, 1280)


In [50]:
base_model.trainable = False

In [15]:
# Let's take a look at the base model architecture
base_model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
_______________________________________________________________________________

In [53]:
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

(32, 1280)


In [54]:
prediction_layer = tf.keras.layers.Dense(1)
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)

(32, 1)


In [55]:
inputs = tf.keras.Input(shape=(224, 224, 3))
x = preprocess_input(inputs)
x = base_model(x, training=False)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)
model_frm3 = tf.keras.Model(inputs, outputs)


In [56]:

base_learning_rate = 0.0001
model_frm3.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [86]:
model_frm3.summary()

Model: "functional_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_14 (InputLayer)        [(None, 224, 224, 3)]     0         
_________________________________________________________________
tf_op_layer_RealDiv_10 (Tens [(None, 224, 224, 3)]     0         
_________________________________________________________________
tf_op_layer_Sub_10 (TensorFl [(None, 224, 224, 3)]     0         
_________________________________________________________________
mobilenetv2_1.00_224 (Functi (None, 7, 7, 1280)        2257984   
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1280)              0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 1280)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)               

In [57]:
len(model_frm3.trainable_variables)

2

In [60]:
base_model.trainable = True
# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))

# Fine-tune from this layer onwards
fine_tune_at = 100

# Freeze all the layers before the `fine_tune_at` layer
for layer in base_model.layers[:fine_tune_at]:
  layer.trainable =  False

Number of layers in the base model:  155


In [139]:
model_frm3.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer = tf.keras.optimizers.RMSprop(lr=base_learning_rate),
              metrics=['accuracy'])

In [152]:
r = model_frm3.fit_generator(
  training_set,
  validation_data=test_set,
  epochs=10,
  steps_per_epoch=len(training_set),
  validation_steps=len(test_set)
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [155]:
score_aug, acc_aug = model_frm3.evaluate(test_set,
                            batch_size=batch_size)

print('Test score:', score_aug)
print('Test accuracy:', acc_aug)

Test score: 3.7464439868927
Test accuracy: 0.774193525314331


#### After fine-tuning,  the test accuracy of the model is almost 78% which is greater than the tesst accuracy from part 3.  Hence, we can conclude that the transfer learning and fine tune is the best model to implement in our dataset, even if we have small number of data, we could get pretty good accuracy.