# Transfer Learning Goal

* **model_0** - Create a pretrained EfficientNetB0 model with 10 percent data
  * use 10 percent data - https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_10_percent.zip
* **model_1** - Create a pretrained EfficientNetB0 on 1 percent data with data augmentation
  * use 1 percent data  - https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_1_percent.zip
* **model_2** - Create a pretrained EfficientNetB0 model with 10 percent data and a data augmentation layer.
* **model_3** - Create a model from the feature-extraction weights of model 2 and fine-tune it with the last 10 layers of the base model unfrozen

## Import Dependency

In [6]:
#!wget https://github.com/rkumar-bengaluru/pythonbyexample/raw/main/tensorflow/utilities.py
from utilities import download_file,unzip_file
import os
import datetime 
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import random
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import Sequential
from tensorflow.keras.layers.experimental.preprocessing import RandomContrast,RandomCrop,RandomWidth
from tensorflow.keras.layers.experimental.preprocessing import RandomFlip,RandomRotation,RandomHeight,RandomZoom
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Input,GlobalAveragePooling2D,Dense
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard,ModelCheckpoint

## Data Preparation

In [66]:
# constants
ten_percent_data_link = 'https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_10_percent.zip'
one_percent_data_link = 'https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_1_percent.zip'
hundred_percent_data_link = 'https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_all_data.zip'
tensorboard_dir_name = 'transfer_learning'
# NOTE: spelling kept unchanged so checkpoint paths built from this value do not move.
modelcheckpoint_dir_name = 'modelcheckpoing'


def _dataset_dir(data_link, split):
    """Return '<zip file name without extension>/<split>/' for a dataset zip URL."""
    return data_link.split('/')[-1].split('.')[0] + '/' + split + '/'


# train directories
all_train_dir         = _dataset_dir(hundred_percent_data_link, 'train')
ten_percent_train_dir = _dataset_dir(ten_percent_data_link, 'train')
one_percent_train_dir = _dataset_dir(one_percent_data_link, 'train')
# test directories
all_test_dir          = _dataset_dir(hundred_percent_data_link, 'test')
ten_percent_test_dir  = _dataset_dir(ten_percent_data_link, 'test')
one_percent_test_dir  = _dataset_dir(one_percent_data_link, 'test')
# target image size
TARGET_IMG_SIZE       = (224,224)
# input shape
INPUT_SHAPE           = (224,224,3)
# matplot lib fig size
FIG_SIZE              = (10,7)
# labels size
LABELS_SIZE           = 10
# no of epochs
INITIAL_EPOCHS        = 5

# download every data set that is read below; the 100% archive is needed by
# train_all_data / test_all_data and was previously never fetched.
download_file(ten_percent_data_link)
download_file(one_percent_data_link)
download_file(hundred_percent_data_link)

# unzip data directories
unzip_file(ten_percent_data_link)
unzip_file(one_percent_data_link)
unzip_file(hundred_percent_data_link)

# prepare all train and test data
train_all_data = image_dataset_from_directory(directory=all_train_dir,
                                              image_size=TARGET_IMG_SIZE,
                                              label_mode='categorical')
test_all_data = image_dataset_from_directory(directory=all_test_dir,
                                             image_size=TARGET_IMG_SIZE,
                                             label_mode='categorical')
# prepare 10% train and test data
train_ten_percent_data = image_dataset_from_directory(directory=ten_percent_train_dir,
                                                      image_size=TARGET_IMG_SIZE,
                                                      label_mode='categorical')
test_ten_percent_data = image_dataset_from_directory(directory=ten_percent_test_dir,
                                                     image_size=TARGET_IMG_SIZE,
                                                     label_mode='categorical')
# prepare 1% train and test data
train_one_percent_data = image_dataset_from_directory(directory=one_percent_train_dir,
                                                      image_size=TARGET_IMG_SIZE,
                                                      label_mode='categorical')
test_one_percent_data = image_dataset_from_directory(directory=one_percent_test_dir,
                                                     image_size=TARGET_IMG_SIZE,
                                                     label_mode='categorical')
# create the data augmentation layer (this one instance is reused by models 1-4).
data_augmentation_layer = Sequential([
    RandomContrast(0.2),
    RandomWidth(0.2),
    RandomFlip(mode='horizontal'),
    RandomRotation(0.2),
    RandomHeight(0.2),
    RandomZoom(0.2)
],name='data_augmentation_layer')

file already exists 10_food_classes_10_percent.zip
file already exists 10_food_classes_1_percent.zip
Found 750 files belonging to 10 classes.
Found 2500 files belonging to 10 classes.
Found 70 files belonging to 10 classes.
Found 2500 files belonging to 10 classes.


## Create Model 0

In [7]:
experiment_name_0 = 'model_0'

# step 1 - create a model with tensorflow applications (no classification head)
base_model = EfficientNetB0(include_top=False)

# step 2 - freeze the base model
base_model.trainable = False

# step 3 - create inputs in our model
inputs = Input(shape=INPUT_SHAPE,name='input_layer')

# step 4 - no rescaling layer is added here; the original note says one would
# be needed if ResNet50V2 were used as the base model instead.

# step 5 - pass the inputs to base model; training=False matches how models 1-4
# in this file call their base model.
x = base_model(inputs,training=False)
print(f'shape after passing the inputs to base model {x.shape}')

# step 6 - Average Pooling
x = GlobalAveragePooling2D(name='global_avaerage_pooling_layer')(x)
print(f'shape after passing through average pool {x.shape}')

# step 7 - create the output layer
outputs = Dense(LABELS_SIZE,activation='softmax',name='output_layer')(x)

# step 8 - create the model
model_0 = tf.keras.Model(inputs,outputs)
# step 9 - compile the model
model_0.compile(loss=CategoricalCrossentropy(),
               optimizer=Adam(),
               metrics=['accuracy'])
# step 10 - tensor board callback, one timestamped log directory per run
log_dir = tensorboard_dir_name + '/' + experiment_name_0 + '/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
tensorboard_callback = TensorBoard(log_dir=log_dir)
# step 11 - ModelCheckpoint: the path is derived from the shared constants
# (same value as the previously hard-coded string) and stored under this
# model's own name rather than modelcheckpoint_path_1.
modelcheckpoint_path_0 = f'{modelcheckpoint_dir_name}/{experiment_name_0}/checkpoint.ckpt'
# alias kept in case another cell still refers to the old variable name
checkoutpoint_path = modelcheckpoint_path_0
modelcheckpoint_callback = ModelCheckpoint(filepath=modelcheckpoint_path_0,
                                           save_weights_only=True,
                                           save_best_only=False,
                                           save_freq='epoch',
                                           verbose=1)
# step 12 - print the model summary
model_0.summary()

NameError: name 'INPUT_SHAPE' is not defined

## Create Model 1 - With Data Augmentation Layer

In [74]:
experiment_name_1 = 'model_1'

# Backbone: EfficientNetB0 without its classification head, fully frozen.
base_model_1 = EfficientNetB0(include_top=False)
base_model_1.trainable = False

# Graph: input -> augmentation -> backbone -> global average pooling -> softmax head.
inputs_1 = Input(shape=INPUT_SHAPE, name='input_layer_1')
augmented_1 = data_augmentation_layer(inputs_1)
features_1 = base_model_1(augmented_1, training=False)
pooled_1 = GlobalAveragePooling2D(name='global_average_pooling_layer')(features_1)
outputs = Dense(LABELS_SIZE, activation='softmax', name='output_layer')(pooled_1)
model_1 = tf.keras.Model(inputs_1, outputs)

# Compile with categorical cross-entropy, default Adam and accuracy tracking.
model_1.compile(
    loss=CategoricalCrossentropy(),
    optimizer=Adam(),
    metrics=['accuracy'],
)

# TensorBoard callback writing to a timestamped directory for this experiment.
run_stamp_1 = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
log_dir = f'{tensorboard_dir_name}/{experiment_name_1}/{run_stamp_1}'
tensorboard_callback_for_model_1 = TensorBoard(log_dir=log_dir)

# Checkpoint callback: weights only, written at the end of every epoch.
modelcheckpoint_path_1 = modelcheckpoint_dir_name + '/' + experiment_name_1 + '/checkpoint.ckpt'
modelcheckpoint_callback_for_model_1 = ModelCheckpoint(
    filepath=modelcheckpoint_path_1,
    save_weights_only=True,
    save_best_only=False,
    save_freq='epoch',
    verbose=1,
)

# Show the resulting architecture.
model_1.summary()

Model: "model_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer_1 (InputLayer)  [(None, 224, 224, 3)]     0         
                                                                 
 data_augmentation_layer (Se  (None, 224, 224, 3)      0         
 quential)                                                       
                                                                 
 efficientnetb0 (Functional)  (None, None, None, 1280)  4049571  
                                                                 
 global_average_pooling_laye  (None, 1280)             0         
 r (GlobalAveragePooling2D)                                      
                                                                 
 output_layer (Dense)        (None, 10)                12810     
                                                                 
Total params: 4,062,381
Trainable params: 12,810
Non-train

## Create Model 2 - With 10 percent data and Data Augmentation

In [79]:
experiment_name_2 = 'model_2'

# Backbone: EfficientNetB0 without its classification head, fully frozen.
base_model_2 = EfficientNetB0(include_top=False)
base_model_2.trainable = False

# Graph: input -> augmentation -> backbone -> global average pooling -> softmax head.
inputs_2 = Input(shape=INPUT_SHAPE, name='input_layer_1')
augmented_2 = data_augmentation_layer(inputs_2)
features_2 = base_model_2(augmented_2, training=False)
pooled_2 = GlobalAveragePooling2D(name='global_average_pooling_layer')(features_2)
outputs = Dense(LABELS_SIZE, activation='softmax', name='output_layer')(pooled_2)
model_2 = tf.keras.Model(inputs_2, outputs)

# Compile with categorical cross-entropy, default Adam and accuracy tracking.
model_2.compile(
    loss=CategoricalCrossentropy(),
    optimizer=Adam(),
    metrics=['accuracy'],
)

# TensorBoard callback writing to a timestamped directory for this experiment.
run_stamp_2 = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
log_dir = f'{tensorboard_dir_name}/{experiment_name_2}/{run_stamp_2}'
tensorboard_callback_for_model_2 = TensorBoard(log_dir=log_dir)

# Checkpoint callback: weights only, written at the end of every epoch.
# modelcheckpoint_path_2 is read again later when models 3 and 4 load these weights.
modelcheckpoint_path_2 = modelcheckpoint_dir_name + '/' + experiment_name_2 + '/checkpoint.ckpt'
modelcheckpoint_callback_for_model_2 = ModelCheckpoint(
    filepath=modelcheckpoint_path_2,
    save_weights_only=True,
    save_best_only=False,
    save_freq='epoch',
    verbose=1,
)

# Show the resulting architecture.
model_2.summary()

Model: "model_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer_1 (InputLayer)  [(None, 224, 224, 3)]     0         
                                                                 
 data_augmentation_layer (Se  (None, 224, 224, 3)      0         
 quential)                                                       
                                                                 
 efficientnetb0 (Functional)  (None, None, None, 1280)  4049571  
                                                                 
 global_average_pooling_laye  (None, 1280)             0         
 r (GlobalAveragePooling2D)                                      
                                                                 
 output_layer (Dense)        (None, 10)                12810     
                                                                 
Total params: 4,062,381
Trainable params: 12,810
Non-train

## Fit All the Models

In [None]:
# All three feature-extraction runs train for INITIAL_EPOCHS epochs and
# validate on the first 25% of the test batches each epoch.

# fit model 0 - 10% of the training data, no augmentation
history_0 = model_0.fit(train_ten_percent_data,epochs=INITIAL_EPOCHS,
                     steps_per_epoch=len(train_ten_percent_data),
                     validation_data=test_ten_percent_data,
                     validation_steps=int(0.25 *len(test_ten_percent_data)),
                     callbacks=[tensorboard_callback,modelcheckpoint_callback])
# fit model 1 - 1% of the training data, with augmentation
history_1 = model_1.fit(train_one_percent_data,epochs=INITIAL_EPOCHS,
                     steps_per_epoch=len(train_one_percent_data),
                     validation_data=test_one_percent_data,
                     validation_steps=int(0.25 *len(test_one_percent_data)),
                     callbacks=[tensorboard_callback_for_model_1,modelcheckpoint_callback_for_model_1])
# fit model 2 - 10% of the training data, with augmentation
history_2 = model_2.fit(train_ten_percent_data,epochs=INITIAL_EPOCHS,
                     steps_per_epoch=len(train_ten_percent_data),
                     validation_data=test_ten_percent_data,
                     validation_steps=int(0.25 *len(test_ten_percent_data)),
                     callbacks=[tensorboard_callback_for_model_2,modelcheckpoint_callback_for_model_2])

## Model 3 - Fine Tuning from feature extraction from Model2

In [1]:
experiment_name_3 = 'model_3'
# step 1 - create the base model
base_model_3 = EfficientNetB0(include_top=False)
# step 2 - Make the base model untrainable.
base_model_3.trainable = False
# step 3 - Create the input layer
inputs_3 = Input(shape=INPUT_SHAPE,name='input_layer_1')
# step 4 - apply the shared data augmentation layer
x = data_augmentation_layer(inputs_3)
# step 5 - pass through the inputs to base model.
x = base_model_3(x,training=False)
# step 6 - pass through global average pooling
x = GlobalAveragePooling2D(name='global_average_pooling_layer')(x)
# step 7 - define output layer
outputs = Dense(LABELS_SIZE,activation='softmax',name='output_layer')(x)
# step 8 - create the model
model_3 = tf.keras.Model(inputs_3,outputs)
# step 9 - start from the weights model_2 saved to its checkpoint file.
model_3.load_weights(modelcheckpoint_path_2)

# step 10 - create tensorboard call back
log_dir = tensorboard_dir_name + '/' + experiment_name_3 + '/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
tensorboard_callback_for_model_3 = TensorBoard(log_dir=log_dir)
# step 11 - create model checkpoint callback.
# Bound to the *_for_model_3 name: the fine-tuning cell passes
# modelcheckpoint_callback_for_model_3 to fit(), and the previous name
# overwrote model_2's callback instead.
modelcheckpoint_path_3 = f'{modelcheckpoint_dir_name}/{experiment_name_3}/checkpoint.ckpt'
modelcheckpoint_callback_for_model_3 = ModelCheckpoint(filepath=modelcheckpoint_path_3,
                                           save_weights_only=True,
                                           save_best_only=False,
                                           save_freq='epoch',
                                           verbose=1)
# step 12 - print the model summary
model_3.summary()

NameError: name 'EfficientNetB0' is not defined

In [None]:
# unfreeze top 10 layers in EfficientNetB0 for learning.
def make_model_trainable(model,no_of_layers):
    """Make only the last ``no_of_layers`` layers of ``model`` trainable.

    Every earlier layer in ``model.layers`` is frozen. A message is printed
    for each layer that ends up trainable.

    Args:
        model: object exposing ``name``, ``trainable`` and a ``layers`` sequence
            whose items have a ``trainable`` attribute.
        no_of_layers: how many layers, counted from the end, to leave
            trainable. ``0`` (or a negative value) freezes every layer; a value
            larger than ``len(model.layers)`` unfreezes all of them.
    """
    model.trainable = True
    layers = model.layers
    # Index of the first trainable layer. Computed explicitly because a slice
    # such as layers[:-0] is empty and would freeze nothing.
    first_trainable = max(len(layers) - max(no_of_layers, 0), 0)
    for layer_number,layer in enumerate(layers):
        layer.trainable = layer_number >= first_trainable
    # print trainable layers and their index
    for layer_number,layer in enumerate(layers):
        if layer.trainable:
            print(f'the layer in {model.name} at index {layer_number} is trainable {layer.trainable}')
# model_3.layers[2] is the EfficientNetB0 base (input, augmentation, base, ...).
make_model_trainable(model_3.layers[2],10)
# compile the model after changing the trainable flags, with a lower learning
# rate for fine-tuning. `learning_rate` replaces the deprecated `lr` keyword.
model_3.compile(loss=CategoricalCrossentropy(),
               optimizer=Adam(learning_rate=0.0001),
               metrics=['accuracy'])
# fit the model 3
initial_epochs = 5
epochs = initial_epochs + 5
# initial_epoch is the index of the first epoch to run. history_2.epoch holds
# the indices already completed, so resume at its length; epoch[-1] would
# repeat the last feature-extraction epoch.
history_3 = model_3.fit(train_ten_percent_data,epochs=epochs,
                     steps_per_epoch=len(train_ten_percent_data),
                     validation_data=test_ten_percent_data,
                     validation_steps=int(0.25 *len(test_ten_percent_data)),
                     initial_epoch=len(history_2.epoch),
                     callbacks=[tensorboard_callback_for_model_3,modelcheckpoint_callback_for_model_3])

## Model 4 - Same as Model 3 but trained on 100% of the training data.

In [None]:
experiment_name_4 = 'model_4'

# Backbone: EfficientNetB0 without its classification head, frozen for now.
base_model_4 = EfficientNetB0(include_top=False)
base_model_4.trainable = False

# Graph: input -> augmentation -> backbone -> global average pooling -> softmax head.
inputs_4 = Input(shape=INPUT_SHAPE, name='input_layer_1')
augmented_4 = data_augmentation_layer(inputs_4)
features_4 = base_model_4(augmented_4, training=False)
pooled_4 = GlobalAveragePooling2D(name='global_average_pooling_layer')(features_4)
outputs_4 = Dense(LABELS_SIZE, activation='softmax', name='output_layer')(pooled_4)
model_4 = tf.keras.Model(inputs_4, outputs_4)

# Start from the weights stored in model_2's checkpoint file.
model_4.load_weights(modelcheckpoint_path_2)

# TensorBoard callback writing to a timestamped directory for this experiment.
run_stamp_4 = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
log_dir = f'{tensorboard_dir_name}/{experiment_name_4}/{run_stamp_4}'
tensorboard_callback_for_model_4 = TensorBoard(log_dir=log_dir)

# Checkpoint callback: weights only, written at the end of every epoch.
modelcheckpoint_path_4 = modelcheckpoint_dir_name + '/' + experiment_name_4 + '/checkpoint.ckpt'
modelcheckpoint_callback_for_model_4 = ModelCheckpoint(
    filepath=modelcheckpoint_path_4,
    save_weights_only=True,
    save_best_only=False,
    save_freq='epoch',
    verbose=1,
)

# Show the resulting architecture.
model_4.summary()

In [None]:
# unfreeze top 10 layers in EfficientNetB0 for learning.
def make_model_trainable(model,no_of_layers):
    """Make only the last ``no_of_layers`` layers of ``model`` trainable.

    Every earlier layer in ``model.layers`` is frozen. A message is printed
    for each layer that ends up trainable.

    Args:
        model: object exposing ``name``, ``trainable`` and a ``layers`` sequence
            whose items have a ``trainable`` attribute.
        no_of_layers: how many layers, counted from the end, to leave
            trainable. ``0`` (or a negative value) freezes every layer; a value
            larger than ``len(model.layers)`` unfreezes all of them.
    """
    model.trainable = True
    layers = model.layers
    # Index of the first trainable layer. Computed explicitly because a slice
    # such as layers[:-0] is empty and would freeze nothing.
    first_trainable = max(len(layers) - max(no_of_layers, 0), 0)
    for layer_number,layer in enumerate(layers):
        layer.trainable = layer_number >= first_trainable
    # print trainable layers and their index
    for layer_number,layer in enumerate(layers):
        if layer.trainable:
            print(f'the layer in {model.name} at index {layer_number} is trainable {layer.trainable}')
# model_4.layers[2] is the EfficientNetB0 base (input, augmentation, base, ...).
make_model_trainable(model_4.layers[2],10)
# compile the model after changing the trainable flags, with a lower learning
# rate for fine-tuning. `learning_rate` replaces the deprecated `lr` keyword.
model_4.compile(loss=CategoricalCrossentropy(),
               optimizer=Adam(learning_rate=0.0001),
               metrics=['accuracy'])
# fit the model 4 on the full train/test data sets
initial_epochs = 5
epochs = initial_epochs + 5
# initial_epoch is the index of the first epoch to run. history_2.epoch holds
# the indices already completed, so resume at its length; epoch[-1] would
# repeat the last feature-extraction epoch.
history_4 = model_4.fit(train_all_data,epochs=epochs,
                     steps_per_epoch=len(train_all_data),
                     validation_data=test_all_data,
                     validation_steps=int(0.25 *len(test_all_data)),
                     initial_epoch=len(history_2.epoch),
                     callbacks=[tensorboard_callback_for_model_4,modelcheckpoint_callback_for_model_4])

In [8]:
def compare_historys(original_history, new_history, initial_epochs=5):
    """
    Plot the metrics of two training runs back to back.

    The "accuracy", "loss", "val_accuracy" and "val_loss" lists of
    ``new_history.history`` are appended to those of
    ``original_history.history``. Accuracy and loss are drawn in two stacked
    subplots, each with a vertical line at ``initial_epochs - 1`` labelled
    'Start Fine Tuning'. The length of the original accuracy list, the length
    of the combined list and the combined list itself are printed.
    """
    # Metrics recorded by the first run.
    first_acc = original_history.history["accuracy"]
    first_loss = original_history.history["loss"]

    print(len(first_acc))

    first_val_acc = original_history.history["val_accuracy"]
    first_val_loss = original_history.history["val_loss"]

    # Append the second run's metrics to the first run's.
    total_acc = first_acc + new_history.history["accuracy"]
    total_loss = first_loss + new_history.history["loss"]

    total_val_acc = first_val_acc + new_history.history["val_accuracy"]
    total_val_loss = first_val_loss + new_history.history["val_loss"]

    print(len(total_acc))
    print(total_acc)

    # x-coordinates of the vertical marker separating the two runs.
    marker_x = [initial_epochs - 1, initial_epochs - 1]

    # One entry per subplot: position, curves, labels, legend location, title.
    panels = (
        (1, total_acc, total_val_acc,
         'Training Accuracy', 'Validation Accuracy',
         'lower right', 'Training and Validation Accuracy'),
        (2, total_loss, total_val_loss,
         'Training Loss', 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    )

    plt.figure(figsize=(8, 8))
    for position, train_curve, val_curve, train_label, val_label, legend_loc, title in panels:
        plt.subplot(2, 1, position)
        plt.plot(train_curve, label=train_label)
        plt.plot(val_curve, label=val_label)
        # Span the marker over the y-range the curves currently occupy.
        plt.plot(marker_x, plt.ylim(), label='Start Fine Tuning')
        plt.legend(loc=legend_loc)
        plt.title(title)
    plt.xlabel('epoch')
    plt.show()

In [None]:
# https://tensorboard.dev/experiment/VELvwU9kQKCLEYIMxH1WdQ/