In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Add, Dense, Activation, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D, Dropout
from tensorflow.keras.models import Model, load_model, Sequential
import os
import sys

In [None]:
# load_data() returns two (images, labels) tuples: one for the training split and
# one for the test split. Despite their names, both variables below are such tuples.
mnist_splits = tf.keras.datasets.mnist.load_data()
train_images, test_images = mnist_splits

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Separate the training tuple into its image array and its label array.
images = train_images[0]
labels = train_images[1]

In [None]:
# Keep images and labels together in one dictionary.
data = {'images': images, 'labels': labels}

data.keys()  # not the last expression of the cell, so nothing is displayed

# Boolean mask: True for digits 0-6 (source domain), False for digits 7-9 (target domain).
index = data['labels'] < 7

## Task 1

Let's use a dictionary to split the data into the source and the target domain.

In [None]:
# Source domain: samples selected by the mask; target domain: the complement.
source_images = data['images'][index]
source_labels = data['labels'][index]
target_images = data['images'][~index]
target_labels = data['labels'][~index]

In [None]:
# Report (images shape, labels shape) for the source domain, then the target domain.
for domain_images, domain_labels in ((source_images, source_labels),
                                     (target_images, target_labels)):
    print(domain_images.shape, domain_labels.shape)

(41935, 28, 28) (41935,)
(18065, 28, 28) (18065,)


The source domain has more samples because it contains 7 classes (digits 0–6), whilst the target domain contains the remaining 3 classes (digits 7–9).

In [None]:
def _to_model_input(images, labels):
    """Divide pixels by 255, cast to float32, append a channel axis; cast labels to int32.

    Returns the converted (images, labels) pair; images end up with shape (N, 28, 28, 1).
    """
    images = (images / 255.0).astype(np.float32)
    images = images.reshape(images.shape[0], 28, 28, 1)
    return images, labels.astype(np.int32)


# NOTE: this cell rebinds source_images / target_images, so re-running it without
# re-running the split cell above would divide by 255 a second time.
source_images, source_labels = _to_model_input(source_images, source_labels)
target_images, target_labels = _to_model_input(target_images, target_labels)

# The first 80% of each domain is used for training, the rest is held out for testing.
source_limit = int(0.8*source_images.shape[0])
target_limit = int(0.8*target_images.shape[0])

# Slice to the end of the array: the previous `[limit:-1]` silently dropped the
# last sample of each held-out set.
source_test_images = source_images[source_limit:]
source_test_labels = source_labels[source_limit:]
target_test_images = target_images[target_limit:]
target_test_labels = target_labels[target_limit:]

input_shape = source_images[0].shape
n_channels = input_shape[-1]

dataset_source = tf.data.Dataset.from_tensor_slices((source_images[:source_limit], source_labels[:source_limit]))
dataset_target = tf.data.Dataset.from_tensor_slices((target_images[:target_limit], target_labels[:target_limit]))

# Creating the data batches: shuffle with a 5000-element buffer, then batch by 64.
ds_source = dataset_source.shuffle(5000).batch(64)
ds_target = dataset_target.shuffle(5000).batch(64)

Normalising the data and bringing it into the right format makes the training process easier.

The source and target domain models have the same architecture but different output layers, so that the performance of the networks can be compared.

In [None]:
# Source-domain classifier: one 3x3 convolution (10 filters), 2x2 max-pooling,
# a 15-unit ReLU layer with 30% dropout, and a 7-way softmax output.
model_source = Sequential([
    Conv2D(10, kernel_size=(3, 3), input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(15, activation=tf.nn.relu),
    Dropout(0.3),
    Dense(7, activation=tf.nn.softmax),
])

It is important to describe the model at every stage, as it helps us keep track of the layers and parameters as we proceed to transfer learning.

In [None]:
# Keras documents `metrics` as a list (or dict) of metrics; a bare string is
# rejected by newer Keras releases, so it is wrapped in a list.
model_source.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
model_source.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 10)        100       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 10)        0         
_________________________________________________________________
flatten (Flatten)            (None, 1690)              0         
_________________________________________________________________
dense (Dense)                (None, 15)                25365     
_________________________________________________________________
dropout (Dropout)            (None, 15)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 112       
Total params: 25,577
Trainable params: 25,577
Non-trainable params: 0
____________________________________________________

In [None]:
# ds_source is already batched, so `batch_size` must not be given to fit();
# `use_multiprocessing` only concerns generator / Sequence inputs and is dropped.
model_source.fit(ds_source, epochs=10, verbose=2)

Epoch 1/10
525/525 - 1s - loss: 0.5688 - accuracy: 0.8088
Epoch 2/10
525/525 - 1s - loss: 0.3712 - accuracy: 0.8770
Epoch 3/10
525/525 - 1s - loss: 0.3239 - accuracy: 0.8928
Epoch 4/10
525/525 - 1s - loss: 0.2809 - accuracy: 0.9076
Epoch 5/10
525/525 - 1s - loss: 0.2389 - accuracy: 0.9219
Epoch 6/10
525/525 - 1s - loss: 0.2199 - accuracy: 0.9267
Epoch 7/10
525/525 - 1s - loss: 0.1979 - accuracy: 0.9328
Epoch 8/10
525/525 - 1s - loss: 0.1852 - accuracy: 0.9367
Epoch 9/10
525/525 - 1s - loss: 0.1657 - accuracy: 0.9439
Epoch 10/10
525/525 - 1s - loss: 0.1594 - accuracy: 0.9447


<tensorflow.python.keras.callbacks.History at 0x7fb782e28630>

Training the Source model to compare the performance later after transfer learning.

Defining the target model and training the target model separately. 

In [None]:
# Target-domain classifier: same layer stack as the source model, but with a
# 10-way softmax output.
model_target = Sequential([
    Conv2D(10, kernel_size=(3, 3), input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(15, activation=tf.nn.relu),
    Dropout(0.3),
    Dense(10, activation='softmax'),
])

In [None]:
# Keras documents `metrics` as a list (or dict) of metrics; a bare string is
# rejected by newer Keras releases, so it is wrapped in a list.
model_target.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
model_target.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 10)        100       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 10)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1690)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 15)                25365     
_________________________________________________________________
dropout_1 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 25,625
Non-trainable params: 0
__________________________________________________

In [None]:
# ds_target is already batched, so `batch_size` must not be given to fit();
# `use_multiprocessing` only concerns generator / Sequence inputs and is dropped.
model_target.fit(ds_target, epochs=10, verbose=2)

Epoch 1/10
226/226 - 0s - loss: 0.5938 - accuracy: 0.8031
Epoch 2/10
226/226 - 0s - loss: 0.2312 - accuracy: 0.9188
Epoch 3/10
226/226 - 0s - loss: 0.1868 - accuracy: 0.9373
Epoch 4/10
226/226 - 0s - loss: 0.1764 - accuracy: 0.9378
Epoch 5/10
226/226 - 1s - loss: 0.1672 - accuracy: 0.9450
Epoch 6/10
226/226 - 1s - loss: 0.1555 - accuracy: 0.9464
Epoch 7/10
226/226 - 1s - loss: 0.1499 - accuracy: 0.9509
Epoch 8/10
226/226 - 0s - loss: 0.1428 - accuracy: 0.9514
Epoch 9/10
226/226 - 0s - loss: 0.1340 - accuracy: 0.9561
Epoch 10/10
226/226 - 0s - loss: 0.1275 - accuracy: 0.9587


<tensorflow.python.keras.callbacks.History at 0x7fb6ebfd1748>

Evaluating our source and target models.

In [None]:
# Returns [loss, accuracy] on the held-out part of the source domain.
model_source.evaluate(
    x=source_test_images,
    y=source_test_labels,
    batch_size=32,
    verbose=1,
)



[0.06533998250961304, 0.9815168380737305]

In [None]:
# Returns [loss, accuracy] on the held-out part of the target domain.
model_target.evaluate(
    x=target_test_images,
    y=target_test_labels,
    batch_size=32,
    verbose=1,
)



[0.0738743469119072, 0.974252462387085]

In [None]:
# Sanity check: shape of the held-out target-domain test images.
target_test_images.shape

(3612, 28, 28, 1)

Let us now create a flag model as a copy of the trained source model and drop its last layer, without disturbing the trained source model, so that the latter can still be used later on.

In [None]:
# clone_model() rebuilds the architecture with newly created (freshly initialised)
# layers and weights; it does NOT copy the trained parameters. They are therefore
# transferred explicitly so that the clone really carries the source-domain knowledge.
flag_model = tf.keras.models.clone_model(model_source)
flag_model.set_weights(model_source.get_weights())
flag_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 10)        100       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 10)        0         
_________________________________________________________________
flatten (Flatten)            (None, 1690)              0         
_________________________________________________________________
dense (Dense)                (None, 15)                25365     
_________________________________________________________________
dropout (Dropout)            (None, 15)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 112       
Total params: 25,577
Trainable params: 25,577
Non-trainable params: 0
____________________________________________________

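The flag model now has the same architecture as the source model. Note that `clone_model` copies only the architecture: the clone has the same trained parameters as the source model only if they are copied over explicitly (e.g. with `set_weights`).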

In [None]:
# Cut the network after its second-to-last layer (the Dropout layer), i.e. remove
# the classification layer. Note that the name `flag_model` is rebound here.
penultimate_output = flag_model.layers[-2].output
flag_model = Model(inputs=flag_model.input, outputs=penultimate_output)
flag_model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_input (InputLayer)    [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 26, 26, 10)        100       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 10)        0         
_________________________________________________________________
flatten (Flatten)            (None, 1690)              0         
_________________________________________________________________
dense (Dense)                (None, 15)                25365     
_________________________________________________________________
dropout (Dropout)            (None, 15)                0         
Total params: 25,465
Trainable params: 25,465
Non-trainable params: 0
__________________________________________________

The flag model after dropping the layer.

Let's attach the last layer of the target model to the truncated source model, train the combination, and then compare the performance of the networks.

In [None]:
# Stack the truncated source network and the target model's last layer.
# The layer object itself is reused, so it is shared with model_target.
target_head = model_target.get_layer(index=-1)
t_model = Sequential([flag_model, target_head])

In [None]:
# Show the layers and parameter counts of the combined transfer model.
t_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
functional_1 (Functional)    (None, 15)                25465     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 25,625
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Keras documents `metrics` as a list (or dict) of metrics; a bare string is
# rejected by newer Keras releases, so it is wrapped in a list.
t_model.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# ds_target is already batched, so `batch_size` must not be given to fit();
# `use_multiprocessing` only concerns generator / Sequence inputs and is dropped.
t_model.fit(ds_target, epochs=10, verbose=2)

Epoch 1/10
226/226 - 1s - loss: 0.3845 - accuracy: 0.8611
Epoch 2/10
226/226 - 1s - loss: 0.1803 - accuracy: 0.9361
Epoch 3/10
226/226 - 1s - loss: 0.1445 - accuracy: 0.9508
Epoch 4/10
226/226 - 1s - loss: 0.1323 - accuracy: 0.9546
Epoch 5/10
226/226 - 0s - loss: 0.1186 - accuracy: 0.9604
Epoch 6/10
226/226 - 0s - loss: 0.1061 - accuracy: 0.9639
Epoch 7/10
226/226 - 0s - loss: 0.0986 - accuracy: 0.9673
Epoch 8/10
226/226 - 1s - loss: 0.0892 - accuracy: 0.9710
Epoch 9/10
226/226 - 0s - loss: 0.0827 - accuracy: 0.9707
Epoch 10/10
226/226 - 0s - loss: 0.0785 - accuracy: 0.9742


<tensorflow.python.keras.callbacks.History at 0x7fb6ea621c50>

In [None]:
# Returns [loss, accuracy] of the transfer model on the held-out target data.
t_model.evaluate(
    x=target_test_images,
    y=target_test_labels,
    batch_size=32,
    verbose=1,
)



[0.059141792356967926, 0.9806201457977295]

The test accuracy is significantly improved: the hidden
layers are trained on the source data and the classification layer is trained on the target data.





## Observations

How well did the transfer learning work, compared to training on the target data set from scratch?
You can e.g. compare:

achieved accuracy

steps until convergence

generalization

Before transfer learning, the source and target models already reach outstanding test accuracies of about 98% and 97% respectively. The final model combines the layers of the source model (without its classification layer) with the last layer of the target model. After transfer learning, the test accuracy on the target domain is significantly improved, at about 98%. One thing to note is that learning is faster with transfer learning: the training accuracy after the first epoch is about 86%, compared with about 80% when training the target model from scratch.

## Task 2

### Task 2.1

Doing the same with MNIST and FMNIST.

Mnist as target and Fmnist as source.

In [None]:
# Fashion-MNIST is the source domain; each split is an (images, labels) tuple.
source_train, source_test = tf.keras.datasets.fashion_mnist.load_data()
source_train_images, source_train_labels = source_train[0], source_train[1]
source_test_images, source_test_labels = source_test[0], source_test[1]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [None]:
# MNIST is the target domain; each split is an (images, labels) tuple.
target_train, target_test = tf.keras.datasets.mnist.load_data()
target_train_images, target_train_labels = target_train[0], target_train[1]
target_test_images, target_test_labels = target_test[0], target_test[1]

In [None]:
# Show the image and label shapes of each training set (previously the image
# shape was printed twice and the label shape never).
print(source_train_images.shape, source_train_labels.shape)
print(target_train_images.shape, target_train_labels.shape)

(60000, 28, 28) (60000, 28, 28)
(60000, 28, 28) (60000, 28, 28)


In [None]:
# Source (Fashion-MNIST) training split: divide pixels by 255, store as float32,
# append a channel axis; labels become int32.
source_train_images = (source_train_images / 255.0).astype(np.float32)
source_train_images = source_train_images.reshape(len(source_train_images), 28, 28, 1)
source_train_labels = source_train_labels.astype(np.int32)

# Same conversion for the source test split.
source_test_images = (source_test_images / 255.0).astype(np.float32)
source_test_images = source_test_images.reshape(len(source_test_images), 28, 28, 1)
source_test_labels = source_test_labels.astype(np.int32)

In [None]:
# Target (MNIST) training split: divide pixels by 255, store as float32,
# append a channel axis; labels become int32.
target_train_images = (target_train_images / 255.0).astype(np.float32)
target_train_images = target_train_images.reshape(len(target_train_images), 28, 28, 1)
target_train_labels = target_train_labels.astype(np.int32)

# Same conversion for the target test split.
target_test_images = (target_test_images / 255.0).astype(np.float32)
target_test_images = target_test_images.reshape(len(target_test_images), 28, 28, 1)
target_test_labels = target_test_labels.astype(np.int32)

In [None]:
# Wrap the training arrays as tf.data datasets of (image, label) pairs.
dataset_source = tf.data.Dataset.from_tensor_slices(
    (source_train_images, source_train_labels))
dataset_target = tf.data.Dataset.from_tensor_slices(
    (target_train_images, target_train_labels))

# Creating the data batches: shuffle with a 5000-element buffer, then batch by 64.
ds_source, ds_target = (
    dataset.shuffle(5000).batch(64)
    for dataset in (dataset_source, dataset_target)
)

In [None]:
# Fashion-MNIST classifier; `input_shape` comes from the Task 1 preprocessing cell.
model_f = Sequential([
    Conv2D(10, kernel_size=(3, 3), input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(15, activation=tf.nn.relu),
    Dropout(0.3),
    Dense(10, activation=tf.nn.softmax),
])

In [None]:
# Keras documents `metrics` as a list (or dict) of metrics; a bare string is
# rejected by newer Keras releases, so it is wrapped in a list.
model_f.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
model_f.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 26, 26, 10)        100       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 10)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 1690)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 15)                25365     
_________________________________________________________________
dropout_2 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 25,625
Non-trainable params: 0
__________________________________________________

In [None]:
# ds_source is already batched, so `batch_size` must not be given to fit();
# `use_multiprocessing` only concerns generator / Sequence inputs and is dropped.
model_f.fit(ds_source, epochs=10, verbose=2)

Epoch 1/10
938/938 - 2s - loss: 1.0780 - accuracy: 0.5883
Epoch 2/10
938/938 - 2s - loss: 0.8107 - accuracy: 0.6825
Epoch 3/10
938/938 - 2s - loss: 0.7318 - accuracy: 0.7149
Epoch 4/10
938/938 - 2s - loss: 0.6590 - accuracy: 0.7492
Epoch 5/10
938/938 - 2s - loss: 0.6238 - accuracy: 0.7666
Epoch 6/10
938/938 - 2s - loss: 0.5969 - accuracy: 0.7786
Epoch 7/10
938/938 - 2s - loss: 0.5810 - accuracy: 0.7877
Epoch 8/10
938/938 - 2s - loss: 0.5645 - accuracy: 0.7925
Epoch 9/10
938/938 - 2s - loss: 0.5360 - accuracy: 0.8016
Epoch 10/10
938/938 - 2s - loss: 0.5240 - accuracy: 0.8055


<tensorflow.python.keras.callbacks.History at 0x7fb74ee6b6a0>

In [None]:
# MNIST classifier with the same layer stack as model_f;
# `input_shape` comes from the Task 1 preprocessing cell.
model_m = Sequential([
    Conv2D(10, kernel_size=(3, 3), input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(15, activation=tf.nn.relu),
    Dropout(0.3),
    Dense(10, activation='softmax'),
])

In [None]:
# Keras documents `metrics` as a list (or dict) of metrics; a bare string is
# rejected by newer Keras releases, so it is wrapped in a list.
model_m.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
model_m.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 26, 26, 10)        100       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 13, 13, 10)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 1690)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 15)                25365     
_________________________________________________________________
dropout_3 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 25,625
Non-trainable params: 0
__________________________________________________

In [None]:
# ds_target is already batched, so `batch_size` must not be given to fit();
# `use_multiprocessing` only concerns generator / Sequence inputs and is dropped.
model_m.fit(ds_target, epochs=10, verbose=2)

Epoch 1/10
938/938 - 2s - loss: 0.8107 - accuracy: 0.7286
Epoch 2/10
938/938 - 2s - loss: 0.5411 - accuracy: 0.8247
Epoch 3/10
938/938 - 2s - loss: 0.4603 - accuracy: 0.8509
Epoch 4/10
938/938 - 2s - loss: 0.3940 - accuracy: 0.8728
Epoch 5/10
938/938 - 2s - loss: 0.3476 - accuracy: 0.8884
Epoch 6/10
938/938 - 2s - loss: 0.3218 - accuracy: 0.8952
Epoch 7/10
938/938 - 2s - loss: 0.2973 - accuracy: 0.9028
Epoch 8/10
938/938 - 2s - loss: 0.2848 - accuracy: 0.9044
Epoch 9/10
938/938 - 2s - loss: 0.2706 - accuracy: 0.9101
Epoch 10/10
938/938 - 2s - loss: 0.2639 - accuracy: 0.9105


<tensorflow.python.keras.callbacks.History at 0x7fb6ea4d2048>

In [None]:
# Returns [loss, accuracy] of the Fashion-MNIST model on its test split.
model_f.evaluate(
    x=source_test_images,
    y=source_test_labels,
    batch_size=32,
    verbose=1,
)



[0.3710150718688965, 0.8712000250816345]

In [None]:
# Returns [loss, accuracy] of the MNIST model on its test split.
model_m.evaluate(
    x=target_test_images,
    y=target_test_labels,
    batch_size=32,
    verbose=1,
)



[0.10602888464927673, 0.9678999781608582]

In [None]:
# clone_model() rebuilds the architecture with newly created (freshly initialised)
# layers and weights; it does NOT copy the trained parameters. They are therefore
# transferred explicitly from the trained Fashion-MNIST model.
flag_model2 = tf.keras.models.clone_model(model_f)
flag_model2.set_weights(model_f.get_weights())
flag_model2.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 26, 26, 10)        100       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 10)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 1690)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 15)                25365     
_________________________________________________________________
dropout_2 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 25,625
Non-trainable params: 0
__________________________________________________

In [None]:
# Cut the network after its second-to-last layer (the Dropout layer), i.e. remove
# the classification layer. Note that the name `flag_model2` is rebound here.
penultimate_output2 = flag_model2.layers[-2].output
flag_model2 = Model(inputs=flag_model2.input, outputs=penultimate_output2)
flag_model2.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2_input (InputLayer)  [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 26, 26, 10)        100       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 10)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 1690)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 15)                25365     
_________________________________________________________________
dropout_2 (Dropout)          (None, 15)                0         
Total params: 25,465
Trainable params: 25,465
Non-trainable params: 0
__________________________________________________

In [None]:
# Stack the truncated Fashion-MNIST network and the MNIST model's last layer.
# The layer object itself is reused, so it is shared with model_m.
mnist_head = model_m.get_layer(index=-1)
tr_model = Sequential([flag_model2, mnist_head])

In [None]:
# Show the layers and parameter counts of the combined transfer model.
tr_model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
functional_3 (Functional)    (None, 15)                25465     
_________________________________________________________________
dense_7 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 25,625
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Keras documents `metrics` as a list (or dict) of metrics; a bare string is
# rejected by newer Keras releases, so it is wrapped in a list.
tr_model.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# ds_target is already batched, so `batch_size` must not be given to fit();
# `use_multiprocessing` only concerns generator / Sequence inputs and is dropped.
tr_model.fit(ds_target, epochs=10, verbose=2)

Epoch 1/10
938/938 - 2s - loss: 0.6151 - accuracy: 0.7989
Epoch 2/10
938/938 - 2s - loss: 0.4143 - accuracy: 0.8670
Epoch 3/10
938/938 - 2s - loss: 0.3457 - accuracy: 0.8896
Epoch 4/10
938/938 - 2s - loss: 0.3087 - accuracy: 0.8996
Epoch 5/10
938/938 - 2s - loss: 0.2854 - accuracy: 0.9077
Epoch 6/10
938/938 - 2s - loss: 0.2629 - accuracy: 0.9155
Epoch 7/10
938/938 - 2s - loss: 0.2550 - accuracy: 0.9169
Epoch 8/10
938/938 - 2s - loss: 0.2407 - accuracy: 0.9213
Epoch 9/10
938/938 - 2s - loss: 0.2358 - accuracy: 0.9222
Epoch 10/10
938/938 - 2s - loss: 0.2295 - accuracy: 0.9259


<tensorflow.python.keras.callbacks.History at 0x7fb6ea2199b0>

In [None]:
# Returns [loss, accuracy] of the transfer model on the MNIST test split.
tr_model.evaluate(
    x=target_test_images,
    y=target_test_labels,
    batch_size=32,
    verbose=1,
)



[0.09717144817113876, 0.9713000059127808]

 Observations

Mnist as target and Fmnist as source.

Target domain: Mnist

Source domain: Fmnist

The models work well on both domains, with a test accuracy of about 87% on the source domain and about 96% on the target domain. The models are not overfitting. Transfer learning works better here: the final model, whose classification layer is that of the target model, reaches a test accuracy of 97.13%. We can see that the final model generalises well.

### Task 2.2

Mnist as source and Fmnist as target.

In [None]:
# clone_model() rebuilds the architecture with newly created (freshly initialised)
# layers and weights; it does NOT copy the trained parameters. They are therefore
# transferred explicitly from the trained MNIST model.
flag_model3 = tf.keras.models.clone_model(model_m)
flag_model3.set_weights(model_m.get_weights())
flag_model3.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 26, 26, 10)        100       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 13, 13, 10)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 1690)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 15)                25365     
_________________________________________________________________
dropout_3 (Dropout)          (None, 15)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 25,625
Non-trainable params: 0
__________________________________________________

In [None]:
# Cut the network after its second-to-last layer (the Dropout layer), i.e. remove
# the classification layer. Note that the name `flag_model3` is rebound here.
penultimate_output3 = flag_model3.layers[-2].output
flag_model3 = Model(inputs=flag_model3.input, outputs=penultimate_output3)
flag_model3.summary()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3_input (InputLayer)  [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 26, 26, 10)        100       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 13, 13, 10)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 1690)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 15)                25365     
_________________________________________________________________
dropout_3 (Dropout)          (None, 15)                0         
Total params: 25,465
Trainable params: 25,465
Non-trainable params: 0
__________________________________________________

In [None]:
# Stack the truncated MNIST network and the Fashion-MNIST model's last layer.
# The layer object itself is reused, so it is shared with model_f.
fashion_head = model_f.get_layer(index=-1)
trf_model = Sequential([flag_model3, fashion_head])

In [None]:
# Show the layers and parameter counts of the combined transfer model.
trf_model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
functional_5 (Functional)    (None, 15)                25465     
_________________________________________________________________
dense_5 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 25,625
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Keras documents `metrics` as a list (or dict) of metrics; a bare string is
# rejected by newer Keras releases, so it is wrapped in a list.
trf_model.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# ds_source is already batched, so `batch_size` must not be given to fit();
# `use_multiprocessing` only concerns generator / Sequence inputs and is dropped.
trf_model.fit(ds_source, epochs=10, verbose=2)

Epoch 1/10
938/938 - 2s - loss: 0.8123 - accuracy: 0.6889
Epoch 2/10
938/938 - 2s - loss: 0.6298 - accuracy: 0.7629
Epoch 3/10
938/938 - 2s - loss: 0.5931 - accuracy: 0.7795
Epoch 4/10
938/938 - 2s - loss: 0.5756 - accuracy: 0.7849
Epoch 5/10
938/938 - 2s - loss: 0.5573 - accuracy: 0.7928
Epoch 6/10
938/938 - 2s - loss: 0.5443 - accuracy: 0.7969
Epoch 7/10
938/938 - 2s - loss: 0.5302 - accuracy: 0.8010
Epoch 8/10
938/938 - 2s - loss: 0.5220 - accuracy: 0.8058
Epoch 9/10
938/938 - 2s - loss: 0.4973 - accuracy: 0.8149
Epoch 10/10
938/938 - 2s - loss: 0.4790 - accuracy: 0.8189


<tensorflow.python.keras.callbacks.History at 0x7fb6ea0e9588>

In [None]:
# Returns [loss, accuracy] of the transfer model on the Fashion-MNIST test split.
trf_model.evaluate(
    x=source_test_images,
    y=source_test_labels,
    batch_size=32,
    verbose=1,
)



[0.35560479760169983, 0.8744000196456909]

 Observations

Target domain: Fmnist

Source domain: Mnist

The models work well on both domains, with a test accuracy of about 96% on the source domain (MNIST) and about 87% on the target domain (Fashion-MNIST). The models are not overfitting. Transfer learning does not work as well as in the previous case: the final model, whose classification layer is that of the target model, reaches a test accuracy of 87.44%. We can see that the final model does not generalise as well as the previous one.

## Task 3

Freezing all pre-trained layers (i.e. setting `trainable` to `False`). Let's take the same tasks as above with MNIST and Fashion-MNIST and compare the performance after freezing the pre-trained layers.

In [None]:
# Freeze the whole feature extractor; only the classification layer stays trainable.
# Both parts were already fitted on ds_source in the Task 2.2 cells above, so this
# run continues from those weights.
flag_model3.trainable = False
trf_model = Sequential()
trf_model.add(flag_model3)
trf_model.add(model_f.get_layer(index=-1))

In [None]:
# Show the layers plus the trainable / non-trainable parameter split.
trf_model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
functional_5 (Functional)    (None, 15)                25465     
_________________________________________________________________
dense_5 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 160
Non-trainable params: 25,465
_________________________________________________________________


Freezing the pre trained model

In [None]:
# Keras documents `metrics` as a list (or dict) of metrics; a bare string is
# rejected by newer Keras releases, so it is wrapped in a list.
trf_model.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# ds_source is already batched, so `batch_size` must not be given to fit();
# `use_multiprocessing` only concerns generator / Sequence inputs and is dropped.
trf_model.fit(ds_source, epochs=10, verbose=2)

Epoch 1/10
938/938 - 2s - loss: 0.4629 - accuracy: 0.8266
Epoch 2/10
938/938 - 2s - loss: 0.4659 - accuracy: 0.8248
Epoch 3/10
938/938 - 2s - loss: 0.4621 - accuracy: 0.8259
Epoch 4/10
938/938 - 2s - loss: 0.4590 - accuracy: 0.8278
Epoch 5/10
938/938 - 2s - loss: 0.4587 - accuracy: 0.8280
Epoch 6/10
938/938 - 2s - loss: 0.4606 - accuracy: 0.8290
Epoch 7/10
938/938 - 2s - loss: 0.4571 - accuracy: 0.8283
Epoch 8/10
938/938 - 2s - loss: 0.4593 - accuracy: 0.8264
Epoch 9/10
938/938 - 2s - loss: 0.4555 - accuracy: 0.8291
Epoch 10/10
938/938 - 2s - loss: 0.4583 - accuracy: 0.8297


<tensorflow.python.keras.callbacks.History at 0x7fb6e04c8a90>

In [None]:
# Returns [loss, accuracy] on the Fashion-MNIST test split.
trf_model.evaluate(
    x=source_test_images,
    y=source_test_labels,
    batch_size=32,
    verbose=1,
)



[0.3539397120475769, 0.8788999915122986]

freezing only some bottom layers

In [None]:
# Make the feature extractor trainable again and rebuild the stacked model.
flag_model3.trainable = True
trf_model = Sequential()
trf_model.add(flag_model3)
trf_model.add(model_f.get_layer(index=-1))

In [None]:
# NOTE(review): layers[-1] is the classification layer taken from model_f, so this
# freezes the top layer while the whole feature extractor stays trainable (the
# summary reports 160 non-trainable parameters). That is the opposite of freezing
# "bottom" layers. The flag is set on the layer object shared with model_f.
# TODO confirm the intended layers to freeze.
trf_model.layers[-1].trainable = False
trf_model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
functional_5 (Functional)    (None, 15)                25465     
_________________________________________________________________
dense_5 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 25,465
Non-trainable params: 160
_________________________________________________________________


In [None]:
# Keras documents `metrics` as a list (or dict) of metrics; a bare string is
# rejected by newer Keras releases, so it is wrapped in a list.
trf_model.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# ds_source is already batched, so `batch_size` must not be given to fit();
# `use_multiprocessing` only concerns generator / Sequence inputs and is dropped.
trf_model.fit(ds_source, epochs=10, verbose=2)

Epoch 1/10
938/938 - 2s - loss: 0.4683 - accuracy: 0.8255
Epoch 2/10
938/938 - 2s - loss: 0.4547 - accuracy: 0.8306
Epoch 3/10
938/938 - 2s - loss: 0.4457 - accuracy: 0.8336
Epoch 4/10
938/938 - 2s - loss: 0.4405 - accuracy: 0.8327
Epoch 5/10
938/938 - 2s - loss: 0.4351 - accuracy: 0.8364
Epoch 6/10
938/938 - 2s - loss: 0.4281 - accuracy: 0.8395
Epoch 7/10
938/938 - 2s - loss: 0.4252 - accuracy: 0.8413
Epoch 8/10
938/938 - 2s - loss: 0.4180 - accuracy: 0.8436
Epoch 9/10
938/938 - 2s - loss: 0.4183 - accuracy: 0.8422
Epoch 10/10
938/938 - 2s - loss: 0.4155 - accuracy: 0.8442


<tensorflow.python.keras.callbacks.History at 0x7fb6e03d9eb8>

In [None]:
# Returns [loss, accuracy] on the Fashion-MNIST test split.
trf_model.evaluate(
    x=source_test_images,
    y=source_test_labels,
    batch_size=32,
    verbose=1,
)



[0.33802491426467896, 0.8830999732017517]

Replacing the source classification layer with multiple new ones for the target domain

In [None]:
# Rebuild the transfer model: the `flag_model3` base followed by a brand-new
# (untrained) 10-unit softmax layer instead of the one reused from `model_f`.
trf_model = Sequential()
trf_model.add(flag_model3)
trf_model.add(Dense(10, activation='softmax'))

In [None]:
# Print the layer and parameter-count table for the rebuilt model.
trf_model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
functional_5 (Functional)    (None, 15)                25465     
_________________________________________________________________
dense_8 (Dense)              (None, 10)                160       
Total params: 25,625
Trainable params: 25,625
Non-trainable params: 0
_________________________________________________________________


Adding a new trainable classification layer to the pre-trained model.

In [None]:
# Compile the rebuilt model: Adam optimizer, sparse categorical cross-entropy
# (labels are integer class ids) and accuracy tracking.
# `metrics` is given as a list, which is the documented form; newer Keras
# releases may reject a bare string here.
trf_model.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# Train the base plus the new classification layer for 10 epochs.
# `ds_source` is passed without separate labels, so it has to yield
# already-batched (images, labels) pairs itself. For that kind of input Keras
# does not use `batch_size`, and `use_multiprocessing` only concerns
# generator / `Sequence` inputs (and is absent from `fit` in newer Keras),
# so neither argument is passed.
# NOTE(review): assumes `ds_source` is a batched tf.data.Dataset — confirm.
trf_model.fit(ds_source, epochs=10, verbose=2)

Epoch 1/10
938/938 - 2s - loss: 0.8484 - accuracy: 0.6973
Epoch 2/10
938/938 - 2s - loss: 0.5489 - accuracy: 0.7930
Epoch 3/10
938/938 - 2s - loss: 0.5010 - accuracy: 0.8110
Epoch 4/10
938/938 - 2s - loss: 0.4825 - accuracy: 0.8185
Epoch 5/10
938/938 - 2s - loss: 0.4774 - accuracy: 0.8203
Epoch 6/10
938/938 - 2s - loss: 0.4486 - accuracy: 0.8331
Epoch 7/10
938/938 - 2s - loss: 0.4307 - accuracy: 0.8421
Epoch 8/10
938/938 - 2s - loss: 0.4248 - accuracy: 0.8454
Epoch 9/10
938/938 - 2s - loss: 0.4144 - accuracy: 0.8493
Epoch 10/10
938/938 - 2s - loss: 0.4101 - accuracy: 0.8512


<tensorflow.python.keras.callbacks.History at 0x7fb6e0281898>

In [None]:
# Evaluate the model with the newly added classification layer on the source
# test arrays; the displayed list is the loss followed by the compiled metrics.
trf_model.evaluate(
    x=source_test_images,
    y=source_test_labels,
    batch_size=32,
    verbose=1,
)



[0.3326401710510254, 0.8840000033378601]

In [None]:
# Variant with several new layers on top of the `flag_model3` base: a
# 15-unit ReLU Dense layer followed by a 10-unit softmax classifier.
replace_model = Sequential([
    flag_model3,
    Dense(15, activation=tf.nn.relu),
    Dense(10, activation=tf.nn.softmax),
])

In [None]:
# Print the layer and parameter-count table for the multi-layer variant.
replace_model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
functional_5 (Functional)    (None, 15)                25465     
_________________________________________________________________
dense_9 (Dense)              (None, 15)                240       
_________________________________________________________________
dense_10 (Dense)             (None, 10)                160       
Total params: 25,865
Trainable params: 25,865
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Compile the multi-layer variant: Adam optimizer, sparse categorical
# cross-entropy (labels are integer class ids) and accuracy tracking.
# `metrics` is given as a list, which is the documented form; newer Keras
# releases may reject a bare string here.
replace_model.compile(
    optimizer='Adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# Train the multi-layer variant for 10 epochs.
# `ds_source` is passed without separate labels, so it has to yield
# already-batched (images, labels) pairs itself. For that kind of input Keras
# does not use `batch_size`, and `use_multiprocessing` only concerns
# generator / `Sequence` inputs (and is absent from `fit` in newer Keras),
# so neither argument is passed.
# NOTE(review): assumes `ds_source` is a batched tf.data.Dataset — confirm.
replace_model.fit(ds_source, epochs=10, verbose=2)

Epoch 1/10
938/938 - 2s - loss: 0.9438 - accuracy: 0.6794
Epoch 2/10
938/938 - 2s - loss: 0.5144 - accuracy: 0.8151
Epoch 3/10
938/938 - 2s - loss: 0.4604 - accuracy: 0.8345
Epoch 4/10
938/938 - 2s - loss: 0.4348 - accuracy: 0.8419
Epoch 5/10
938/938 - 2s - loss: 0.4215 - accuracy: 0.8476
Epoch 6/10
938/938 - 2s - loss: 0.4110 - accuracy: 0.8519
Epoch 7/10
938/938 - 2s - loss: 0.4079 - accuracy: 0.8536
Epoch 8/10
938/938 - 2s - loss: 0.3972 - accuracy: 0.8580
Epoch 9/10
938/938 - 2s - loss: 0.3893 - accuracy: 0.8608
Epoch 10/10
938/938 - 2s - loss: 0.3878 - accuracy: 0.8616


<tensorflow.python.keras.callbacks.History at 0x7fb6e01af048>

## Observations

Transfer learning through different ways:

Source domain: MNIST

Target domain: FMNIST,
as FMNIST is more complex than MNIST.

1. freezing all pre-trained layers:

The model works well for both domains, with a test accuracy of 87% on the source domain and 96% on the target domain. The model is not overfitting. Transfer learning does not improve results for the target domain here, as the test accuracy of the final model, in which the pre-trained layers are frozen, is 87.89%. The model generalises well in this case.

2. freezing only some bottom layers:

The model works well for both domains, with a test accuracy of 87% on the source domain and 96% on the target domain. The model is not overfitting. Transfer learning does not improve results for the target domain here, as the test accuracy of the final model, in which the classification layer is frozen, is 88%. The model generalises well in this case.

3. Adding a new trainable classification layer to the pre trained model:

The model works well for both domains, with a test accuracy of 87% on the source domain and 96% on the target domain. The model is not overfitting. Transfer learning does not improve results for the target domain here, as the test accuracy of the final model, in which the classification layer is newly added, is 88%. The model generalises well in this case.

4. Adding multiple new layers to pre trained model:

A new dense layer and a classification layer are added to the final model. The accuracy is decent at 86% (training accuracy after 10 epochs).

