<a href="https://colab.research.google.com/github/rsethi21/transfer_learning/blob/main/TestTransferLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Following notebook is modified from the guide: https://www.tensorflow.org/tutorials/images/transfer_learning, https://www.youtube.com/watch?v=LsdxvjLWkIY

# Imports

In [None]:
# imports
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
import cv2
from tensorflow.keras.applications import imagenet_utils
import pathlib
import sklearn
import tensorflow_hub as hub
import random
import pandas as pd

# Data Download and Preprocessing

In [None]:
# download datasets and separate into train and validation sets (already done but otherwise will simply use train,test split)
_URL = 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz'
data_dir = tf.keras.utils.get_file(origin=_URL,
                                   fname='flower_photos',
                                   untar=True, cache_dir='.')
data_dir = pathlib.Path(data_dir)
# train_dir = os.path.join(PATH, 'train')
# validation_dir = os.path.join(PATH, 'validation')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz


In [None]:
# Data preprocessing
class_names = list(os.listdir(data_dir))
class_names.remove('LICENSE.txt')
flowers_image_dict = {
    name: list(data_dir.glob(f'{name}/*')) for name in class_names
}
flowers_labels_dict = {
    class_names[i]: i for i in range(len(class_names))
}

In [None]:
#visualize a sample training image
IMG_SIZE = (224, 224)
X, y = [], []
for flower_name, images in flowers_image_dict.items():
  for image in images:
    img = cv2.imread(str(image))
    resized_img = cv2.resize(img, IMG_SIZE)
    X.append(resized_img)
    y.append(flowers_labels_dict[flower_name])

In [None]:
# split dataset into train, test, val
X = np.array(X)
y = np.array(y)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 0, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=0, shuffle=True)
X_train_scaled = X_train/225.0
X_test_scaled = X_test/255.0

# Example PTM Performance Prior to Transfer

In [None]:
# download pretrained model for control comparisons
IMG_SHAPE = IMG_SIZE+(3,)
full_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE, include_top=True,
                                               weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224.h5


In [None]:
predicted = full_model.predict(np.array([X_train_scaled[0], X_train_scaled[23]]))
results = imagenet_utils.decode_predictions(predicted)
print(results)
for result in results:
  scores = []
  for prediction in result:
    scores.append(float(prediction[2].item()))
  max_score = max(scores)
  max_index = scores.index(max_score)
  print(result[max_index])
print(y_train[0])
print(y_train[23])
print(flowers_labels_dict)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
[[('n01494475', 'hammerhead', 0.22574307), ('n03944341', 'pinwheel', 0.21009746), ('n02317335', 'starfish', 0.048465464), ('n04552348', 'warplane', 0.021277947), ('n04592741', 'wing', 0.019486705)], [('n03485794', 'handkerchief', 0.20947656), ('n07714571', 'head_cabbage', 0.19468735), ('n04209133', 'shower_cap', 0.049777754), ('n04525038', 'velvet', 0.044048462), ('n02892767', 'brassiere', 0.031819213)]]
('n01494475', 'hammerhead', 0.22574307)
('n03485794', 'handkerchief', 0.20947656)
3
4
{'sunflowers': 0, 'dandelion': 1, 'daisy': 2, 'tulips': 3, 'roses': 4}


# Set-up an Train Transfer Model

In [None]:
# Create the base model from the pre-trained model MobileNet V2
IMG_SHAPE = IMG_SIZE + (3,)
base_model = hub.KerasLayer('https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4', input_shape=IMG_SHAPE, trainable=False)

In [None]:
prediction_layer = tf.keras.layers.Dense(len(class_names))

In [None]:
# build model to incorporate new head and preprocessor module
transfer_model = tf.keras.Sequential([base_model, prediction_layer])
transfer_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 1280)              2257984   
                                                                 
 dense (Dense)               (None, 5)                 6405      
                                                                 
Total params: 2,264,389
Trainable params: 6,405
Non-trainable params: 2,257,984
_________________________________________________________________


In [None]:
base_learning_rate = 0.0001
transfer_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [None]:
transfer_model.fit(X_train_scaled, y_train, epochs=8, validation_data=(X_test_scaled, y_test))

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7fd513455150>

# Transfer Model and PTM Model Evaluation on Test Set

In [None]:
comp_dict = {value: key for key, value in flowers_labels_dict.items()}
y_val_labels = [comp_dict[val] for val in y_val]

In [None]:
pre_trained_predictions = full_model.predict(np.array(X_val)/255.0)
pre_trained_processed_predictions = imagenet_utils.decode_predictions(pre_trained_predictions)
initial_pred = []
for result in pre_trained_processed_predictions:
  scores = []
  for prediction in result:
    scores.append(float(prediction[2].item()))
  max_score = max(scores)
  max_index = scores.index(max_score)
  initial_pred.append(result[max_index][1])



In [None]:
from sklearn.metrics import classification_report

In [None]:
print(classification_report(y_val_labels, initial_pred))

                       precision    recall  f1-score   support

               Angora       0.00      0.00      0.00         0
   Bedlington_terrier       0.00      0.00      0.00         0
       English_setter       0.00      0.00      0.00         0
          Persian_cat       0.00      0.00      0.00         0
                  alp       0.00      0.00      0.00         0
          barn_spider       0.00      0.00      0.00         0
             bassinet       0.00      0.00      0.00         0
          bathing_cap       0.00      0.00      0.00         0
                  bee       0.00      0.00      0.00         0
          bell_pepper       0.00      0.00      0.00         0
               bonnet       0.00      0.00      0.00         0
          book_jacket       0.00      0.00      0.00         0
          brain_coral       0.00      0.00      0.00         0
               bubble       0.00      0.00      0.00         0
    cabbage_butterfly       0.00      0.00      0.00  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
confusion_matrix_pre = sklearn.metrics.confusion_matrix(y_val_labels, initial_pred, labels=class_names)

In [None]:
pd.DataFrame(confusion_matrix_pre, index=[f'is {class_name}' for class_name in class_names], columns=[f'predicted {class_name}' for class_name in class_names])

Unnamed: 0,predicted sunflowers,predicted dandelion,predicted daisy,predicted tulips,predicted roses
is sunflowers,0,0,16,0,0
is dandelion,0,0,4,0,0
is daisy,0,0,34,0,0
is tulips,0,0,8,0,0
is roses,0,0,0,0,0


In [None]:
transfer_model_predictions = transfer_model.predict(np.array(X_val)/255.0)
transfer_model_predictions
tuned_pred = []
for result in transfer_model_predictions:
  scores = []
  for prediction in result:
    scores.append(prediction)
  max_score = max(scores)
  max_index = scores.index(max_score)
  tuned_pred.append(comp_dict[max_index])



In [None]:
print(classification_report(y_val_labels, tuned_pred))

              precision    recall  f1-score   support

       daisy       0.73      0.77      0.75        52
   dandelion       0.88      0.89      0.89        76
       roses       0.73      0.91      0.81        45
  sunflowers       0.80      0.75      0.78        53
      tulips       0.89      0.74      0.81        68

    accuracy                           0.81       294
   macro avg       0.81      0.81      0.81       294
weighted avg       0.82      0.81      0.81       294



In [None]:
confusion_matrix_transfer = sklearn.metrics.confusion_matrix(y_val_labels, tuned_pred, labels=class_names)

In [None]:
pd.DataFrame(confusion_matrix_transfer, index=[f'is {class_name}' for class_name in class_names], columns=[f'predicted {class_name}' for class_name in class_names])

Unnamed: 0,predicted sunflowers,predicted dandelion,predicted daisy,predicted tulips,predicted roses
is sunflowers,40,5,4,1,3
is dandelion,1,68,6,0,1
is daisy,5,3,40,2,2
is tulips,3,1,5,50,9
is roses,1,0,0,3,41
