## Retrain an existing model for new data
- ### freeze existing layers to classify existing data omitting last layer(weight don't change)

In [18]:
import os
import PIL
import cv2
import pathlib
import requests
import numpy as np
import pandas as pd
import seaborn as sn
import tensorflow as tf
import PIL.Image as Image
from tensorflow import keras
import tensorflow_hub as hub
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
%matplotlib inline

# load the pre-trained model
IMAGE_SHAPE = (224, 224)

IMAGE_SHAPE + (3, ) # add rgb layers

classifier = tf.keras.Sequential([hub.KerasLayer("https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4", input_shape=IMAGE_SHAPE+(3,))])

## Predict a goldfish using this existing model

In [19]:
gold_fish = Image.open(requests.get("https://raw.githubusercontent.com/codebasics/deep-learning-keras-tf-tutorial/master/18_transfer_learning/goldfish.jpg", stream=True).raw).resize(IMAGE_SHAPE)

gold_fish = np.array(gold_fish) / 255.0
gold_fish.shape

(224, 224, 3)

In [20]:
result = classifier.predict(gold_fish[np.newaxis, ...])

image_labels = []
with open("16_Image_net_labels.txt", "r") as f:
    image_labels = f.read().splitlines()
image_labels[np.argmax(result)]

'goldfish'

## Load new data and retrain the model

In [23]:
# load, split and preprocessing
dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
data_dir = tf.keras.utils.get_file('flower_photos', origin=dataset_url,  cache_dir='.', untar=True)
data_dir = pathlib.Path(data_dir)

flowers_images_dict = {
    'roses': list(data_dir.glob('roses/*')),
    'daisy': list(data_dir.glob('daisy/*')),
    'dandelion': list(data_dir.glob('dandelion/*')),
    'sunflowers': list(data_dir.glob('sunflowers/*')),
    'tulips': list(data_dir.glob('tulips/*')),
}
flowers_labels_dict = {
    'roses': 0,
    'daisy': 1,
    'dandelion': 2,
    'sunflowers': 3,
    'tulips': 4,
}
X, y = [], []
for name, imgs in flowers_images_dict.items():
    for img in imgs:
        img = cv2.imread(str(img))
        resized_img = cv2.resize(img, (224, 224))
        X.append(resized_img)
        y.append(flowers_labels_dict[name])
X, y = np.array(X), np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
X_train, X_test = X_train / 255, X_test / 255

In [36]:
# try to use classifier to predict our new dataset, way off
print(np.argmax(classifier.predict(np.array([X[0], X[1], X[2]])), axis=1))
print(image_labels[795], image_labels[880], image_labels[795])

key_list = list(flowers_labels_dict.keys())
val_list = list(flowers_labels_dict.values())
print(key_list[val_list.index(0)], key_list[val_list.index(1)], key_list[val_list.index(2)])

[795 880 795]
shower curtain umbrella shower curtain
roses daisy dandelion


In [38]:
# reload the existing model
feature_extractor_model = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4"
pretrained_model_without_top_layer = hub.KerasLayer(feature_extractor_model, input_shape=(224, 224, 3), trainable=False)

In [42]:
model = keras.Sequential([
    pretrained_model_without_top_layer,
    keras.layers.Dense(5),
])
model.compile(optimizer="adam", loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer_4 (KerasLayer)   (None, 1280)              2257984   
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 6405      
Total params: 2,264,389
Trainable params: 6,405
Non-trainable params: 2,257,984
_________________________________________________________________


In [44]:
# very fast training time and good accuracy
model.fit(X_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x130ba4175e0>

In [45]:
# very good prediction data
y_pred = model.predict(X_test)
y_pred_class = [np.argmax(x) for x in y_pred]
print("Classification Report: \n", classification_report(y_test, y_pred_class))

Classification Report: 
               precision    recall  f1-score   support

           0       0.85      0.86      0.86       176
           1       0.78      0.92      0.84       154
           2       0.95      0.87      0.91       226
           3       0.90      0.87      0.88       150
           4       0.88      0.86      0.87       212

    accuracy                           0.87       918
   macro avg       0.87      0.88      0.87       918
weighted avg       0.88      0.87      0.87       918

