<a href="https://colab.research.google.com/github/shinchan75034/tensorflow-pocket-ref/blob/main/chapter04/Oreilly_chapter_4_transfer_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## TensorFlow Hub

In [None]:
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from sklearn.metrics import classification_report

In [None]:
# Fetch the flower-photos archive and unpack it; data_dir holds the value
# returned by get_file.
data_dir = tf.keras.utils.get_file(
    fname='flower_photos',
    origin='https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
    untar=True,
)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz


In [None]:
# Show the path returned by get_file.
data_dir

'/root/.keras/datasets/flower_photos'

In [None]:
!ls -lrt /root/.keras/datasets/flower_photos

total 620
-rw-r----- 1 270850 5000 418049 Feb  9  2016 LICENSE.txt
drwx------ 2 270850 5000  45056 Feb 10  2016 tulips
drwx------ 2 270850 5000  36864 Feb 10  2016 sunflowers
drwx------ 2 270850 5000  36864 Feb 10  2016 roses
drwx------ 2 270850 5000  53248 Feb 10  2016 dandelion
drwx------ 2 270850 5000  40960 Feb 10  2016 daisy


Now we create an `ImageDataGenerator` object and call its `flow_from_directory` method, which reads the images from the class subdirectories, resizes them to a common size, and yields them in batches.

In [None]:
# Side length, in pixels, of the square images fed to the models.
pixels = 224
IMAGE_SIZE = (pixels,) * 2
# Images per batch and number of flower classes.
BATCH_SIZE = 32
NUM_CLASSES = 5

In [None]:
# Settings shared by both subsets: scale pixel values by 1/255 and reserve
# 20% of the images as the validation split.
datagen_kwargs = {"rescale": 1./255, "validation_split": .20}
# Settings shared by both directory iterators.
dataflow_kwargs = {
    "target_size": IMAGE_SIZE,
    "batch_size": BATCH_SIZE,
    "interpolation": "bilinear",
}

# Validation batches are not shuffled, so they come out in a fixed order.
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**datagen_kwargs)
valid_generator = valid_datagen.flow_from_directory(
    directory=data_dir,
    subset="validation",
    shuffle=False,
    **dataflow_kwargs
)

# Training reuses the very same ImageDataGenerator instance; only the subset
# and the shuffling differ.
train_datagen = valid_datagen
train_generator = train_datagen.flow_from_directory(
    directory=data_dir,
    subset="training",
    shuffle=True,
    **dataflow_kwargs
)

Found 731 images belonging to 5 classes.
Found 2939 images belonging to 5 classes.


Find the index assigned to each label and the order of the classes.

In [None]:
# Class name -> integer index, as reported by the training generator.
labels_idx = train_generator.class_indices
# Inverted mapping: integer index -> class name.
idx_labels = {index: name for name, index in labels_idx.items()}

In [None]:
# Display the index -> class-name mapping.
idx_labels

{0: 'daisy', 1: 'dandelion', 2: 'roses', 3: 'sunflowers', 4: 'tulips'}

In [None]:
# Frozen ResNet-101 feature-vector layer from TensorFlow Hub followed by a
# trainable softmax classification head.
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE + (3,)),
    hub.KerasLayer("https://tfhub.dev/google/imagenet/resnet_v1_101/feature_vector/4",
                   trainable=False),
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax', name = 'flower_class')
])
# Build with the same shape the InputLayer declares rather than a hard-coded
# 224, so changing `pixels` cannot leave the two out of sync.
model.build([None, *IMAGE_SIZE, 3])

In [None]:
# Print the layer stack with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 2048)              42605504  
_________________________________________________________________
flower_class (Dense)         (None, 5)                 10245     
Total params: 42,615,749
Trainable params: 10,245
Non-trainable params: 42,605,504
_________________________________________________________________


In [None]:
# The classification head already applies softmax, so the model outputs
# probabilities rather than logits; the loss must use from_logits=False.
# `learning_rate` replaces the deprecated `lr` alias.
model.compile(
  optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False, label_smoothing=0.1),
  metrics=['accuracy'])

In [None]:
# Whole batches per pass; integer division leaves out a trailing partial batch.
steps_per_epoch = train_generator.samples // train_generator.batch_size
validation_steps = valid_generator.samples // valid_generator.batch_size

# Train for five epochs and keep the `history` attribute of the returned object.
fit_result = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=5,
    validation_steps=validation_steps,
    validation_data=valid_generator,
)
hist = fit_result.history

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Pull one batch from the validation generator and unpack it into images and labels.
sample_test_images, ground_truth_labels = next(valid_generator)

In [None]:
# Check what container type the batch of images arrives in.
type(sample_test_images)

numpy.ndarray

In [None]:
# Number of labels in the batch that was just drawn.
len(ground_truth_labels)

32

In [None]:
# Run the trained model over the validation generator.
prediction = model.predict(valid_generator)

In [None]:
# Number of prediction rows returned.
len(prediction)

731

In [None]:
# Display the raw prediction array.
prediction

array([[9.85499024e-01, 1.88068618e-04, 1.49480900e-10, 1.43129453e-02,
        5.68702276e-08],
       [9.98949230e-01, 1.33055210e-05, 2.35849057e-12, 1.03748892e-03,
        9.11507136e-09],
       [9.82492685e-01, 1.48683535e-02, 2.36548458e-05, 2.59268004e-03,
        2.26959637e-05],
       ...,
       [6.02934379e-05, 1.69046167e-02, 2.88234860e-01, 4.45079943e-03,
        6.90349400e-01],
       [2.47006869e-06, 2.84596535e-05, 8.25712868e-06, 1.88458387e-07,
        9.99960661e-01],
       [5.68267074e-04, 2.16917172e-02, 1.91001443e-03, 7.87149533e-04,
        9.75042880e-01]], dtype=float32)

In [None]:
# Index of the highest score in each prediction row.
# NOTE(review): this is the same computation as `predicted_idx` in the following
# cell, and `labelings` is not referenced again in the visible notebook.
labelings = tf.math.argmax(prediction, axis = -1)

In [None]:
# Index of the highest score in each prediction row, i.e. the predicted class index.
predicted_idx = tf.math.argmax(prediction, axis = -1)

In [None]:
# Display the predicted class indices.
predicted_idx

<tf.Tensor: shape=(731,), dtype=int64, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 0, 3, 1, 1, 1, 4, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 1, 3, 4, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
     

In [None]:
# Look up the class name stored under index 0.
idx_labels[0]

'daisy'

In [None]:
# Class names as a NumPy array, in the key order of labels_idx.
label_reference = np.array(list(labels_idx.keys()))
label_reference

array(['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips'],
      dtype='<U10')

In [None]:
def find_label(idx):
    """Return the class name stored in ``idx_labels`` under the key ``idx``.

    Raises ``KeyError`` when ``idx`` is not a key of ``idx_labels``.
    """
    label = idx_labels[idx]
    return label

In [None]:
# Element-wise wrapper around find_label, applied to every predicted class
# index to obtain the corresponding class names.
find_label_batch = np.vectorize(find_label)
result_class = find_label_batch(predicted_idx)

In [None]:
# Pair every validation file with the class name predicted for it. The
# validation generator was created with shuffle=False, so its filenames line
# up with the prediction rows.
predicted_label = result_class.tolist()
file_name = valid_generator.filenames

results = pd.DataFrame({"File": file_name,
                        "Prediction": predicted_label})
results

Unnamed: 0,File,Prediction
0,daisy/100080576_f52e8ee070_n.jpg,daisy
1,daisy/10140303196_b88d3d6cec.jpg,daisy
2,daisy/10172379554_b296050f82_n.jpg,daisy
3,daisy/10172567486_2748826a8b.jpg,daisy
4,daisy/10172636503_21bededa75_n.jpg,daisy
...,...,...
726,tulips/14068200854_5c13668df9_m.jpg,tulips
727,tulips/14068295074_cd8b85bffa.jpg,tulips
728,tulips/14068348874_7b36c99f6a.jpg,tulips
729,tulips/14068378204_7b26baa30d_n.jpg,tulips


In [None]:
# Wrap the generator's class indices and the predicted indices as pandas Series
# so they can be cross-tabulated.
y_actual = pd.Series(valid_generator.classes)
y_predicted = pd.Series(predicted_idx)

In [None]:
# Confusion matrix: rows are actual classes, columns are predicted classes,
# with row and column totals included.
pd.crosstab(
    index=y_actual,
    columns=y_predicted,
    rownames=['Actual'],
    colnames=['Predicted'],
    margins=True,
)

Predicted,0,1,2,3,4,All
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,116,7,2,1,0,126
1,2,162,3,8,4,179
2,3,1,104,5,15,128
3,0,9,4,121,5,139
4,3,2,8,10,136,159
All,124,181,121,145,160,731


In [None]:
# Aliases for the predicted and actual class-index Series.
predicted_results = y_predicted
truth = y_actual

In [None]:
# Check the element type of the predicted indices.
type(predicted_results[0])

numpy.int64

In [None]:
# Per-class precision, recall and F1 of the predictions against the true labels.
report = classification_report(truth, predicted_results)


In [None]:
# Print the classification report.
print(report)

              precision    recall  f1-score   support

           0       0.94      0.92      0.93       126
           1       0.90      0.91      0.90       179
           2       0.86      0.81      0.84       128
           3       0.83      0.87      0.85       139
           4       0.85      0.86      0.85       159

    accuracy                           0.87       731
   macro avg       0.87      0.87      0.87       731
weighted avg       0.87      0.87      0.87       731



## tf.keras.applications module 

In [None]:
# ImageNet-pretrained ResNet101V2 without its classification top. The input
# shape is derived from IMAGE_SIZE so it always matches what the generators produce.
# NOTE(review): the generators rescale pixels by 1/255; confirm that this matches
# the preprocessing these pretrained weights expect.
base_model = tf.keras.applications.ResNet101V2(
    input_shape=IMAGE_SIZE + (3,),
    include_top=False,
    weights='imagenet')
# Freeze the backbone so that only the new classification head is trained in
# this section; the fine-tuning section below re-enables training explicitly.
base_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Stack the pretrained backbone, a global average pool over its feature maps,
# and a softmax classification head.
model2 = tf.keras.Sequential()
model2.add(base_model)
model2.add(tf.keras.layers.GlobalAveragePooling2D())
model2.add(
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax', name='flower_class')
)

In [None]:
# The classification head already applies softmax, so the model outputs
# probabilities rather than logits; the loss must use from_logits=False.
# `learning_rate` replaces the deprecated `lr` alias.
model2.compile(
  optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False, label_smoothing=0.1),
  metrics=['accuracy']
)

In [None]:
# Train model2 for five epochs, reusing the step counts computed earlier.
model2.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=5,
    validation_steps=validation_steps,
    validation_data=valid_generator,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fabc7e13438>

## Fine tuning


In [None]:
# Mark the whole backbone as trainable, then report how many layers it has so
# a cut-off index for fine-tuning can be chosen.
base_model.trainable = True
print("Number of layers in the base model: ", len(base_model.layers))

Number of layers in the base model:  377


In [None]:
# Index into base_model.layers: layers from this index onwards are fine-tuned,
# while the layers before it are frozen in the next cell.
fine_tune_at = 300

In [None]:
# Layers located before index `fine_tune_at` are excluded from training.
layers_to_freeze = base_model.layers[:fine_tune_at]
for layer in layers_to_freeze:
    layer.trainable = False

In [None]:
# Partially unfrozen backbone, a global average pool over its feature maps,
# and a softmax classification head.
model3 = tf.keras.Sequential()
model3.add(base_model)
model3.add(tf.keras.layers.GlobalAveragePooling2D())
model3.add(
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax', name='flower_class')
)

In [None]:
# The classification head already applies softmax, so the model outputs
# probabilities rather than logits; the loss must use from_logits=False.
# `learning_rate` replaces the deprecated `lr` alias.
model3.compile(
  optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False, label_smoothing=0.1),
  metrics=['accuracy']
)

In [None]:
# Number of passes over the training data for the fine-tuning run.
fine_tune_epochs = 2
# Whole batches per pass; integer division leaves out a trailing partial batch.
validation_steps = valid_generator.samples // valid_generator.batch_size
steps_per_epoch = train_generator.samples // train_generator.batch_size
model3.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=fine_tune_epochs,
    validation_steps=validation_steps,
    validation_data=valid_generator,
)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7fabc189df60>

In [None]:
# Rebind model3 to a variant that flattens the backbone's feature maps instead
# of average-pooling them; the same base_model object is reused.
model3 = tf.keras.Sequential()
model3.add(base_model)
model3.add(tf.keras.layers.Flatten())
model3.add(
    tf.keras.layers.Dense(NUM_CLASSES, activation='softmax', name='flower_class')
)

In [None]:
# The classification head already applies softmax, so the model outputs
# probabilities rather than logits; the loss must use from_logits=False.
# `learning_rate` replaces the deprecated `lr` alias.
model3.compile(
  optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False, label_smoothing=0.1),
  metrics=['accuracy']
)

In [None]:
# Print the layer stack with output shapes and trainable / non-trainable parameter counts.
model3.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet101v2 (Functional)     (None, 7, 7, 2048)        42626560  
_________________________________________________________________
flatten (Flatten)            (None, 100352)            0         
_________________________________________________________________
flower_class (Dense)         (None, 5)                 501765    
Total params: 43,128,325
Trainable params: 19,680,773
Non-trainable params: 23,447,552
_________________________________________________________________


In [None]:
# Train the Flatten-based model3 for five epochs with the step counts computed earlier.
model3.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=5,
    validation_steps=validation_steps,
    validation_data=valid_generator,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fabbf269400>