In [16]:
import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
import tensorflow_hub as hub

In [47]:
# Load MNIST, then give every image an explicit channel axis (28x28x1) and
# rescale the pixel values to float32 by dividing by 255.
(xtr, ytr), (xte, yte) = mnist.load_data()
xtr, xte = (
    images.reshape(-1, 28, 28, 1).astype("float32") / 255.0
    for images in (xtr, xte)
)

# Basic Model

In [49]:
# Baseline CNN: three 3x3 conv layers (with 2x2 max-pooling after the first
# two), flattened into a 64-unit dense layer and a 10-way softmax output.
# Layers are named explicitly so they are easy to identify in summaries.
model_start = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation="relu", input_shape=(28, 28, 1), name="layer1"),
    layers.MaxPooling2D((2, 2), name="layer2"),
    layers.Conv2D(64, (3, 3), activation="relu", name="layer3"),
    layers.MaxPooling2D((2, 2), name="layer4"),
    layers.Conv2D(64, (3, 3), activation="relu", name="layer5"),
    layers.Flatten(name="layer6"),
    layers.Dense(64, activation="relu", name="layer7"),
    layers.Dense(10, activation="softmax", name="fin"),
])

In [50]:
# Where the baseline model checkpoint is written.
filepath = "./save/pretrain.h5"
# Create the target folder up front so that saving the checkpoint does not
# fail on a fresh checkout just because "./save" is missing.
os.makedirs(os.path.dirname(filepath), exist_ok=True)

## Callback EarlyStopping (cb):
### stops training early when the monitored metric stops improving, which helps avoid overfitting
## Callback ModelCheckpoint (ck):
### monitors training and saves the best model (or the latest one) to disk

In [51]:
# Early stopping keyed on the training "accuracy" metric; the best weights
# seen are restored when training stops.
# NOTE(review): `cb` is created here, but the baseline fit() call in this
# notebook only passes `ck` - confirm whether early stopping was intended.
cb = tf.keras.callbacks.EarlyStopping(
    monitor="accuracy",
    mode="auto",
    restore_best_weights=True,
)

# Checkpointing: write the model to `filepath`, keeping only the version
# with the best monitored accuracy.
ck = tf.keras.callbacks.ModelCheckpoint(
    filepath,
    monitor="accuracy",
    verbose=0,
    save_best_only=True,
    mode="auto",
)

In [52]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only append a stray "None" line to the output.
model_start.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
layer1 (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
layer2 (MaxPooling2D)        (None, 13, 13, 32)        0         
_________________________________________________________________
layer3 (Conv2D)              (None, 11, 11, 64)        18496     
_________________________________________________________________
layer4 (MaxPooling2D)        (None, 5, 5, 64)          0         
_________________________________________________________________
layer5 (Conv2D)              (None, 3, 3, 64)          36928     
_________________________________________________________________
layer6 (Flatten)             (None, 576)               0         
_________________________________________________________________
layer7 (Dense)               (None, 64)               

In [68]:
# Train the baseline for 5 epochs with Adam and sparse categorical
# cross-entropy (integer labels). The checkpoint callback `ck` saves the
# best-accuracy model to disk during training.
model_start.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model_start.fit(x=xtr, y=ytr, epochs=5, callbacks=[ck])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x243c6a05970>

In [75]:
# Score the baseline on the MNIST test split; the result is [loss, accuracy].
model_start.evaluate(x=xte, y=yte, verbose=2)

313/313 - 2s - loss: 0.0293 - accuracy: 0.9922


[0.029279153794050217, 0.9922000169754028]

# Conclusion 1
### [1] Basic model, train time = 197.0s,
###     8 layers, 93,322 parameters
###     0.9922 accuracy on the test set

# ------------------------------------------------
# Pre_train model

## [1] models.load_model :
### loads the saved model (architecture and weights) so it is ready to use

# Transfer-learning workflow

1. Instantiate a base model and load pre-trained weights into it.
2. Freeze all layers in the base model by setting <font color='red'>"trainable = False."</font>
3. Create a new model on top of the output of one (or several) layers from the base model.
4. Train your new model on your new dataset.

Alternative, lightweight workflow

1. Instantiate a base model and load pre-trained weights into it.
2. Run your new dataset through it and record the output of one (or several) layers from the base model. This is called feature extraction.
3. Use that output as input data for a new, smaller model.

ref : https://keras.io/guides/transfer_learning/

## Freeze layers = NO CHANGE during training
It is important to freeze the layers of the pre-trained model so that the weights in the selected layers are not updated during training.

In [86]:
# Reload the checkpointed baseline to serve as the pre-trained base model.
# `filepath` is the same location the ModelCheckpoint callback wrote to.
model = keras.models.load_model(filepath)

# Freeze the base: setting `trainable` on the model applies to all of its
# layers, so no per-layer assignment is needed.
model.trainable = False

# Sanity check that every layer really is frozen before building on top of it.
assert all(not layer.trainable for layer in model.layers), \
    "base model still has trainable layers"

## Transfer Learning :
Transfer learning is about leveraging the feature representations of a pre-trained model. A model that was pre-trained on a massive dataset can be reused to make predictions on a new task. It is especially useful when the new training dataset is small, because the weights of the pre-trained model are used to initialize the weights of the new model.

## Fine-tuning :
Fine-tuning is an optional step in transfer learning whose goal is to improve the model's performance by continuing to train the pre-trained weights with a low learning rate.

In [77]:
# Reuse the base model from its input up to its second-to-last layer (the
# original output layer is dropped) and attach a fresh 10-unit head.
# The new Dense layer has no activation, so it produces raw logits.
base_inputs = model.input
base_outputs = model.get_layer(index=-2).output
final_outputs = layers.Dense(units=10)(base_outputs)

In [78]:
# Assemble the transfer-learning model: base-model input -> reused layers -> new head.
new_model = keras.Model(inputs=base_inputs, outputs=final_outputs)
# summary() prints the table itself and returns None; calling it directly
# avoids the stray "None" that print(new_model.summary()) would emit.
new_model.summary()

Model: "model_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
layer1_input (InputLayer)    [(None, 28, 28, 1)]       0         
_________________________________________________________________
layer1 (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
layer2 (MaxPooling2D)        (None, 13, 13, 32)        0         
_________________________________________________________________
layer3 (Conv2D)              (None, 11, 11, 64)        18496     
_________________________________________________________________
layer4 (MaxPooling2D)        (None, 5, 5, 64)          0         
_________________________________________________________________
layer5 (Conv2D)              (None, 3, 3, 64)          36928     
_________________________________________________________________
layer6 (Flatten)             (None, 576)               0  

In [79]:
# from_logits=True tells the loss to apply the softmax itself, i.e. the model
# is expected to output raw logits rather than probabilities.
new_model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)

In [80]:
# Timing run with the base model frozen (trainable = False): 5 epochs on the
# MNIST training split, one log line per epoch.
new_model.fit(x=xtr, y=ytr, batch_size=32, epochs=5, verbose=2)

Epoch 1/5
1875/1875 - 8s - loss: 0.1404 - accuracy: 0.9637
Epoch 2/5
1875/1875 - 8s - loss: 0.0124 - accuracy: 0.9969
Epoch 3/5
1875/1875 - 8s - loss: 0.0086 - accuracy: 0.9976
Epoch 4/5
1875/1875 - 8s - loss: 0.0069 - accuracy: 0.9980
Epoch 5/5
1875/1875 - 8s - loss: 0.0060 - accuracy: 0.9982


<keras.callbacks.History at 0x243c6c759d0>

# Conclusion 2
### [2] pre_train model, train time = 40.7s,
###     8 layers, 650 parameters (non-train 92,672)
###     0.9982 accuracy on the training set (final fit epoch; not a test-set score)

In [62]:
# #Duration time before trainable = False 
# new_model.fit(xtr,ytr,batch_size=32, epochs=5, verbose=2)

In [63]:
# Dummy batch for a smoke test: five random 299x299x3 tensors with the
# integer labels 0..4 (one sample per class).
x = tf.random.normal(shape=(5, 299, 299, 3))
y = tf.constant(list(range(5)))

# Full InceptionV3 from keras.applications, including its classification top.
model_pre = keras.applications.InceptionV3(include_top=True)
# Optional inspection: model_pre.summary()

In [64]:
# Same recipe as for the MNIST model: keep InceptionV3 up to its
# second-to-last layer and put a new 5-unit logits head on top.
# NOTE(review): `model_pre` is not frozen in this notebook, so fit() will
# update all of its weights as well - confirm that this is intended.
base_inputs = model_pre.input
base_outputs = model_pre.get_layer(index=-2).output
final_outputs = layers.Dense(units=5)(base_outputs)
new_model_pre = keras.Model(inputs=base_inputs, outputs=final_outputs)

new_model_pre.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)

In [65]:
# Smoke-test training on the 5-sample dummy batch.
new_model_pre.fit(x=x, y=y, epochs=5, verbose=2)

Epoch 1/5
1/1 - 9s - loss: 1.7041 - accuracy: 0.2000
Epoch 2/5
1/1 - 3s - loss: 0.1549 - accuracy: 1.0000
Epoch 3/5
1/1 - 3s - loss: 5.3299e-04 - accuracy: 1.0000
Epoch 4/5
1/1 - 3s - loss: 1.9646e-04 - accuracy: 1.0000
Epoch 5/5
1/1 - 2s - loss: 1.0395e-04 - accuracy: 1.0000


<keras.callbacks.History at 0x24324415f70>

In [66]:
# Same dummy batch as before: five random 299x299x3 tensors, labels 0..4.
x = tf.random.normal(shape=(5, 299, 299, 3))
y = tf.constant(list(range(5)))

# Wrap the TF-Hub InceptionV3 module as a Keras layer and freeze it so that
# only the dense layers stacked on top are trained.
# NOTE(review): this handle is the "classification" module; a
# "feature_vector" variant may be the better base for transfer learning - confirm.
url = 'https://tfhub.dev/google/imagenet/inception_v3/classification/5'
base_model = hub.KerasLayer(url, input_shape=(299, 299, 3))
base_model.trainable = False

# Frozen hub layer followed by a small dense stack ending in 5 raw logits.
model_news = keras.Sequential()
model_news.add(base_model)
model_news.add(layers.Dense(128, activation="relu"))
model_news.add(layers.Dense(64, activation="relu"))
model_news.add(layers.Dense(5))

model_news.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)
model_news.fit(x=x, y=y, epochs=5, verbose=2)

Epoch 1/5
1/1 - 5s - loss: 1.8779 - accuracy: 0.2000
Epoch 2/5
1/1 - 0s - loss: 1.6916 - accuracy: 0.2000
Epoch 3/5
1/1 - 0s - loss: 1.5723 - accuracy: 0.4000
Epoch 4/5
1/1 - 0s - loss: 1.4636 - accuracy: 0.4000
Epoch 5/5
1/1 - 0s - loss: 1.3349 - accuracy: 0.6000


<keras.callbacks.History at 0x24324c878b0>