# MNIST手寫辨識遷移學習

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.optimizers import SGD
# tf.keras utility functions
from tensorflow.keras.utils import to_categorical

# Load the MNIST train/test split through the Keras dataset helper.
(train_data, train_label), (test_data, test_label) = mnist.load_data()

# Each `.shape` is a 3-tuple, so it fills all three %d placeholders at once.
for template, images in (
    ("共有 %d 訓練資料，每張圖的大小為 %d x %d", train_data),
    ("共有 %d 測試資料，每張圖的大小為 %d x %d", test_data),
):
    print(template % images.shape)

這邊的訓練資料要分成兩部分，第一部分是前面五個數字(0-4)，此資料是用來作預訓練，而第二部分是後面五個數字(5-9)，此資料是要用來做遷移學習。

In [None]:
# Boolean masks: True where the label is one of the first five digits (0-4).
train_is_low = train_label < 5
test_is_low = test_label < 5

# Digits 0-4: images and labels used for pre-training.
train_data_first5, train_label_first5 = train_data[train_is_low], train_label[train_is_low]
test_data_first5, test_label_first5 = test_data[test_is_low], test_label[test_is_low]
print(train_data_first5.shape)
print(train_label_first5.shape)

# Digits 5-9 (label >= 5): images and labels used for transfer learning.
# Labels are shifted down by 5 (5->0, 6->1, 7->2, 8->3, 9->4) so both digit
# groups use class indices 0-4.
train_data_last5 = train_data[~train_is_low]
train_label_last5 = train_label[~train_is_low] - 5
test_data_last5 = test_data[~test_is_low]
test_label_last5 = test_label[~test_is_low] - 5

資料前處理與維度轉換

首先我們先將資料內容的範圍由0-255轉成0-1，此外，這邊也將圖片的維度改為4維張量並改為浮點數型別

In [None]:
# 資料前處理與維度轉換
def preprocess(x):
    """Scale pixel values to [0, 1] and reshape to a 4-D image tensor.

    Args:
        x: Array or tensor of 28x28 grayscale images with values in 0-255,
           batch dimension first.

    Returns:
        A float32 tensor of shape (batch, 28, 28, 1).
    """
    x = tf.cast(x, dtype=tf.float32) / 255
    # -1 lets TensorFlow infer the batch size, so this does not depend on a
    # known static first dimension (and a single 28x28 image becomes a batch of 1).
    return tf.reshape(x, [-1, 28, 28, 1])

# Normalize and reshape all four image sets (both digit groups, train and test).
# NOTE: each name is rebound to its processed version, so running this cell a
# second time would divide by 255 again -- re-run the split cell first.
(train_data_first5, test_data_first5,
 train_data_last5, test_data_last5) = map(
    preprocess,
    (train_data_first5, test_data_first5, train_data_last5, test_data_last5),
)

將標籤(label)轉換成 one-hot encoding 的形式

In [None]:
# One-hot encode every label array into 5 classes (both digit groups use
# class indices 0-4 after the earlier remapping).
(train_label_first5, test_label_first5,
 train_label_last5, test_label_last5) = (
    to_categorical(labels, 5)
    for labels in (train_label_first5, test_label_first5,
                   train_label_last5, test_label_last5)
)
print(train_label_first5.shape)

建立卷積神經網路層(這邊我們使用兩層卷積層與兩層池化層)

In [None]:
# Small CNN: two conv + max-pool stages followed by a dense classifier.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(16, kernel_size=(3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(64, activation="relu"),
    Dense(5, activation="softmax"),  # one output per class in a 5-digit group
])
model.summary()

編譯與訓練模型

In [None]:
# Pre-train on digits 0-4, holding out 20% of the samples for validation.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
history = model.fit(
    x=train_data_first5,
    y=train_label_first5,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
    verbose=2,
)

凍結模型

In [None]:
# Show the layer count, then each layer with its index, so the layers to
# freeze can be picked by position.
print(len(model.layers))
for idx, layer in enumerate(model.layers):
    print(idx, layer)

由上述程式碼中可以看到網路共有七層，這邊我們必須凍結0-3層，所謂凍結的意思是指該層的權重參數不再訓練。

In [None]:
# Freeze layers 0-3 (the first four layers) so their weights are no longer trained.
for layer in model.layers[:4]:
    layer.trainable = False

因此第二次訓練的時候只訓練後面的三層(其中 Flatten 層沒有可訓練的參數，實際更新的是兩層全連接層)，這邊要特別注意的是我們仍須再次編譯模型，這樣才有辦法得到第二次遷移學習後的分類模型。

In [None]:
# Compile again after changing `trainable`, then train on digits 5-9
# (labels already remapped to 0-4), holding out 20% for validation.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
history = model.fit(
    x=train_data_last5,
    y=train_label_last5,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
    verbose=2,
)

完成後面五個手寫數字圖片的訓練後，接下來就可以評估模型了

In [None]:
# Evaluate the fine-tuned model on the test images of digits 5-9.
# The model was compiled with metrics=["accuracy"], so evaluate() yields the
# loss followed by the accuracy.
loss, accuracy = model.evaluate(test_data_last5, test_label_last5)
# Report the final numbers explicitly instead of leaving them only in variables.
print("測試損失: %.4f，測試準確率: %.4f" % (loss, accuracy))