In [1]:
# 手寫數字辨識 (以 MLP 模型)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.datasets import mnist
from keras.utils import np_utils
# NOTE(review): this seeds NumPy's global RNG only; whether Keras weight
# initialisation and batch shuffling follow it depends on the backend — confirm,
# and seed the backend as well if repeatable training is required.
np.random.seed(10) # fix NumPy's random seed so NumPy-based randomness is repeatable

In [2]:
# Load the MNIST dataset through keras.datasets.mnist.
# load_data() yields a (train, test) pair; each half is unpacked into an
# image array and a label array.
(x_train_image, y_train_label), (x_test_image,  y_test_label) = mnist.load_data()

In [3]:
# Report the array shapes of the training images and labels.
print(f"x_train_image {x_train_image.shape}")  # > (count, 28, 28)
print(f"y_train_label {y_train_label.shape}")  # > (count,)

x_train_image (60000, 28, 28)
y_train_label (60000,)


In [4]:
# Preview training images, starting at the first one, with the project-local plotting helper.
# NOTE(review): otherLib is not shown in this notebook; the arguments look like
# (images, labels, predictions, start index), with [] meaning "no predictions yet"
# — confirm against otherLib.plot_prediction.
import otherLib as olib
olib.plot_prediction(x_train_image, y_train_label, [], 0)

<Figure size 1200x1400 with 25 Axes>

In [5]:
# Flatten every 28x28 image into a 784-element float32 vector (one row per image).
x_train = x_train_image.reshape((-1, 28 * 28)).astype(np.float32)
x_test  = x_test_image.reshape((-1, 28 * 28)).astype(np.float32)

In [6]:
# Shapes after flattening: (samples, 784) for the training and the test split.
print(x_train.shape, x_test.shape, sep='\n')

(60000, 784)
(10000, 784)


In [7]:
# Rescale pixel intensities from the 0-255 range down to 0-1.
x_train_normalize = np.divide(x_train, 255)
x_test_normalize  = np.divide(x_test, 255)

In [8]:
# One-hot encode the integer digit labels (one column per class).
# NOTE(review): no class count is passed, so the width is presumably inferred
# from the largest label in each array — confirm both splits contain digit 9,
# otherwise the train and test encodings would differ in width.
y_train_oneHot = np_utils.to_categorical(y_train_label)
y_test_oneHot  = np_utils.to_categorical(y_test_label)

In [9]:
# Show the first five labels before and after one-hot encoding, one item per line.
print("轉換前:", y_train_label[:5], "轉換後:", y_train_oneHot[:5], sep="\n")

轉換前:
[5 0 4 1 9]
轉換後:
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


In [10]:
# Create an empty Sequential model (a linear stack of layers).
# Layers are appended to `model` by the following cells, so re-running those
# cells without re-running this one adds the layers a second time.
from keras.models import Sequential
from keras.layers import Dense
model = Sequential()

In [11]:
# Add the hidden layer; it also declares the size of the model input.
# Input size: 784 (28*28 flattened pixels)
# Hidden units: 512
# kernel_initializer "normal": initial kernel weights are drawn from a normal
#   distribution (this argument configures the kernel weights, not the bias)
# Activation: ReLU
model.add(Dense(
    units = 512,
    input_dim = 784,
    kernel_initializer = "normal",
    activation = 'relu'
))

In [12]:
# Add the output layer.
# Output units: 10 (one per digit 0-9)
# kernel_initializer 'normal': initial kernel weights are drawn from a normal
#   distribution (this argument configures the kernel weights, not the bias)
# Activation: softmax, so the 10 outputs form a probability vector
model.add(Dense(
    units = 10,
    kernel_initializer = 'normal',
    activation = 'softmax'
))

In [13]:
# Display the model built so far; the parameter counts can be checked by hand:
# 401920 = 784 * 512 + 512  (hidden layer: weights + biases)
# 5130   = 512 * 10 + 10    (output layer: weights + biases)
# NOTE: the recorded output ends with a stray "None" — the table is printed by
# summary() itself and print() then prints summary()'s return value.
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               401920    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________
None


In [14]:
# Configure how the model is trained.
# Loss: categorical cross-entropy (pairs with the one-hot labels and softmax output)
# Optimizer: Adam
# Metric: accuracy — only reported during training/evaluation; the optimizer
#   minimises the loss, not this metric
# `metrics` is passed as a list, the form documented for Model.compile; newer
# Keras releases reject a bare string here.
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy']
)

In [15]:
# Train the model.
# x, y: normalised training images and their one-hot labels
# validation_split: 0.2 of the data is held out for validation, 0.8 is used for training
# epochs: 10 passes over the training data
# batch_size: 500 samples per batch
train_history = model.fit(
    x = x_train_normalize,
    y = y_train_oneHot,
    validation_split = 0.2,
    epochs = 10,
    batch_size = 500
)
# loss, accuracy: loss and accuracy on the training portion (0.8)
# val_loss, val_accuracy: loss and accuracy on the validation portion (0.2)
# NOTE(review): the recorded run stopped in epoch 1 with "Blas GEMM launch failed";
# that looks like a GPU/BLAS runtime problem rather than an issue with these
# arguments — confirm the environment and re-run before relying on later cells.

Epoch 1/10


InternalError:  Blas GEMM launch failed : a.shape=(500, 784), b.shape=(784, 512), m=500, n=512, k=784
	 [[node sequential/dense/MatMul (defined at <ipython-input-15-14d57f166926>:11) ]] [Op:__inference_train_function_602]

Function call stack:
train_function


In [None]:
# Plot training vs. validation accuracy for every epoch.
# The wider the final gap between accuracy and val_accuracy, the more severe the overfitting.
fig, ax = plt.subplots()
ax.plot(train_history.history['accuracy'], label='train')
ax.plot(train_history.history['val_accuracy'], label='validation')
ax.set(title='Train History', xlabel='Epochs', ylabel='Accuracy')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Evaluate the trained model on the normalised test set using the known labels.
# scores[1] is read as the accuracy, i.e. the metric given at compile time;
# scores[0] is presumably the loss — confirm against Model.evaluate.
scores = model.evaluate(x_test_normalize, y_test_oneHot)
print()
print('accuracy=', scores[1])

In [None]:
# Predict a digit for every test image (predicted class indices only).
# The model was trained on pixels scaled to 0-1, so it is fed x_test_normalize
# rather than the raw 0-255 x_test.
# argmax over the softmax outputs gives the class index; this replaces
# Sequential.predict_classes, which newer TensorFlow/Keras releases no longer provide.
prediction = np.argmax(model.predict(x_test_normalize), axis=-1)
prediction

In [None]:
# Build a confusion matrix (rows: true label, columns: predicted digit) to see
# which pairs of digits are most often mistaken for each other.
pd.crosstab(
    index = y_test_label,
    columns = prediction,
    rownames = ['label'],
    colnames = ['prediction']
)

In [None]:
# List the test samples whose true digit is 5 but which were predicted as 3.
df = pd.DataFrame({
    'label': y_test_label,
    'prediction': prediction
})
df.loc[df['label'].eq(5) & df['prediction'].eq(3)]

In [None]:
# Look at one of the misclassified test images.
# The helper lives in otherLib, imported earlier as `olib`; calling it without
# the `olib.` prefix raises NameError.
# NOTE(review): index 340 is presumably one of the rows listed by the previous
# cell (true 5, predicted 3) — confirm after re-running.
olib.plot_prediction(x_test_image, y_test_label, prediction, idx=340)

In [None]:
# Try to reduce overfitting with dropout.
# Dropout(0.2) randomly zeroes 20% of the hidden layer's outputs during training.
from keras.layers import Dropout
model = Sequential()
# Hidden layer: 784 inputs -> 512 ReLU units, normal-distributed initial weights.
model.add(Dense(
    units = 512,
    input_dim = 784,
    kernel_initializer = "normal",
    activation = 'relu'
))
model.add(Dropout(0.2))
# Output layer: 10 softmax units, one per digit.
model.add(Dense(
    units = 10,
    kernel_initializer = 'normal',
    activation = 'softmax'
))
# `metrics` is passed as a list, the form documented for Model.compile.
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy']
)
# summary() prints the table itself; wrapping it in print() would add a stray "None".
model.summary()

In [None]:
# Train the dropout model with the same settings as the baseline run
# (20% validation split, 10 epochs, batches of 500).
train_history_dropout = model.fit(
    x = x_train_normalize,
    y = y_train_oneHot,
    validation_split = 0.2,
    epochs = 10,
    batch_size = 500
)

In [None]:
# Plot training vs. validation accuracy for the dropout model.
# The gap between the two curves (overfitting) is expected to be smaller than before.
fig, ax = plt.subplots()
ax.plot(train_history_dropout.history['accuracy'], label='train')
ax.plot(train_history_dropout.history['val_accuracy'], label='validation')
ax.set(title='Train History', xlabel='Epochs', ylabel='Accuracy')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Evaluate the dropout model on the normalised test set.
# NOTE(review): test accuracy is expected to be slightly higher than the baseline,
# but this notebook has no recorded output for either run — confirm after re-running.
scores = model.evaluate(x_test_normalize, y_test_oneHot)
print()
print('accuracy=', scores[1])

In [None]:
# Build a multilayer perceptron with 2 hidden layers, each followed by dropout.
# Note: only the first hidden layer declares input_dim; the second one is given
# no input size and takes the previous layer's output as its input.
model = Sequential()
# Hidden layer 1: 784 inputs -> 512 ReLU units.
model.add(Dense(
    units = 512,
    input_dim = 784,
    kernel_initializer = "normal",
    activation = 'relu'
))
model.add(Dropout(0.2))

# Hidden layer 2: 512 ReLU units.
model.add(Dense(
    units = 512,
    kernel_initializer = "normal",
    activation = 'relu'
))
model.add(Dropout(0.2))

# Output layer: 10 softmax units, one per digit.
model.add(Dense(
    units = 10,
    kernel_initializer = 'normal',
    activation = 'softmax'
))
# `metrics` is passed as a list, the form documented for Model.compile.
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy']
)
# summary() prints the table itself; wrapping it in print() would add a stray "None".
model.summary()

In [None]:
# Train the two-hidden-layer model with the same settings as the earlier runs
# (20% validation split, 10 epochs, batches of 500).
train_history_2layers = model.fit(
    x = x_train_normalize,
    y = y_train_oneHot,
    validation_split = 0.2,
    epochs = 10,
    batch_size = 500
)

In [None]:
# Plot the accuracy curves of the two-hidden-layer model, then evaluate it.
# The curves come from train_history_2layers (this model's own fit result),
# not from the earlier dropout run.
# NOTE(review): accuracy is expected to improve again — confirm after re-running.
plt.plot(train_history_2layers.history['accuracy'])
plt.plot(train_history_2layers.history['val_accuracy'])
plt.title('Train History')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

# Accuracy on the normalised test set; scores[1] is the metric given at compile time.
scores = model.evaluate(x_test_normalize, y_test_oneHot)
print()
print('accuracy=', scores[1])