# MNIST Hand-Written Number Classification

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

%env KERAS_BACKEND = tensorflow
%matplotlib inline

env: KERAS_BACKEND=tensorflow


In [2]:
from keras.datasets import mnist
from keras.layers import Dense, Activation, Flatten, Input, Conv2D, MaxPool2D, concatenate
from keras.models import Model
from keras.utils import np_utils
from keras import optimizers, losses
from keras.layers.core import Lambda
from ipywidgets import interact_manual, fixed

import keras.backend as K

Using TensorFlow backend.


### 用 CNN 前，要先把維度 (28,28)(經過 normalized )的訓練及測試資料reshape成(28,28,1)

In [3]:
# Load MNIST, scale each split by its own maximum pixel value, and append a
# trailing channel axis so the images have shape (N, 28, 28, 1) for Conv2D.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train_normalized = x_train / x_train.max()
x_test_normalized = x_test / x_test.max()

x_train_cnn = x_train_normalized.reshape((len(x_train), 28, 28, 1))
x_test_cnn = x_test_normalized.reshape((len(x_test), 28, 28, 1))

### One-Hot Encode

In [4]:
# Turn the integer digit labels into 10-way one-hot vectors for the softmax output.
y_train_one_hot = np_utils.to_categorical(y_train, num_classes=10)
y_test_one_hot = np_utils.to_categorical(y_test, num_classes=10)

### 用來做 interact_manual 的畫圖函數

In [5]:
def WRONG_CLASSIFICATION(name, y_wrong_predict, x_wrong_predict, number=0):
    """Show every test image of digit ``number`` that a model misclassified.

    Parameters
    ----------
    name : str
        Model label placed in the figure title between ``$...$``.
    y_wrong_predict : sequence
        Indexed by true digit; entry ``number`` holds the (wrong) predicted labels.
    x_wrong_predict : sequence
        Indexed by true digit; entry ``number`` holds the corresponding images.
    number : int, optional
        The true digit to inspect (default 0).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``; if nothing was
        misclassified a short message is printed instead.
    """
    wrong_labels = y_wrong_predict[number]
    wrong_images = x_wrong_predict[number]
    length = len(wrong_labels)

    # No misclassified samples for this digit: the grid would have 0 rows and
    # 0 columns (a zero-height figure), so report it and stop instead of failing.
    if length == 0:
        print('No wrong classification of %d for %s machine' % (number, name))
        return

    height = int(np.ceil(length / 10))  # number of grid rows, 10 images per row
    # The figure is sized so that it can contain 10 columns when there are
    # more than 10 wrong predictions.
    figure = plt.figure(figsize=(100, 10*height), dpi=100)

    if length > 10:
        width = 10
        title_shift = 1.1  # for layout adjustment
    else:
        width = length
        title_shift = 1.3  # for layout adjustment

    for i in range(length):
        ax = figure.add_subplot(height, width, i+1)
        ax.imshow(wrong_images[i], cmap='Greys')
        ax.axis('off')
        # Title of each subplot: the label the model predicted.
        ax.set_title('Classified as %d' % wrong_labels[i], fontsize=60, color='red')

    # Title of the whole figure.
    figure.suptitle('Wrong Classification of %d for $%s$ Machince' % (number, name), fontsize=200, y=title_shift)
    plt.show()
    

### 用自行定義的 Lambda layer 做 normalized layer，把被 filters 濾過並經過 pooling 縮減過的矩陣除以 (max − min)；當最小值為 0 時(例如經過 ReLU 之後)，結果會落在 \[0,1\]

In [6]:
def NORMALIZED(inputs):
    """Rescale a tensor by its value range (max - min).

    The maximum and minimum are taken over the whole tensor passed in, with
    no axis argument. The minimum is not subtracted, so the result lies in
    [0, 1] only when the minimum is 0.

    ``K.epsilon()`` is added to the denominator so that a constant tensor
    (max == min, e.g. all activations are zero) does not cause a division by
    zero and fill the network with NaN/inf.
    """
    value_range = K.max(inputs) - K.min(inputs)
    return inputs / (value_range + K.epsilon())

# One Lambda wrapper per position in the network (after each of the three
# conv/pool units and after the Flatten layer). They hold no weights.
normalized_layer_1 = Lambda(NORMALIZED)
normalized_layer_2 = Lambda(NORMALIZED)
normalized_layer_3 = Lambda(NORMALIZED)
normalized_layer_4 = Lambda(NORMALIZED)

### 定義 Input

In [7]:
# Symbolic input tensor for one 28x28 single-channel image; every model below is built on it.
x = Input(shape=(28,28,1))

### 用 Functional API 設計神經網路:
這邊及之後 5 個網路用的主架構都大同小異(差在有沒有加 normalized layer 及 optimizer, losses, activation 及將濾鏡數由小到大改成由大到小)，均由三個單元組成，每個單元有:  3 個不同的 filter 過濾 input (有點調皮想試試看不是方形的濾鏡) ，濾完之後 concatenate，再經過 pooling。重複三次(但每個單元 filter 數目不同)後 flatten，再經一層 Dense 連到 output。每個網路訓練的 epoch 累積起來都是 45 次，batch size 則是 100。

### Model 1<sup>st</sup>

In [8]:
# Model 1 layers. Each unit has three parallel convolutions with kernel sizes
# (4,16), (16,4) and (8,8), followed by a 2x2 max-pool. Filters per branch: 2, 4, 8.
Cv_1, Ch_1, Cs_1 = [
    Conv2D(2, kernel, padding='same', input_shape=(28, 28, 1), activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_1 = MaxPool2D(pool_size=(2, 2))

Cv_2, Ch_2, Cs_2 = [
    Conv2D(4, kernel, padding='same', activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_2 = MaxPool2D(pool_size=(2, 2))

Cv_3, Ch_3, Cs_3 = [
    Conv2D(8, kernel, padding='same', activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_3 = MaxPool2D(pool_size=(2, 2))

# Classifier head: flatten -> 9-unit hidden layer -> 10-way softmax.
F_1 = Flatten()
D_1 = Dense(9, activation='relu')
Output = Dense(10, activation='softmax')

In [9]:
# Unit 1: apply the three convolutions to the input, merge the branches, pool.
v_1, h_1, s_1 = (branch(x) for branch in (Cv_1, Ch_1, Cs_1))
L_1 = concatenate([v_1, h_1, s_1])
PL_1 = P_1(L_1)

# Unit 2: same pattern on the pooled output of unit 1.
v_2, h_2, s_2 = (branch(PL_1) for branch in (Cv_2, Ch_2, Cs_2))
L_2 = concatenate([v_2, h_2, s_2])
PL_2 = P_2(L_2)

# Unit 3: same pattern on the pooled output of unit 2.
v_3, h_3, s_3 = (branch(PL_2) for branch in (Cv_3, Ch_3, Cs_3))
L_3 = concatenate([v_3, h_3, s_3])
PL_3 = P_3(L_3)

# Head: flatten, hidden dense layer, softmax output.
f_1 = F_1(PL_3)
d_1 = D_1(f_1)
output = Output(d_1)

Instructions for updating:
Colocations handled automatically by placer.


In [14]:
# Wrap the graph from the shared input to the softmax output and list its layers.
model_1st = Model(inputs=x, outputs=output)
model_1st.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
concatenat

In [15]:
#model_1st.load_weights('./MNIST_CNN_Machines/CNN_1st_machine_weights.h5')
# Model 1: SGD (lr 0.05) with categorical cross-entropy, tracking accuracy.
# compile() configures model_1st in place; model_1 only keeps its return value.
model_1 = model_1st.compile(
    optimizer=optimizers.sgd(lr=0.05),
    loss=losses.categorical_crossentropy,
    metrics=['acc'],
)
model_1_his = model_1st.fit(
    x=x_train_cnn,
    y=y_train_one_hot,
    batch_size=100,
    epochs=45,
)

Epoch 1/45

KeyboardInterrupt: 

In [None]:
#model_1st_json = model_1st.to_json()
#open('./MNIST_CNN_Machines/CNN_1st_machine','w').write(model_1st_json)
#model_1st.save_weights('./MNIST_CNN_Machines/CNN_1st_machine_weights.h5')

In [12]:
# Score model 1 on both splits (index 0 = loss, index 1 = accuracy, as printed
# below) and keep the per-class probabilities for the test images.
model_1st_score_train = model_1st.evaluate(x=x_train_cnn, y=y_train_one_hot)
model_1st_score_test = model_1st.evaluate(x=x_test_cnn, y=y_test_one_hot)

y_1st_predict_one_hot = model_1st.predict(x=x_test_cnn)



### 訓練及測試資料的正確率
看起來還不錯，測試有到 98.6% 。

In [13]:
# Report loss and accuracy (as a percentage) for the training and test splits.
print(
    f'For Training Data: Loss = {model_1st_score_train[0]:.6f}; Accuracy = {100*model_1st_score_train[1]:.6f}%',
    f'For Testing Data: Loss = {model_1st_score_test[0]:.6f}; Accuracy = {100*model_1st_score_test[1]:.6f}%',
    sep='\n',
)

For Training Data: Loss = 0.019496; Accuracy = 99.461667%
For Testing Data: Loss = 0.073772; Accuracy = 98.600000%


### 看一下 Confusion Matrix
非對角線幾乎是個位數，好像還可以。

In [14]:
# Predicted digit = index of the largest probability in each row, stored as int32.
y_1st_predict = np.argmax(y_1st_predict_one_hot, axis=1).astype(np.int32)
# Confusion matrix: rows are the true digits, columns the predicted digits.
pd.crosstab(y_test, y_1st_predict, rownames=['Ture Number'], colnames=['Predict Number'])

Predict Number,0,1,2,3,4,5,6,7,8,9
Ture Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,976,0,0,0,0,0,2,1,1,0
1,3,1127,1,0,1,0,2,0,1,0
2,2,0,1021,3,2,0,0,2,2,0
3,1,1,2,1001,0,0,0,2,0,3
4,0,0,2,0,977,0,1,0,0,2
5,3,1,0,13,2,862,1,0,5,5
6,6,2,2,0,2,0,945,0,1,0
7,0,5,5,0,1,0,0,1014,2,1
8,0,0,3,4,3,0,1,3,957,3
9,2,1,1,1,14,0,0,10,0,980


### 用 interact_manual 選擇想要看的數字，可以對照神經網路判斷錯誤的資料的原圖

In [15]:
# For each true digit 0-9, collect the wrong predicted labels and the raw
# test images model 1 got wrong, then let the user pick a digit to inspect.
y_wrong_predict_1st = [
    y_1st_predict[(y_test == digit) & (y_1st_predict != y_test)]
    for digit in range(0, 10)
]
x_wrong_predict_1st = [
    x_test[(y_test == digit) & (y_1st_predict != y_test)]
    for digit in range(0, 10)
]

interact_manual(
    WRONG_CLASSIFICATION,
    name=fixed('1^{st}'),
    y_wrong_predict=fixed(y_wrong_predict_1st),
    x_wrong_predict=fixed(x_wrong_predict_1st),
    number=range(0, 10),
);

### Model 2<sup>nd</sup>
這次每個單元最後面都加了 normalized layer ，再輸進最後的 Dense 層也給他 normalized 一下。

In [8]:
# Model 2 layers: fresh instances with the same configuration as model 1
# (three parallel kernels per unit, 2 / 4 / 8 filters per branch, 2x2 pooling).
Cv_1, Ch_1, Cs_1 = [
    Conv2D(2, kernel, padding='same', input_shape=(28, 28, 1), activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_1 = MaxPool2D(pool_size=(2, 2))

Cv_2, Ch_2, Cs_2 = [
    Conv2D(4, kernel, padding='same', activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_2 = MaxPool2D(pool_size=(2, 2))

Cv_3, Ch_3, Cs_3 = [
    Conv2D(8, kernel, padding='same', activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_3 = MaxPool2D(pool_size=(2, 2))

# Classifier head: flatten -> 9-unit hidden layer -> 10-way softmax.
F_1 = Flatten()
D_1 = Dense(9, activation='relu')
Output = Dense(10, activation='softmax')

In [9]:
# Unit 1: three parallel convolutions, merged, pooled, then rescaled.
v_1, h_1, s_1 = (branch(x) for branch in (Cv_1, Ch_1, Cs_1))
L_1 = concatenate([v_1, h_1, s_1])
PL_1 = P_1(L_1)
N_1 = normalized_layer_1(PL_1)

# Unit 2: same pattern on the rescaled output of unit 1.
v_2, h_2, s_2 = (branch(N_1) for branch in (Cv_2, Ch_2, Cs_2))
L_2 = concatenate([v_2, h_2, s_2])
PL_2 = P_2(L_2)
N_2 = normalized_layer_2(PL_2)

# Unit 3: same pattern on the rescaled output of unit 2.
v_3, h_3, s_3 = (branch(N_2) for branch in (Cv_3, Ch_3, Cs_3))
L_3 = concatenate([v_3, h_3, s_3])
PL_3 = P_3(L_3)
N_3 = normalized_layer_3(PL_3)

# Head: flatten, rescale once more, hidden dense layer, softmax output.
f_1 = F_1(N_3)
N_4 = normalized_layer_4(f_1)
d_1 = D_1(N_4)
output = Output(d_1)

Instructions for updating:
Colocations handled automatically by placer.


In [10]:
# Wrap the graph from the shared input to the softmax output and list its layers.
model_2nd = Model(inputs=x, outputs=output)
model_2nd.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
concatenat

In [11]:
# Restore previously saved weights instead of retraining, then compile with
# the same SGD / cross-entropy setup as model 1 so evaluate() can be used.
model_2nd.load_weights('./MNIST_CNN_Machines/CNN_2nd_machine_weights.h5')
model_2 = model_2nd.compile(
    optimizer=optimizers.sgd(lr=0.05),
    loss=losses.categorical_crossentropy,
    metrics=['acc'],
)
#model_2_his = model_2nd.fit(x_train_cnn, y_train_one_hot, batch_size=100, epochs=50)

In [22]:
#model_2nd_json = model_2nd.to_json()
#open('./MNIST_CNN_Machines/CNN_2nd_machine','w').write(model_2nd_json)
#model_2nd.save_weights('./MNIST_CNN_Machines/CNN_2nd_machine_weights.h5')

In [None]:
# Score model 2 on both splits (index 0 = loss, index 1 = accuracy, as printed
# below) and keep the per-class probabilities for the test images.
model_2nd_score_train = model_2nd.evaluate(x=x_train_cnn, y=y_train_one_hot)
model_2nd_score_test = model_2nd.evaluate(x=x_test_cnn, y=y_test_one_hot)

y_2nd_predict_one_hot = model_2nd.predict(x=x_test_cnn)

 7104/60000 [==>...........................] - ETA: 2:03

### 訓練及測試資料的正確率
有點強悍，訓練到 100%，測試也有99.2%。

In [39]:
# Report loss and accuracy (as a percentage) for the training and test splits.
print(
    f'For Training Data: Loss = {model_2nd_score_train[0]:.6f}; Accuracy = {100*model_2nd_score_train[1]:.6f}%',
    f'For Testing Data: Loss = {model_2nd_score_test[0]:.6f}; Accuracy = {100*model_2nd_score_test[1]:.6f}%',
    sep='\n',
)

For Training Data: Loss = 0.018801; Accuracy = 99.470000%
For Testing Data: Loss = 0.036967; Accuracy = 98.880000%


### 有點稀疏的 Confusion Matrix

In [40]:
# Predicted digit = index of the largest probability in each row, stored as int32.
y_2nd_predict = np.argmax(y_2nd_predict_one_hot, axis=1).astype(np.int32)
# Confusion matrix: rows are the true digits, columns the predicted digits.
pd.crosstab(y_test, y_2nd_predict, rownames=['Ture Number'], colnames=['Predict Number'])

Predict Number,0,1,2,3,4,5,6,7,8,9
Ture Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,973,0,1,0,0,1,4,1,0,0
1,0,1133,0,0,1,0,0,1,0,0
2,4,3,1019,2,0,0,0,3,1,0
3,0,0,1,1000,0,4,0,3,1,1
4,1,0,0,0,971,0,1,1,0,8
5,2,0,0,7,0,880,1,1,1,0
6,6,5,0,0,2,3,940,0,2,0
7,0,4,3,1,0,0,0,1018,1,1
8,2,1,5,3,1,1,0,1,958,2
9,0,0,0,0,5,2,0,5,1,996


### 一樣做個互動看一下那些資料認不出來

In [28]:
# For each true digit 0-9, collect the wrong predicted labels and the raw
# test images model 2 got wrong, then let the user pick a digit to inspect.
y_wrong_predict_2nd = [
    y_2nd_predict[(y_test == digit) & (y_2nd_predict != y_test)]
    for digit in range(0, 10)
]
x_wrong_predict_2nd = [
    x_test[(y_test == digit) & (y_2nd_predict != y_test)]
    for digit in range(0, 10)
]

interact_manual(
    WRONG_CLASSIFICATION,
    name=fixed('2^{nd}'),
    y_wrong_predict=fixed(y_wrong_predict_2nd),
    x_wrong_predict=fixed(x_wrong_predict_2nd),
    number=range(0, 10),
);

### Model 3<sup>rd</sup>
結構跟 Model 2<sup>nd</sup> 一模一樣，不過改用 adam 當 optimizer。

In [78]:
# Model 3 layers: fresh instances, same configuration as model 2
# (three parallel kernels per unit, 2 / 4 / 8 filters per branch, 2x2 pooling).
Cv_1, Ch_1, Cs_1 = [
    Conv2D(2, kernel, padding='same', input_shape=(28, 28, 1), activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_1 = MaxPool2D(pool_size=(2, 2))

Cv_2, Ch_2, Cs_2 = [
    Conv2D(4, kernel, padding='same', activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_2 = MaxPool2D(pool_size=(2, 2))

Cv_3, Ch_3, Cs_3 = [
    Conv2D(8, kernel, padding='same', activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_3 = MaxPool2D(pool_size=(2, 2))

# Classifier head: flatten -> 9-unit hidden layer -> 10-way softmax.
F_1 = Flatten()
D_1 = Dense(9, activation='relu')
Output = Dense(10, activation='softmax')

# Unit 1: three parallel convolutions, merged, pooled, then rescaled.
v_1, h_1, s_1 = (branch(x) for branch in (Cv_1, Ch_1, Cs_1))
L_1 = concatenate([v_1, h_1, s_1])
PL_1 = P_1(L_1)
N_1 = normalized_layer_1(PL_1)

# Unit 2: same pattern on the rescaled output of unit 1.
v_2, h_2, s_2 = (branch(N_1) for branch in (Cv_2, Ch_2, Cs_2))
L_2 = concatenate([v_2, h_2, s_2])
PL_2 = P_2(L_2)
N_2 = normalized_layer_2(PL_2)

# Unit 3: same pattern on the rescaled output of unit 2.
v_3, h_3, s_3 = (branch(N_2) for branch in (Cv_3, Ch_3, Cs_3))
L_3 = concatenate([v_3, h_3, s_3])
PL_3 = P_3(L_3)
N_3 = normalized_layer_3(PL_3)

# Head: flatten, rescale once more, hidden dense layer, softmax output.
f_1 = F_1(N_3)
N_4 = normalized_layer_4(f_1)
d_1 = D_1(N_4)
output = Output(d_1)

In [79]:
# Wrap the graph from the shared input to the softmax output and list its layers.
model_3rd = Model(inputs=x, outputs=output)
model_3rd.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_100 (Conv2D)             (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_101 (Conv2D)             (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_102 (Conv2D)             (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
concatenat

In [80]:
#model_3rd.load_weights('./MNIST_CNN_Machines/CNN_3rd_machine_weights.h5')
# Model 3: same network as model 2, but optimised with Adam (lr 0.05).
# compile() configures model_3rd in place; model_3 only keeps its return value.
model_3 = model_3rd.compile(
    optimizer=optimizers.adam(lr=0.05),
    loss=losses.categorical_crossentropy,
    metrics=['acc'],
)
model_3_his = model_3rd.fit(
    x=x_train_cnn,
    y=y_train_one_hot,
    batch_size=100,
    epochs=45,
)

Epoch 1/45

KeyboardInterrupt: 

In [32]:
#model_3rd_json = model_3rd.to_json()
#open('./MNIST_CNN_Machines/CNN_3rd_machine','w').write(model_3rd_json)
#model_3rd.save_weights('./MNIST_CNN_Machines/CNN_3rd_machine_weights.h5')

表現還不錯，只是沒有 sgd 那麼優。

In [33]:
# Score model 3 on both splits (index 0 = loss, index 1 = accuracy) and keep
# the per-class probabilities for the test images.
model_3rd_score_train = model_3rd.evaluate(x=x_train_cnn, y=y_train_one_hot)
model_3rd_score_test = model_3rd.evaluate(x=x_test_cnn, y=y_test_one_hot)
y_3rd_predict_one_hot = model_3rd.predict(x=x_test_cnn)
print(
    f'For Training Data: Loss = {model_3rd_score_train[0]:.6f}; Accuracy = {100*model_3rd_score_train[1]:.6f}%',
    f'For Testing Data: Loss = {model_3rd_score_test[0]:.6f}; Accuracy = {100*model_3rd_score_test[1]:.6f}%',
    sep='\n',
)

# Predicted digit = index of the largest probability in each row, stored as int32.
y_3rd_predict = np.argmax(y_3rd_predict_one_hot, axis=1).astype(np.int32)
# Confusion matrix: rows are the true digits, columns the predicted digits.
pd.crosstab(y_test, y_3rd_predict, rownames=['Ture Number'], colnames=['Predict Number'])



Predict Number,0,1,2,3,4,5,6,7,8,9
Ture Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,975,0,2,1,1,0,1,0,0,0
1,0,1122,4,0,2,0,2,2,3,0
2,2,1,1023,1,0,0,1,2,2,0
3,0,0,1,1006,0,1,0,0,1,1
4,0,3,0,0,965,0,2,0,0,12
5,1,0,0,11,0,873,3,0,2,2
6,3,2,0,1,4,2,945,0,1,0
7,0,4,12,3,0,0,0,1006,1,2
8,1,0,5,2,0,1,2,0,963,0
9,0,0,0,1,9,3,0,6,11,979


In [34]:
# For each true digit 0-9, collect the wrong predicted labels and the raw
# test images model 3 got wrong, then let the user pick a digit to inspect.
y_wrong_predict_3rd = [
    y_3rd_predict[(y_test == digit) & (y_3rd_predict != y_test)]
    for digit in range(0, 10)
]
x_wrong_predict_3rd = [
    x_test[(y_test == digit) & (y_3rd_predict != y_test)]
    for digit in range(0, 10)
]

interact_manual(
    WRONG_CLASSIFICATION,
    name=fixed('3^{rd}'),
    y_wrong_predict=fixed(y_wrong_predict_3rd),
    x_wrong_predict=fixed(x_wrong_predict_3rd),
    number=range(0, 10),
);

### Model 4<sup>th</sup>
跟 Model 2<sup>nd</sup> 一樣，loss 改用mse 。

In [68]:
# Model 4 layers: fresh instances, same configuration as model 2
# (three parallel kernels per unit, 2 / 4 / 8 filters per branch, 2x2 pooling).
Cv_1, Ch_1, Cs_1 = [
    Conv2D(2, kernel, padding='same', input_shape=(28, 28, 1), activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_1 = MaxPool2D(pool_size=(2, 2))

Cv_2, Ch_2, Cs_2 = [
    Conv2D(4, kernel, padding='same', activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_2 = MaxPool2D(pool_size=(2, 2))

Cv_3, Ch_3, Cs_3 = [
    Conv2D(8, kernel, padding='same', activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_3 = MaxPool2D(pool_size=(2, 2))

# Classifier head: flatten -> 9-unit hidden layer -> 10-way softmax.
F_1 = Flatten()
D_1 = Dense(9, activation='relu')
Output = Dense(10, activation='softmax')

# Unit 1: three parallel convolutions, merged, pooled, then rescaled.
v_1, h_1, s_1 = (branch(x) for branch in (Cv_1, Ch_1, Cs_1))
L_1 = concatenate([v_1, h_1, s_1])
PL_1 = P_1(L_1)
N_1 = normalized_layer_1(PL_1)

# Unit 2: same pattern on the rescaled output of unit 1.
v_2, h_2, s_2 = (branch(N_1) for branch in (Cv_2, Ch_2, Cs_2))
L_2 = concatenate([v_2, h_2, s_2])
PL_2 = P_2(L_2)
N_2 = normalized_layer_2(PL_2)

# Unit 3: same pattern on the rescaled output of unit 2.
v_3, h_3, s_3 = (branch(N_2) for branch in (Cv_3, Ch_3, Cs_3))
L_3 = concatenate([v_3, h_3, s_3])
PL_3 = P_3(L_3)
N_3 = normalized_layer_3(PL_3)

# Head: flatten, rescale once more, hidden dense layer, softmax output.
f_1 = F_1(N_3)
N_4 = normalized_layer_4(f_1)
d_1 = D_1(N_4)
output = Output(d_1)

In [69]:
# Wrap the graph from the shared input to the softmax output and list its layers.
model_4th = Model(inputs=x, outputs=output)
model_4th.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_73 (Conv2D)              (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_74 (Conv2D)              (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_75 (Conv2D)              (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
concatenat

In [70]:
#model_4th.load_weights('./MNIST_CNN_Machines/CNN_4th_machine_weights.h5')
# Model 4: same network as model 2, but trained with mean-squared-error loss.
# compile() configures model_4th in place; model_4 only keeps its return value.
model_4 = model_4th.compile(
    optimizer=optimizers.sgd(lr=0.05),
    loss=losses.mse,
    metrics=['acc'],
)
model_4_his = model_4th.fit(
    x=x_train_cnn,
    y=y_train_one_hot,
    batch_size=100,
    epochs=45,
)

Epoch 1/45
12700/60000 [=====>........................] - ETA: 3:32 - loss: 0.0903 - acc: 0.0971

KeyboardInterrupt: 

In [56]:
#model_4th_json = model_4th.to_json()
#open('./MNIST_CNN_Machines/CNN_4th_machine','w').write(model_4th_json)
#model_4th.save_weights('./MNIST_CNN_Machines/CNN_4th_machine_weights.h5')

In [57]:
# Score model 4 on both splits (index 0 = loss, index 1 = accuracy) and report them.
model_4th_score_train = model_4th.evaluate(x=x_train_cnn, y=y_train_one_hot)
model_4th_score_test = model_4th.evaluate(x=x_test_cnn, y=y_test_one_hot)
print(
    f'For Training Data: Loss = {model_4th_score_train[0]:.6f}; Accuracy = {100*model_4th_score_train[1]:.6f}%',
    f'For Testing Data: Loss = {model_4th_score_test[0]:.6f}; Accuracy = {100*model_4th_score_test[1]:.6f}%',
    sep='\n',
)

For Training Data: Loss = 0.089994; Accuracy = 11.236667%
For Testing Data: Loss = 0.089987; Accuracy = 11.350000%


### Model 5<sup>th</sup>
跟 Model 2<sup>nd</sup> 一樣， activation 改用 sigmoid。

In [74]:
# Model 5 layers: same layout as model 2, but every convolution and the
# hidden dense layer use a sigmoid activation instead of ReLU.
Cv_1, Ch_1, Cs_1 = [
    Conv2D(2, kernel, padding='same', input_shape=(28, 28, 1), activation='sigmoid')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_1 = MaxPool2D(pool_size=(2, 2))

Cv_2, Ch_2, Cs_2 = [
    Conv2D(4, kernel, padding='same', activation='sigmoid')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_2 = MaxPool2D(pool_size=(2, 2))

Cv_3, Ch_3, Cs_3 = [
    Conv2D(8, kernel, padding='same', activation='sigmoid')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_3 = MaxPool2D(pool_size=(2, 2))

# Classifier head: flatten -> 9-unit sigmoid hidden layer -> 10-way softmax.
F_1 = Flatten()
D_1 = Dense(9, activation='sigmoid')
Output = Dense(10, activation='softmax')

# Unit 1: three parallel convolutions, merged, pooled, then rescaled.
v_1, h_1, s_1 = (branch(x) for branch in (Cv_1, Ch_1, Cs_1))
L_1 = concatenate([v_1, h_1, s_1])
PL_1 = P_1(L_1)
N_1 = normalized_layer_1(PL_1)

# Unit 2: same pattern on the rescaled output of unit 1.
v_2, h_2, s_2 = (branch(N_1) for branch in (Cv_2, Ch_2, Cs_2))
L_2 = concatenate([v_2, h_2, s_2])
PL_2 = P_2(L_2)
N_2 = normalized_layer_2(PL_2)

# Unit 3: same pattern on the rescaled output of unit 2.
v_3, h_3, s_3 = (branch(N_2) for branch in (Cv_3, Ch_3, Cs_3))
L_3 = concatenate([v_3, h_3, s_3])
PL_3 = P_3(L_3)
N_3 = normalized_layer_3(PL_3)

# Head: flatten, rescale once more, hidden dense layer, softmax output.
f_1 = F_1(N_3)
N_4 = normalized_layer_4(f_1)
d_1 = D_1(N_4)
output = Output(d_1)

In [75]:
# Wrap the graph from the shared input to the softmax output and list its layers.
model_5th = Model(inputs=x, outputs=output)
model_5th.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_91 (Conv2D)              (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_92 (Conv2D)              (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_93 (Conv2D)              (None, 28, 28, 2)    130         input_1[0][0]                    
__________________________________________________________________________________________________
concatenat

In [77]:
#model_5th.load_weights('./MNIST_CNN_Machines/CNN_5th_machine_weights.h5')
# Model 5: sigmoid network trained with SGD at lr 0.1 and categorical cross-entropy.
# compile() configures model_5th in place; model_5 only keeps its return value.
model_5 = model_5th.compile(
    optimizer=optimizers.sgd(lr=0.1),
    loss=losses.categorical_crossentropy,
    metrics=['acc'],
)
model_5_his = model_5th.fit(
    x=x_train_cnn,
    y=y_train_one_hot,
    batch_size=100,
    epochs=45,
)

Epoch 1/45
 1200/60000 [..............................] - ETA: 5:28 - loss: 2.3023 - acc: 0.1067

KeyboardInterrupt: 

In [None]:
# Score model 5 on both splits (index 0 = loss, index 1 = accuracy) and keep
# the per-class probabilities for the test images.
model_5th_score_train = model_5th.evaluate(x=x_train_cnn, y=y_train_one_hot)
model_5th_score_test = model_5th.evaluate(x=x_test_cnn, y=y_test_one_hot)
y_5th_predict_one_hot = model_5th.predict(x=x_test_cnn)
print(
    f'For Training Data: Loss = {model_5th_score_train[0]:.6f}; Accuracy = {100*model_5th_score_train[1]:.6f}%',
    f'For Testing Data: Loss = {model_5th_score_test[0]:.6f}; Accuracy = {100*model_5th_score_test[1]:.6f}%',
    sep='\n',
)

# Predicted digit = index of the largest probability in each row, stored as int32.
y_5th_predict = np.argmax(y_5th_predict_one_hot, axis=1).astype(np.int32)
# Confusion matrix: rows are the true digits, columns the predicted digits.
pd.crosstab(y_test, y_5th_predict, rownames=['Ture Number'], colnames=['Predict Number'])

In [None]:
# For each true digit 0-9, collect the wrong predicted labels and the raw
# test images model 5 got wrong, then let the user pick a digit to inspect.
y_wrong_predict_5th = [
    y_5th_predict[(y_test == digit) & (y_5th_predict != y_test)]
    for digit in range(0, 10)
]
x_wrong_predict_5th = [
    x_test[(y_test == digit) & (y_5th_predict != y_test)]
    for digit in range(0, 10)
]

interact_manual(
    WRONG_CLASSIFICATION,
    name=fixed('5^{th}'),
    y_wrong_predict=fixed(y_wrong_predict_5th),
    x_wrong_predict=fixed(x_wrong_predict_5th),
    number=range(0, 10),
);

### Model 6<sup>th</sup>
有點淘氣的想試試把 filter 數目由小到大改成由大到小會怎麼樣。

In [42]:
# Model 6 layers: same layout as model 2, but the filter counts per branch
# shrink through the network (8, then 4, then 2) instead of growing.
Cv_1, Ch_1, Cs_1 = [
    Conv2D(8, kernel, padding='same', input_shape=(28, 28, 1), activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_1 = MaxPool2D(pool_size=(2, 2))

Cv_2, Ch_2, Cs_2 = [
    Conv2D(4, kernel, padding='same', activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_2 = MaxPool2D(pool_size=(2, 2))

Cv_3, Ch_3, Cs_3 = [
    Conv2D(2, kernel, padding='same', activation='relu')
    for kernel in [(4, 16), (16, 4), (8, 8)]
]
P_3 = MaxPool2D(pool_size=(2, 2))

# Classifier head: flatten -> 9-unit hidden layer -> 10-way softmax.
F_1 = Flatten()
D_1 = Dense(9, activation='relu')
Output = Dense(10, activation='softmax')

# Unit 1: three parallel convolutions, merged, pooled, then rescaled.
v_1, h_1, s_1 = (branch(x) for branch in (Cv_1, Ch_1, Cs_1))
L_1 = concatenate([v_1, h_1, s_1])
PL_1 = P_1(L_1)
N_1 = normalized_layer_1(PL_1)

# Unit 2: same pattern on the rescaled output of unit 1.
v_2, h_2, s_2 = (branch(N_1) for branch in (Cv_2, Ch_2, Cs_2))
L_2 = concatenate([v_2, h_2, s_2])
PL_2 = P_2(L_2)
N_2 = normalized_layer_2(PL_2)

# Unit 3: same pattern on the rescaled output of unit 2.
v_3, h_3, s_3 = (branch(N_2) for branch in (Cv_3, Ch_3, Cs_3))
L_3 = concatenate([v_3, h_3, s_3])
PL_3 = P_3(L_3)
N_3 = normalized_layer_3(PL_3)

# Head: flatten, rescale once more, hidden dense layer, softmax output.
f_1 = F_1(N_3)
N_4 = normalized_layer_4(f_1)
d_1 = D_1(N_4)
output = Output(d_1)

In [43]:
# Wrap the graph from the shared input to the softmax output and list its layers.
model_6th = Model(inputs=x, outputs=output)
model_6th.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
conv2d_28 (Conv2D)              (None, 28, 28, 8)    520         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_29 (Conv2D)              (None, 28, 28, 8)    520         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_30 (Conv2D)              (None, 28, 28, 8)    520         input_1[0][0]                    
__________________________________________________________________________________________________
concatenat

In [44]:
# Restore previously saved weights instead of retraining, then compile with
# the same SGD / cross-entropy setup as model 2 so evaluate() can be used.
model_6th.load_weights('./MNIST_CNN_Machines/CNN_6th_machine_weights.h5')
model_6 = model_6th.compile(
    optimizer=optimizers.sgd(lr=0.05),
    loss=losses.categorical_crossentropy,
    metrics=['acc'],
)
#model_6_his = model_6th.fit(x_train_cnn, y_train_one_hot, batch_size=100, epochs=45)

Epoch 1/45
Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45
Epoch 6/45
Epoch 7/45
Epoch 8/45
Epoch 9/45
Epoch 10/45
Epoch 11/45
Epoch 12/45
Epoch 13/45
Epoch 14/45
Epoch 15/45
Epoch 16/45
Epoch 17/45
Epoch 18/45
Epoch 19/45
Epoch 20/45
Epoch 21/45
Epoch 22/45
Epoch 23/45
Epoch 24/45
Epoch 25/45
Epoch 26/45
Epoch 27/45
Epoch 28/45
Epoch 29/45
Epoch 30/45
Epoch 31/45
Epoch 32/45
Epoch 33/45
Epoch 34/45
Epoch 35/45
Epoch 36/45
Epoch 37/45
Epoch 38/45
Epoch 39/45
Epoch 40/45
Epoch 41/45
Epoch 42/45
Epoch 43/45
Epoch 44/45
Epoch 45/45


In [47]:
# Persist model 6: the architecture as JSON and the weights as HDF5.
model_6th_json = model_6th.to_json()
# Use a context manager so the file handle is flushed and closed right away
# (the bare open(...).write(...) left the handle open).
with open('./MNIST_CNN_Machines/CNN_6th_machine', 'w') as json_file:
    json_file.write(model_6th_json)
model_6th.save_weights('./MNIST_CNN_Machines/CNN_6th_machine_weights.h5')

### 看起來表現也相當不俗，只是由小到大還是稍微好了一點。

In [45]:
# Score model 6 on both splits (index 0 = loss, index 1 = accuracy) and keep
# the per-class probabilities for the test images.
model_6th_score_train = model_6th.evaluate(x=x_train_cnn, y=y_train_one_hot)
model_6th_score_test = model_6th.evaluate(x=x_test_cnn, y=y_test_one_hot)
y_6th_predict_one_hot = model_6th.predict(x=x_test_cnn)
print(
    f'For Training Data: Loss = {model_6th_score_train[0]:.6f}; Accuracy = {100*model_6th_score_train[1]:.6f}%',
    f'For Testing Data: Loss = {model_6th_score_test[0]:.6f}; Accuracy = {100*model_6th_score_test[1]:.6f}%',
    sep='\n',
)

# Predicted digit = index of the largest probability in each row, stored as int32.
y_6th_predict = np.argmax(y_6th_predict_one_hot, axis=1).astype(np.int32)
# Confusion matrix: rows are the true digits, columns the predicted digits.
pd.crosstab(y_test, y_6th_predict, rownames=['Ture Number'], colnames=['Predict Number'])

For Training Data: Loss = 0.001483; Accuracy = 100.000000%
For Testing Data: Loss = 0.028013; Accuracy = 99.010000%


Predict Number,0,1,2,3,4,5,6,7,8,9
Ture Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,978,0,0,0,0,0,0,1,1,0
1,0,1130,2,0,0,0,0,2,1,0
2,4,2,1019,1,0,0,1,3,2,0
3,0,0,0,1003,0,4,0,2,1,0
4,0,1,0,0,972,0,4,0,0,5
5,2,0,0,4,0,883,1,0,0,2
6,5,2,2,0,0,4,943,0,2,0
7,1,4,4,1,1,0,0,1016,0,1
8,0,1,1,1,0,4,0,1,964,2
9,1,0,0,1,8,2,0,4,0,993


In [46]:
# For each true digit 0-9, collect the wrong predicted labels and the raw
# test images model 6 got wrong, then let the user pick a digit to inspect.
y_wrong_predict_6th = [
    y_6th_predict[(y_test == digit) & (y_6th_predict != y_test)]
    for digit in range(0, 10)
]
x_wrong_predict_6th = [
    x_test[(y_test == digit) & (y_6th_predict != y_test)]
    for digit in range(0, 10)
]

interact_manual(
    WRONG_CLASSIFICATION,
    name=fixed('6^{th}'),
    y_wrong_predict=fixed(y_wrong_predict_6th),
    x_wrong_predict=fixed(x_wrong_predict_6th),
    number=range(0, 10),
);

interactive(children=(Dropdown(description='number', options=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), value=0), Button(…

## Conclusion
1. 做了各層 input 的 normalized 後，測試資料準確率有稍微提升。
2. adam 的效果好像不如 sgd 好，在這邊訓練資料跟測試資料的準確率都比同樣的結構用 sgd 低一點。
3. 看起來做分類問題 loss 還是用 categorical_crossentropy 比較好訓練的起來。
4. 
5. filter 數目由小到大測試資料準確率比大到小些微高一點