## 利用這次教的 functional API 方式，測試一下對於 MNIST data 若是先行提取部分特徵做訓練，而後再連回原始資料集進行第二次訓練，是否會影響其表現。

In [1]:
%matplotlib inline
%env KERAS_BACKEND=tensorflow
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import keras
from keras.datasets import mnist
from keras.utils import to_categorical

env: KERAS_BACKEND=tensorflow


Using TensorFlow backend.


## 基本的 load data

In [3]:
# Quick sanity check that the cell executes; prints a fixed string.
print ("Heloo")

Heloo


In [4]:
# Load MNIST through keras.datasets. The trailing "0" marks the raw arrays;
# the processed versions derived from them drop the suffix.
(x_train0, y_train0), (x_test0, y_test0)=mnist.load_data()

In [5]:
# Show the shape of each raw array: the two image arrays, then the two label arrays.
for raw_array in (x_train0, x_test0, y_train0, y_test0):
    print(raw_array.shape)

(60000, 28, 28)
(10000, 28, 28)
(60000,)
(10000,)


## x的部分做 reshape和 normalization

In [6]:
# Flatten every image into a single row vector, keeping one row per sample.
# The first axis is preserved and -1 lets NumPy infer the per-image length.
x_train = x_train0.reshape(x_train0.shape[0], -1)
x_test = x_test0.reshape(x_test0.shape[0], -1)

In [7]:
# NOTE(review): these lines print the original arrays (x_train0 / x_test0),
# not the flattened x_train / x_test, so the flattening is not actually
# verified here. Print x_train.shape / x_test.shape to check the result.
print (x_train0.shape)
print (x_test0.shape)

(60000, 28, 28)
(10000, 28, 28)


In [8]:
# Min-max scale each split to [0, 1], using that split's own extremes.
train_low, train_high = x_train.min(), x_train.max()
test_low, test_high = x_test.min(), x_test.max()

x_train = (x_train - train_low) / (train_high - train_low)
x_test = (x_test - test_low) / (test_high - test_low)

In [9]:
# Confirm the scaling: print (max, min) of each split after normalisation.
print (x_train.max(), x_train.min())
print (x_test.max(), x_test.min())

1.0 0.0
1.0 0.0


## y 做不同特徵之 labeling，共分三種：
* 是偶數
* 大於等於5
* 可以被3整除

In [10]:
# Derive three binary side-labels from the raw training digits.
# Each result is a plain list of 0/1 ints with one entry per training sample,
# computed in a single vectorised pass instead of a Python-level loop.
train_digits = np.asarray(y_train0)

# 1 when the digit is even, 0 otherwise.
y_iseven0 = (train_digits % 2 == 0).astype(int).tolist()

# 1 when the digit is >= 5, 0 otherwise.
# NOTE: the "lt5" in the name is misleading -- the flag means "at least 5",
# not "less than 5". The name is kept because later cells refer to it.
y_lt50 = (train_digits >= 5).astype(int).tolist()

# 1 when the digit is divisible by 3 (0 counts as divisible), 0 otherwise.
y_div30 = (train_digits % 3 == 0).astype(int).tolist()

In [11]:
# Preview the first ten raw digits, followed by the three derived flag lists.
for label_seq in (y_train0, y_iseven0, y_lt50, y_div30):
    print(label_seq[:10])

[5 0 4 1 9 2 1 3 1 4]
[0, 1, 1, 0, 0, 1, 0, 0, 0, 1]
[1, 0, 0, 0, 1, 0, 0, 0, 0, 0]
[0, 1, 0, 0, 1, 0, 0, 1, 0, 0]


## 之後，y再進行 1-hot encoding

In [12]:
# One-hot encode the digit labels (10 classes) for both splits.
y_train, y_test = (
    to_categorical(digits, 10) for digits in (y_train0, y_test0)
)

# One-hot encode the three binary feature labels (2 classes each).
# Only training-set feature labels exist; none are built for the test split.
y_iseven, y_lt5, y_div3 = (
    to_categorical(flags, 2) for flags in (y_iseven0, y_lt50, y_div30)
)

In [13]:
# Preview the first three one-hot rows of every encoded label array.
for encoded in (y_train, y_test, y_iseven, y_lt5, y_div3):
    print(encoded[:3])

[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]
[[0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0.]
 [0. 1.]
 [0. 1.]]
[[0. 1.]
 [1. 0.]
 [1. 0.]]
[[1. 0.]
 [0. 1.]
 [1. 0.]]


## 整個 Model 的 layout

## 第一步：
利用branching 訓練一些數字特徵<br>
![layout1](https://github.com/shanpig/ML-course/blob/master/layout1.png?raw=true)
## 第二步：
固定前面訓練結果，後面加上一層 NN，然後output出 1-hot結果<br>
![layout2](https://github.com/shanpig/ML-course/blob/master/layout2.png?raw=true)

## Model buildup

In [14]:
from keras.optimizers import Adam
from keras.layers import concatenate
from keras.models import Model, Input
from keras.layers import Dense, Dropout, Activation, add

### 各 functional 的 assignment

In [15]:
# Every Dense layer shares the same weight/bias initialisation; only the
# width and the activation differ between them.
_init_kwargs = dict(kernel_initializer="random_uniform", bias_initializer="zeros")

# Network input: one flattened image of 784 values.
x = Input(shape=(784,))

# Shared hidden layer feeding all three feature heads.
f1 = Dense(20, activation="sigmoid", **_init_kwargs)

# Two-unit feature heads: f2 -> "is even", f3 -> ">= 5", f4 -> "divisible by 3".
f2 = Dense(2, activation="sigmoid", **_init_kwargs)
f3 = Dense(2, activation="sigmoid", **_init_kwargs)
f4 = Dense(2, activation="sigmoid", **_init_kwargs)

# Second-stage hidden layer and the 10-way softmax digit output.
f5 = Dense(20, activation="sigmoid", **_init_kwargs)
f6 = Dense(10, activation="softmax", **_init_kwargs)

### functional 及 neuron layers 之間的關係

In [16]:
# Stage 1: a single shared hidden representation feeds the three feature heads.
h1 = f1(x)
c1, c2, c3 = (head(h1) for head in (f2, f3, f4))

# Stage 2: the head outputs are joined with the shared representation, passed
# through a further hidden layer, and mapped to the 10-way digit output.
h = concatenate([c1, c2, c3, h1])
h2 = f5(h)
y = f6(h2)

### 設定兩步驟訓練的 list

In [17]:
# Layers trained in step 1 (shared hidden layer plus the three feature heads);
# these are the layers that get frozen before step 2.
layout1 = [f1, f2, f3, f4]
# Layers introduced for step 2 (second hidden layer and the digit output).
# NOTE(review): layout2 is not referenced again in the visible notebook.
layout2 = [f5, f6]

### model creation

In [18]:
# The full digit classifier, plus one sub-model per feature head. All four are
# built from the same input tensor and the same layer objects, so training a
# sub-model updates weights that the full model also uses.
model = Model(inputs=x, outputs=y)
model_iseven = Model(inputs=x, outputs=c1)
model_lt5 = Model(inputs=x, outputs=c2)
model_div3 = Model(inputs=x, outputs=c3)

In [19]:
# Print the layer-by-layer structure and parameter counts of the full model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 784)          0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 20)           15700       input_1[0][0]                    
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 2)            42          dense_1[0][0]                    
__________________________________________________________________________________________________
dense_3 (Dense)                 (None, 2)            42          dense_1[0][0]                    
__________________________________________________________________________________________________
dense_4 (D

In [20]:
# Compile the three feature sub-models identically; each one receives its own
# fresh Adam optimizer instance.
for head_model in (model_iseven, model_lt5, model_div3):
    head_model.compile(
        loss="categorical_crossentropy",
        optimizer=Adam(),
        metrics=["acc"],
    )

## "是偶數"的model training

In [21]:
# Step 1a: fit the "is even" head on the one-hot parity labels.
# validation_split=0.2 holds out 20% of the training data for validation.
# This sub-model runs through the shared layer f1, which is still trainable
# here, so f1 is updated as well as the head f2.
model_iseven_hist = model_iseven.fit(x_train, y_iseven, batch_size=128, epochs=20, shuffle=True, validation_split=0.2)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## "大於等於五"的 model training

In [22]:
# Step 1b: fit the ">= 5" head (named "lt5" in this notebook) on its labels.
# NOTE(review): the shared layer f1 is still trainable, so this run also
# changes the weights the "is even" head was trained against -- the heads are
# trained one after another, not jointly.
model_lt5_hist = model_lt5.fit(x_train, y_lt5, batch_size=128, epochs=20, shuffle=True, validation_split=0.2)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## "可被3整除"的 model training

In [23]:
# Step 1c: fit the "divisible by 3" head on its labels.
# NOTE(review): as with the previous heads, the shared layer f1 is still
# trainable here, so this last run has the final say on f1's weights.
model_div3_hist = model_div3.fit(x_train, y_div3, batch_size=128, epochs=20, shuffle=True, validation_split=0.2)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


## 將前面訓練過之參數鎖起來，接著訓練第二步。

In [135]:
# Freeze every step-1 layer so that only the step-2 layers are updated from
# here on. The full model is compiled after the flags are flipped, and the
# summary is printed to inspect the resulting model.
for stage1_layer in layout1:
    stage1_layer.trainable = False

model.compile(
    loss="categorical_crossentropy",
    optimizer=Adam(),
    metrics=["acc"],
)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 784)          0                                            
__________________________________________________________________________________________________
dense_26 (Dense)                (None, 20)           15700       input_4[0][0]                    
__________________________________________________________________________________________________
dense_27 (Dense)                (None, 2)            42          dense_26[0][0]                   
__________________________________________________________________________________________________
dense_28 (Dense)                (None, 2)            42          dense_26[0][0]                   
__________________________________________________________________________________________________
dense_29 (

## 第二步 training

In [1]:
# Step 2: train the full digit classifier on the one-hot digit labels; the
# training history is kept in model_hist.
# NOTE(review): the recorded output of this cell is a NameError, i.e. it was
# last executed in a session where `model` did not exist. Re-run the notebook
# top to bottom so this output matches the evaluation further down.
model_hist = model.fit(x_train, y_train, batch_size=128, epochs=20, shuffle=True, validation_split=0.2)

NameError: name 'model' is not defined

## 為了比對，另外做了兩層 fc layer 的對照 model

In [138]:
# Baseline for comparison: a plain stack of two 20-unit sigmoid layers and a
# 10-way softmax output, using the same initialisers as the branched model.
#
# Sequential is imported here because the notebook's import cell only brings in
# Model and Input from keras.models; without this line the cell raises
# NameError when the notebook is run from a fresh kernel.
from keras.models import Sequential

model2 = Sequential()
model2.add(Dense(20, activation="sigmoid", kernel_initializer="random_uniform", bias_initializer="zeros", input_dim = 784))
model2.add(Dense(20, activation="sigmoid", kernel_initializer="random_uniform", bias_initializer="zeros"))
model2.add(Dense(10, activation="softmax", kernel_initializer="random_uniform", bias_initializer="zeros"))

In [139]:
# Compile the baseline with the same loss, metric and optimizer type as the
# branched model so the two are trained under matching settings.
model2.compile(loss="categorical_crossentropy", metrics=["acc"], optimizer=Adam())

In [140]:
# Print the baseline's layer structure and parameter counts.
model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_42 (Dense)             (None, 20)                15700     
_________________________________________________________________
dense_43 (Dense)             (None, 20)                420       
_________________________________________________________________
dense_44 (Dense)             (None, 10)                210       
Total params: 16,330
Trainable params: 16,330
Non-trainable params: 0
_________________________________________________________________


In [141]:
# Train the baseline with the same batch size, epoch count and validation
# split as the branched model. The returned History object is not stored.
model2.fit(x_train, y_train, batch_size=128, epochs=20, shuffle=True, validation_split=0.2)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1e85193a198>

## 兩 model 對同一測資的 accuracy比較

In [142]:
# Evaluate the branched two-step model on the test split.
# The result is [loss, accuracy], matching the compiled loss and "acc" metric.
model.evaluate(x_test, y_test)



[0.3425723619103432, 0.8949]

In [143]:
# Evaluate the plain two-layer baseline on the same test split.
# The result is [loss, accuracy], matching the compiled loss and "acc" metric.
model2.evaluate(x_test, y_test)



[0.19678768182098866, 0.9459]

## 看起來反而是兩層 fc 比較準，倒是出乎我意料之外XDDD