# Objective

1. Build a neural-network model on the MNIST dataset that classifies handwritten digits from 0 to 9
2. Build a new model on the MNIST dataset, based on the previous model, that classifies only the digits 0 and 1

# Prepare Environment

In [1]:
%env KERAS_BACKEND=tensorflow
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

env: KERAS_BACKEND=tensorflow


# Prepare Data
1. Load

In [2]:
from keras.datasets import mnist

# mnist.load_data() yields a (train, test) pair; each element is itself an
# (images, labels) tuple, so unpack one split at a time.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

Using TensorFlow backend.


2. Reshape

In [3]:
# The first Conv2D layer is declared with input_shape=(28, 28, 1), so every
# (28, 28) grayscale image needs an explicit trailing channel axis.
# Passing -1 lets NumPy infer the number of samples instead of hard-coding
# 60000 / 10000: the cell then works for any number of 28x28 images (e.g. a
# subset used for quick experiments) and remains safe to re-run.
print('Before reshape: x_train.shape =', x_train.shape, '; x_test.shape =', x_test.shape)

x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)
print('After reshape: x_train.shape =', x_train.shape, '; x_test.shape =', x_test.shape)

Before reshape: x_train.shape = (60000, 28, 28) ; x_test.shape = (10000, 28, 28)
After reshape: x_train.shape = (60000, 28, 28, 1) ; x_test.shape = (10000, 28, 28, 1)


3. Filter 0 and 1

In [4]:
# Build each boolean mask once and reuse it for both images and labels, so the
# two selections always stay aligned.  This has to run while y_train / y_test
# still hold integer digit labels, i.e. before they are one-hot encoded.
is_01_train = y_train <= 1
is_01_test = y_test <= 1

x_train_01, y_train_01 = x_train[is_01_train], y_train[is_01_train]
x_test_01, y_test_01 = x_test[is_01_test], y_test[is_01_test]

print('x_train_01.shape =', x_train_01.shape, '; x_test_01.shape =', x_test_01.shape)
print('y_train_01.shape =', y_train_01.shape, '; y_test_01.shape =', y_test_01.shape)

x_train_01.shape = (12665, 28, 28, 1) ; x_test_01.shape = (2115, 28, 28, 1)
y_train_01.shape = (12665,) ; y_test_01.shape = (2115,)


4. Categorize

In [5]:
import keras.utils as np_utils

# One-hot encode the labels: 10 classes for the full digit problem and
# 2 classes for the zero-vs-one subset.
# NOTE(review): y_train / y_test are overwritten in place, so this cell is
# meant to be executed once per kernel session, after the 0/1 filtering cell.
y_train, y_test = (
    np_utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)
y_train_01, y_test_01 = (
    np_utils.to_categorical(labels, 2) for labels in (y_train_01, y_test_01)
)

print('After categorize: y_train.shape =', y_train.shape, '; y_test.shape =', y_test.shape)
print('After categorize: y_train_01.shape =', y_train_01.shape, '; y_test_01.shape =', y_test_01.shape)

After categorize: y_train.shape = (60000, 10) ; y_test.shape = (10000, 10)
After categorize: y_train_01.shape = (12665, 2) ; y_test_01.shape = (2115, 2)


# Build CNN
First model for 0 to 9 classification

In [6]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Activation

def _conv_block(filters, **conv_kwargs):
    """Return one conv. block: 3x3 'same' convolution -> ReLU -> 2x2 max-pooling.

    Extra keyword arguments (e.g. ``input_shape`` for the very first block)
    are forwarded to ``Conv2D``.
    """
    return [Conv2D(filters, (3, 3), padding='same', **conv_kwargs),
            Activation('relu'),
            MaxPooling2D(pool_size=(2, 2))]


# Three conv. blocks (32 -> 64 -> 128 filters) kept together as one flat list,
# conv_layer, so the same layer objects can be reused by later models.
conv_layer = (_conv_block(32, input_shape=(28, 28, 1))
              + _conv_block(64)
              + _conv_block(128))

# Flatten plus two fully-connected layers form the classification head,
# fc_layer; it ends in a 10-way softmax (one output per digit).
fc_layer = [
    Flatten(),
    Dense(200), Activation('relu'),
    Dense(10), Activation('softmax'),
]

model = Sequential(conv_layer + fc_layer)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 14, 14, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 128)         73856     
__________

In [7]:
# Plain SGD on a mean-squared-error objective, reporting accuracy.
# NOTE(review): categorical cross-entropy is the customary loss for a softmax
# classifier; consider evaluating it (together with input scaling) separately.
model.compile(optimizer='sgd', loss='mse', metrics=['accuracy'])

# 10 passes over the full training set in mini-batches of 100 samples.
model.fit(x_train, y_train, epochs=10, batch_size=100)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x17bafb70d68>

In [8]:
# Score the 10-class model on the held-out test set; with the metrics given at
# compile time the result is [loss, accuracy].
model.evaluate(x=x_test, y=y_test)



[0.01645310133784078, 0.877]

# Transfer Learning
Make use of the ready 0-to-9 classification model to build another 0-and-1 classification model

### Two nodes in output layer
1. Transfer
    - Create another model based on the previous model and new layers

In [9]:
# Fresh classification head for the zero-vs-one task: same hidden width as
# before, but ending in a 2-way softmax.
new_fc_layer = [
    Flatten(),
    Dense(200), Activation('relu'),
    Dense(2), Activation('softmax'),
]

# The very same conv_layer objects are reused here, so model_01 shares its
# convolutional weights with the 10-class model.
model_01 = Sequential([*conv_layer, *new_fc_layer])
model_01.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 14, 14, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 128)         73856     
__________

2. Fix Trained Parameters

In [10]:
# Freeze the transferred convolutional layers so that only the new head is
# updated during training.  The flag is set on the shared layer objects
# themselves, and it is set before model_01 is compiled.
for shared_layer in conv_layer:
    shared_layer.trainable = False

# Re-print the summary to inspect the model after freezing.
model_01.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 14, 14, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 128)         73856     
__________

3. Train

In [11]:
# Same optimiser / loss / metric setup as the 10-class model; compiling here,
# after the conv. layers were marked non-trainable, picks up the frozen state.
model_01.compile(optimizer='sgd', loss='mse', metrics=['accuracy'])

# 5 passes over the zero/one training subset in mini-batches of 100 samples.
model_01.fit(x_train_01, y_train_01, epochs=5, batch_size=100)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x17bbd511e48>

In [12]:
# Score the two-output model on the zero/one test subset; the result is
# [loss, accuracy].
model_01.evaluate(x=x_test_01, y=y_test_01)



[0.0005399081621233167, 0.9995271867612293]

### One node in output layer
1. Data adjustment
    - Reverse the one-hot (categorical) encoding to recover integer 0/1 labels

In [20]:
# Undo the one-hot encoding: the index of the largest entry in each row is the
# original 0/1 label, which is what a single sigmoid output is trained against.
y_train_01_b = y_train_01.argmax(axis=1)
y_test_01_b = y_test_01.argmax(axis=1)

print(y_train_01_b.shape, y_test_01_b.shape)
print('y_train_01_b =', y_train_01_b)
print('y_test_01_b =', y_test_01_b)

(12665,) (2115,)
y_train_01_b = [0 1 1 ... 1 0 1]
y_test_01_b = [1 0 1 ... 1 0 1]


2. Transfer

In [21]:
# Classification head with a single sigmoid output: one probability-like score
# in place of a 2-way softmax.
binary_fc_layer = [
    Flatten(),
    Dense(200), Activation('relu'),
    Dense(1), Activation('sigmoid'),
]

# Again built on top of the shared conv_layer objects.
model_binary = Sequential([*conv_layer, *binary_fc_layer])
model_binary.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
activation_1 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 14, 14, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 128)         73856     
__________

3. Train

In [23]:
# Same training setup as the other models, but fitted against the integer 0/1
# labels because the network has a single output node.
# NOTE(review): binary cross-entropy is the customary loss for a sigmoid
# output; MSE is kept here to stay comparable with the earlier models.
model_binary.compile(optimizer='sgd', loss='mse', metrics=['accuracy'])
model_binary.fit(x_train_01, y_train_01_b, epochs=5, batch_size=100)

# Last expression of the cell: [loss, accuracy] on the zero/one test subset.
model_binary.evaluate(x=x_test_01, y=y_test_01_b)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[0.001045695909280701, 0.9995271867612293]

# Conclusion

Even though the original model was trained for multi-class classification, transferring it to a two-class classification model still works properly.
Changing the output layer slightly (two softmax nodes versus one sigmoid node) does no harm, because the hidden layer with 200 nodes absorbs the conversion.