In [14]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import math
from tensorflow.python.keras.models import Sequential, Model
from tensorflow.python.keras.layers import InputLayer, Input
from tensorflow.python.keras.layers import Reshape, Conv2D
from tensorflow.python.keras.layers import MaxPool2D, Dense, Flatten
from tensorflow.python.keras.optimizers import Adam

In [3]:
from tensorflow.examples.tutorials.mnist import input_data
data = input_data.read_data_sets('data/MNIST/', one_hot=True)

Extracting data/MNIST/train-images-idx3-ubyte.gz
Extracting data/MNIST/train-labels-idx1-ubyte.gz
Extracting data/MNIST/t10k-images-idx3-ubyte.gz
Extracting data/MNIST/t10k-labels-idx1-ubyte.gz


In [4]:
data.test.cls = np.argmax(data.test.labels, axis=1)

In [6]:
img_size = 28
img_size_flat = img_size * img_size
img_shape = (img_size, img_size)
img_shape_full = (img_size, img_size, 1)
num_channels = 1
num_classes = 10

In [10]:
model = Sequential()
model.add(InputLayer(input_shape=(img_size_flat,)))
model.add(Reshape(img_shape_full))
model.add(Conv2D(kernel_size=5, strides=1, filters=16, padding='same', activation='relu', name='conv_layer1'))
model.add(MaxPool2D(pool_size=2, strides=2))
model.add(Conv2D(kernel_size=5, strides=2, filters=32, padding='same', activation='relu', name='conv_layer2'))
model.add(MaxPool2D(pool_size=2, strides=2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

optimizer = Adam(lr=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x=data.train.images, y=data.train.labels, epochs=1, batch_size=128)

Epoch 1/1


<tensorflow.python.keras._impl.keras.callbacks.History at 0x2318f2a2240>

In [11]:
result= model.evaluate(x=data.test.images, y=data.test.labels)



In [13]:
for name, value in zip(model.metrics_names, result):
    print(name, value)
print("{0}: {1:.2%}".format(model.metrics_names[1], result[1]))

loss 0.09760198063477873
acc 0.9664
acc: 96.64%


In [18]:
inputs = Input(shape=(img_size_flat,))
net = inputs
net = Reshape(img_shape_full)(net)
net = Conv2D(kernel_size=5, strides=1, filters=16, padding='same', activation='relu', name='conv_layer1')(net)
net = MaxPool2D(pool_size=2, strides=2)(net)
net = Conv2D(kernel_size=5, strides=1, filters=16, padding='same', activation='relu', name='conv_layer2')(net)
net = MaxPool2D(pool_size=2, strides=2)(net)
net = Flatten()(net)
net = Dense(128, activation='relu')(net)
net = Dense(num_classes, activation='softmax')(net)
outputs = net

model_func = Model(inputs=inputs, outputs=outputs)
model_func.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
model_func.fit(x=data.train.images, y=data.train.labels, epochs=1, batch_size=128)
model_func.evaluate(x=data.test.images, y=data.test.labels)

for name, value in zip(model_func.metrics_names, result):
    print(name, value)
print("{0}: {1:.2%}".format(model_func.metrics_names[1], result[1]))

Epoch 1/1
loss 0.09760198063477873
acc 0.9664
acc: 96.64%


In [19]:
model_path = 'model.keras'
model_func.save(model_path)
del model_func
from tensorflow.python.keras.models import load_model
model_func_saved = load_model(model_path)
model_func_saved.evaluate(x=data.test.images, y=data.test.labels)
model_func_saved.evaluate(x=data.validation.images, y=data.validation.labels)



[0.07595181209146977, 0.9782]

In [20]:
model_func_saved.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 784)               0         
_________________________________________________________________
reshape_5 (Reshape)          (None, 28, 28, 1)         0         
_________________________________________________________________
conv_layer1 (Conv2D)         (None, 28, 28, 16)        416       
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 14, 14, 16)        0         
_________________________________________________________________
conv_layer2 (Conv2D)         (None, 14, 14, 16)        6416      
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 7, 7, 16)          0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 784)               0         
__________

In [24]:
model_func_saved.get_weights()[0]

array([[[[ 0.14919381, -0.01331354, -0.13101117,  0.11116042,
           0.19253737, -0.01417204,  0.08113975, -0.059536  ,
           0.00549069, -0.11361254,  0.0861453 ,  0.11725801,
          -0.0964651 ,  0.10747826,  0.17376587, -0.05049446]],

        [[ 0.17283697,  0.1659312 , -0.01307141, -0.04050573,
           0.16053014, -0.1095231 , -0.00414349,  0.06535481,
           0.10398444, -0.04334639,  0.04895893,  0.01406143,
          -0.0907459 ,  0.09475459,  0.07411278,  0.10065028]],

        [[ 0.08023234,  0.148917  ,  0.08958954,  0.16025147,
           0.135305  , -0.0843061 , -0.016828  ,  0.04589121,
           0.10663011,  0.09767466,  0.00403546,  0.10059351,
          -0.09096719,  0.16253401,  0.10599612,  0.10880768]],

        [[-0.01583367,  0.19241506,  0.04044468,  0.0415483 ,
           0.0922812 , -0.13344663, -0.14382344,  0.17235543,
           0.1969471 ,  0.12597056, -0.14035459, -0.02110622,
           0.11844183, -0.04823356,  0.02604043, -0.13452064]

In [30]:
model_func_saved.layers[2].get_weights()[0].shape

(5, 5, 1, 16)

### 1. Training on more epochs

In [33]:
img_size = 28
img_size_flat = img_size * img_size
img_shape = (img_size, img_size)
img_shape_full = (img_size, img_size, 1)
num_classes = 10
num_channels = 1

model1 = Sequential()
model1.add(InputLayer(input_shape=(img_size_flat,)))
model1.add(Reshape(img_shape_full))
model1.add(Conv2D(kernel_size=5, padding='same', strides=1, filters=16, activation='relu', name='conv_layer1'))
model1.add(MaxPool2D(strides=2, pool_size=2))
model1.add(Conv2D(kernel_size=5, strides=1, filters=32, padding='same', activation='relu', name='conv_layer2'))
model1.add(MaxPool2D(strides=2, pool_size=2))
model1.add(Flatten())
model1.add(Dense(128, activation='relu'))
model1.add(Dense(num_classes, activation='softmax'))

optimizer = Adam(lr=0.001)
model1.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics =['accuracy'])
model1.fit(x=data.train.images, y= data.train.labels, epochs=2, batch_size=128)
model1.evaluate(x=data.test.images, y=data.test.labels)

Epoch 1/2
Epoch 2/2


[0.04566507011021022, 0.9833]

### 2. Using sigmoid instead on relu activaion functions

In [34]:
img_size = 28
img_size_flat = img_size * img_size
img_shape = (img_size, img_size)
img_shape_full = (img_size, img_size, 1)
num_classes = 10
num_channels = 1

model1 = Sequential()
model1.add(InputLayer(input_shape=(img_size_flat,)))
model1.add(Reshape(img_shape_full))
model1.add(Conv2D(kernel_size=5, padding='same', strides=1, filters=16, activation='sigmoid', name='conv_layer1'))
model1.add(MaxPool2D(strides=2, pool_size=2))
model1.add(Conv2D(kernel_size=5, strides=1, filters=32, padding='same', activation='sigmoid', name='conv_layer2'))
model1.add(MaxPool2D(strides=2, pool_size=2))
model1.add(Flatten())
model1.add(Dense(128, activation='sigmoid'))
model1.add(Dense(num_classes, activation='softmax'))

optimizer = Adam(lr=0.001)
model1.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics =['accuracy'])
model1.fit(x=data.train.images, y= data.train.labels, epochs=2, batch_size=128)
model1.evaluate(x=data.test.images, y=data.test.labels)

Epoch 1/2
Epoch 2/2


[0.1597835160881281, 0.9526]

### 3. Using elu activation function

In [35]:
img_size = 28
img_size_flat = img_size * img_size
img_shape = (img_size, img_size)
img_shape_full = (img_size, img_size, 1)
num_classes = 10
num_channels = 1

model1 = Sequential()
model1.add(InputLayer(input_shape=(img_size_flat,)))
model1.add(Reshape(img_shape_full))
model1.add(Conv2D(kernel_size=5, padding='same', strides=1, filters=16, activation='elu', name='conv_layer1'))
model1.add(MaxPool2D(strides=2, pool_size=2))
model1.add(Conv2D(kernel_size=5, strides=1, filters=32, padding='same', activation='elu', name='conv_layer2'))
model1.add(MaxPool2D(strides=2, pool_size=2))
model1.add(Flatten())
model1.add(Dense(128, activation='elu'))
model1.add(Dense(num_classes, activation='softmax'))

optimizer = Adam(lr=0.001)
model1.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics =['accuracy'])
model1.fit(x=data.train.images, y= data.train.labels, epochs=2, batch_size=128)
model1.evaluate(x=data.test.images, y=data.test.labels)

Epoch 1/2
Epoch 2/2


[0.044801207398506813, 0.9842]

### 4. Change activation function with single value

In [36]:
img_size = 28
img_size_flat = img_size * img_size
img_shape = (img_size, img_size)
img_shape_full = (img_size, img_size, 1)
num_classes = 10
num_channels = 1

def conv_model(activation='sigmoid'):
    model1 = Sequential()
    model1.add(InputLayer(input_shape=(img_size_flat,)))
    model1.add(Reshape(img_shape_full))
    model1.add(Conv2D(kernel_size=5, padding='same', strides=1, filters=16, activation=activation, name='conv_layer1'))
    model1.add(MaxPool2D(strides=2, pool_size=2))
    model1.add(Conv2D(kernel_size=5, strides=1, filters=32, padding='same', activation=activation, name='conv_layer2'))
    model1.add(MaxPool2D(strides=2, pool_size=2))
    model1.add(Flatten())
    model1.add(Dense(128, activation=activation))
    model1.add(Dense(num_classes, activation='softmax'))
    return model1

model_func = conv_model(activation='elu')
optimizer = Adam(lr=0.001)
model_func.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics =['accuracy'])
model_func.fit(x=data.train.images, y= data.train.labels, epochs=1, batch_size=128)
model_func.evaluate(x=data.test.images, y=data.test.labels)

Epoch 1/1


[0.05806006715670228, 0.9802]

### 5. Change stride in conv layer and see the difference in accuracy and running times

In [37]:
img_size = 28
img_size_flat = img_size * img_size
img_shape = (img_size, img_size)
img_shape_full = (img_size, img_size, 1)
num_classes = 10
num_channels = 1

def conv_model(activation='sigmoid'):
    model1 = Sequential()
    model1.add(InputLayer(input_shape=(img_size_flat,)))
    model1.add(Reshape(img_shape_full))
    model1.add(Conv2D(kernel_size=5, padding='same', strides=2, filters=16, activation=activation, name='conv_layer1'))
    model1.add(MaxPool2D(strides=2, pool_size=2))
    model1.add(Conv2D(kernel_size=5, strides=2, filters=32, padding='same', activation=activation, name='conv_layer2'))
    model1.add(MaxPool2D(strides=2, pool_size=2))
    model1.add(Flatten())
    model1.add(Dense(128, activation=activation))
    model1.add(Dense(num_classes, activation='softmax'))
    return model1

model_func = conv_model(activation='elu')
optimizer = Adam(lr=0.001)
model_func.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics =['accuracy'])
model_func.fit(x=data.train.images, y= data.train.labels, epochs=1, batch_size=128)
model_func.evaluate(x=data.test.images, y=data.test.labels)

Epoch 1/1


[0.10827499214857816, 0.9661]

### 6. Add more conv layers and see the difference in accuracy

In [38]:
img_size = 28
img_size_flat = img_size * img_size
img_shape = (img_size, img_size)
img_shape_full = (img_size, img_size, 1)
num_classes = 10
num_channels = 1

def conv_model(activation='sigmoid'):
    model1 = Sequential()
    model1.add(InputLayer(input_shape=(img_size_flat,)))
    model1.add(Reshape(img_shape_full))
    model1.add(Conv2D(kernel_size=5, padding='same', strides=1, filters=16, activation=activation, name='conv_layer1'))
    model1.add(MaxPool2D(strides=2, pool_size=2))
    model1.add(Conv2D(kernel_size=5, strides=1, filters=32, padding='same', activation=activation, name='conv_layer2'))
    model1.add(MaxPool2D(strides=2, pool_size=2))
    model1.add(Conv2D(kernel_size=5, strides=1, filters=64, padding='same', activation=activation, name='conv_layer3'))
    model1.add(MaxPool2D(strides=2, pool_size=2))
    model1.add(Flatten())
    model1.add(Dense(128, activation=activation))
    model1.add(Dense(num_classes, activation='softmax'))
    return model1

model_func = conv_model(activation='elu')
optimizer = Adam(lr=0.001)
model_func.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics =['accuracy'])
model_func.fit(x=data.train.images, y= data.train.labels, epochs=1, batch_size=128)
model_func.evaluate(x=data.test.images, y=data.test.labels)

Epoch 1/1


[0.05552197875850834, 0.9819]