# Using a deeper CNN on a small MNIST data set

In this script we build a deeper CNN with several convolutional layers. As in the experiments with the fully connected NN on MNIST that we performed last time, we again use a small data set with only 2400 images in the training set, so that the NN can be trained on a CPU.

In [None]:
#!pip3 install scikit-learn ##for docker users

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.image as imgplot
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

import time
import tensorflow as tf
# Fix TensorFlow's random seed so that repeated runs are more reproducible.
# NOTE(review): tf.set_random_seed is the TF 1.x name of this function -
# confirm that the installed TensorFlow version still provides it.
tf.set_random_seed(1)

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, BatchNormalization
from keras.layers import Convolution2D, MaxPooling2D, Flatten
import keras
import sys
# Report the Keras, TensorFlow and Python versions this run is using.
print ("Keras {} TF {} Python {}".format(keras.__version__, tf.__version__, sys.version_info))

In [None]:
from keras.datasets import mnist

# Load the full MNIST arrays, then keep only small subsets so that training
# stays feasible on a CPU.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# First 2400 training samples -> training set, the following 600 -> validation set.
X_train, Y_train = train_images[:2400], train_labels[:2400]
X_val, Y_val = train_images[2400:3000], train_labels[2400:3000]

# First 1000 samples of the test split returned by load_data -> test set.
X_test, Y_test = test_images[:1000], test_labels[:1000]

# Drop the names of the full arrays; only the subsets are used from here on.
del train_images, train_labels, test_images, test_labels


In [None]:
# Append a trailing channel axis of size 1 to every image set, giving arrays of
# shape (samples, 28, 28, 1) as required by the input_shape of the first
# convolutional layer.
# -1 lets NumPy infer the number of samples, so this cell keeps working when
# the subset sizes chosen at load time are changed.
X_train = np.reshape(X_train, (-1, 28, 28, 1))
X_val = np.reshape(X_val, (-1, 28, 28, 1))
X_test = np.reshape(X_test, (-1, 28, 28, 1))

print(X_train.shape)
print(X_val.shape)
print(X_test.shape)

In [None]:
# Show training image number 42 in grayscale; the final index 0 selects the
# single channel so that imshow receives a 2-D array.
plt.imshow(X_train[42, ..., 0], cmap="gray")

In [None]:
def convertToOneHot(vector, num_classes=None):
    """Return a one-hot encoded float32 matrix for integer class labels.

    Args:
        vector: 1-D sequence or array of integer class labels.
        num_classes: Number of columns of the result. When None, it is
            inferred as ``max(vector) + 1`` (0 for an empty input).

    Returns:
        Array of shape ``(len(vector), num_classes)`` and dtype float32 that
        holds a 1 at position ``[i, vector[i]]`` and 0 everywhere else.
    """
    labels = np.asarray(vector)
    if labels.size == 0:
        # An empty list becomes a float array, which cannot be used as an index.
        labels = labels.astype(np.intp)
    if num_classes is None:
        # The default used to be passed straight to np.zeros, which raised a
        # TypeError; infer the width from the largest label instead.
        num_classes = int(labels.max()) + 1 if labels.size else 0
    result = np.zeros((len(labels), num_classes), dtype='float32')
    # Fancy indexing sets exactly one entry per row: row i, column labels[i].
    result[np.arange(len(labels)), labels] = 1
    return result
# Demonstrate the encoding on the first five training labels: the raw integer
# labels first, then their one-hot representation with 10 columns.
print("class label", Y_train[:5], sep="\n")
print("class label in OneHot encodig", convertToOneHot(Y_train[:5], num_classes=10), sep="\n")

In [None]:
# One-hot encode the training and validation labels (10 classes).
# Y_test is not converted here; the evaluation cell compares it as integers.
Y_train = convertToOneHot(Y_train, 10)
Y_val = convertToOneHot(Y_val, 10)
print(Y_train.shape, Y_val.shape, sep="\n")

In [None]:
# Center and standardize the images pixel by pixel. Mean and standard deviation
# are computed over the sample axis of the training set only and then applied
# to the validation set as well.
X_mean = X_train.mean(axis=0)
X_std = X_train.std(axis=0)

# The small constant in the denominator prevents a division by zero for pixels
# whose standard deviation is 0.
X_train, X_val = (
    (images - X_mean) / (X_std + 0.0001) for images in (X_train, X_val)
)

In [None]:
# Hyperparameters of the network and of the training run.
batch_size = 128                     # intended mini-batch size
nb_classes = 10                      # width of the final softmax layer
nb_epoch = 30                        # intended number of training epochs
img_rows = img_cols = 28             # image height and width in pixels
kernel_size = 3, 3                   # filter size of every convolutional layer
pool_size = 2, 2                     # window of the max-pooling layer
input_shape = img_rows, img_cols, 1  # one image: rows, columns, single channel

In [None]:
### a deeper CNN model
# `name` identifies this model; it is used as the subdirectory of the
# TensorBoard log directory.
name = 'cnn2'
model = Sequential()

# Convolutional block 1: two convolutions with 8 filters each, every one
# followed by batch normalization and ReLU, then one max-pooling layer.
model.add(Convolution2D(8,kernel_size,padding='same',input_shape=input_shape))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Convolution2D(8, kernel_size,padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=pool_size))

# Convolutional block 2 (first layer only): one convolution with 16 filters,
# batch normalization and ReLU. The rest of this block is left as an exercise.
model.add(Convolution2D(16, kernel_size,padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))

# your code goes here:




# end of your code

# Classifier head: flatten the feature maps, one hidden dense layer with 40
# units (batch normalization, dropout 0.3, ReLU), then a softmax over
# nb_classes outputs.
model.add(Flatten())  # turns the output into a vector
model.add(Dense(40))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Activation('relu'))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

# Categorical cross-entropy matches the one-hot encoded labels; accuracy is
# tracked as an additional metric.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


In [None]:
# Print a textual overview of the layers of the model.
model.summary()

In [None]:
# Render the model graph, including the tensor shapes, with Graphviz' dot and
# display it inline as an SVG (this must stay the last expression of the cell).
SVG(model_to_dot(model,show_shapes=True).create(prog='dot', format='svg'))

In [None]:
# TensorBoard callback for the training run. The logs go into a subdirectory
# named after the model, so that runs of different models stay separate.
tensorboard = keras.callbacks.TensorBoard(
    log_dir='tensorboard/mnist/' + name + '/',
    histogram_freq=0,
    write_graph=True,
)

In [None]:
# Train the model on the training subset and evaluate it on the validation
# subset after every epoch. batch_size and nb_epoch come from the
# hyperparameter cell instead of being repeated here as literals, so changing
# them there actually takes effect. The returned History object is kept for
# the learning-curve plots.
history = model.fit(
    X_train, Y_train,
    batch_size=batch_size,
    epochs=nb_epoch,
    verbose=2,
    validation_data=(X_val, Y_val),
    callbacks=[tensorboard],
)

In [None]:
# Keras releases from 2.3 on record the metric requested as 'accuracy' under
# the keys 'accuracy' / 'val_accuracy', older releases under 'acc' / 'val_acc'.
# Use whichever key this training run produced instead of failing with a
# KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# summarize history for accuracy
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'valid'], loc='lower right')
plt.show()

# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'valid'], loc='upper right')
plt.show()

## Prediction on the test set


In [None]:
# Standardize the test images with the statistics of the training set and
# convert them to float32 before feeding them to the network.
X_test_norm = ((X_test - X_mean) / (X_std + 0.0001)).astype("float32")
pred = model.predict(X_test_norm)

# Y_test still holds integer labels, so the predicted class of each image
# (index of the largest softmax output) is compared against it directly.
print(confusion_matrix(Y_test, pred.argmax(axis=1)))
print("Acc = ", (Y_test == pred.argmax(axis=1)).sum() / len(pred))

In [None]:
#!tensorboard --logdir=tensorboard/mnist/