## Introduction
+ The following modules need to be installed for the code to execute properly:
    + keras
    + tensorflow
    + numpy
    + matplotlib
    
+ Useful links
    + https://github.com/keras-team/keras/tree/master/examples  -> examples of network architecture
    + https://elitedatascience.com/keras-tutorial-deep-learning-in-python -> keras tutorial with mnist

## Attempts with core layers

Import necessary modules

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import keras
from keras.utils        import np_utils
from keras.models       import Sequential
from keras.layers       import Dense, Conv2D, MaxPooling2D, Activation, Dropout, Flatten, BatchNormalization
from keras.datasets     import mnist
from keras.optimizers   import Adam, Adadelta
from keras.preprocessing.image import ImageDataGenerator

First very minimal NN -> reaches 97.5% accuracy

In [None]:
# Minimal fully-connected classifier on MNIST, followed by a listing of the
# first misclassified test samples.

# read data from dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# flatten every image into a 784-element vector (input of the first Dense layer)
x_train = x_train.reshape(x_train.shape[0], 784)
x_test = x_test.reshape(x_test.shape[0], 784)

# cast x to float32 and normalize to [0,1]
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# show one label before and after one-hot encoding (10 classes)
print(y_train[0])
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)
print(y_train[0])

model = Sequential()
model.add(Dense(256, input_dim=784, activation='sigmoid'))
model.add(Dense(256, activation='sigmoid'))
model.add(Dropout(0.3))
# no activation is passed here, so this layer is a plain linear projection
model.add(Dense(24))
model.add(Dense(10, activation='softmax'))

opt = Adam(lr = 0.01)
model.compile(loss = 'categorical_crossentropy',
             optimizer = opt,
             metrics = ['accuracy'])
model.fit(x_train, y_train,
         epochs = 8,
         batch_size = 32)

# evaluate() returns [loss, accuracy] given the metrics compiled above
score = model.evaluate(x_test, y_test, batch_size = 32)
print(score)

# y_test is one-hot encoded at this point, so both the network output and the
# labels are reduced to class indices with argmax before being compared.
predicted = np.argmax(model.predict(x_test), axis=1)
actual = np.argmax(y_test, axis=1)
# np.nonzero returns a tuple with one index array per axis; take the only one
failed = np.nonzero(predicted != actual)[0]

# list the first 10 misclassified test samples (fewer if there are not that many)
for i in failed[:10]:
    print("sample %d: predicted %d ; real value is %d" % (i, predicted[i], actual[i]))

Attempt using a convolutional NN

In [None]:
# Convolutional classifier on MNIST: two Conv2D stages, max-pooling, then a
# dense head ending in a 10-way softmax.

# reload the raw dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# add a trailing channel axis -> (N, 28, 28, 1), cast to float32, divide by 255
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255

# one-hot encode the integer labels over 10 classes
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

# NOTE(review): each Conv2D already applies ReLU and is then followed by
# BatchNormalization plus a second ReLU - confirm this double activation is
# intended before changing it, since it affects the reported results.
model = Sequential([
    # convolution stage 1
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(axis=-1),
    Activation('relu'),
    # convolution stage 2
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(axis=-1),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # dense head
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.25),
    BatchNormalization(),
    Dense(100, activation='sigmoid'),
    Dropout(0.25),
    Dense(10, activation='softmax'),
])

opt = Adam(lr=0.01)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)
model.fit(x_train, y_train, epochs=8, batch_size=32)

# evaluate() returns [loss, accuracy] given the metrics compiled above
score = model.evaluate(x_test, y_test, batch_size=32)
print(score)

Now let's improve it by adding some data augmentation

In [None]:
# Same convolutional network as the previous cell, trained on randomly
# augmented images produced by ImageDataGenerator.

# single source of truth for the batch size used by the generators,
# the step counts and the final evaluation
BATCH_SIZE = 64

# read data from dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# reshape in 2D, 28*28, with a trailing channel axis
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

# convert to float and normalize
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# reshape y to 10-dim bit instead of int
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

# random rotations, shifts, shear and zoom are applied to training images only
gen = ImageDataGenerator(rotation_range=11, width_shift_range=0.14, shear_range=0.25,
                         height_shift_range=0.14, zoom_range=0.12)

# the test images go through a generator configured without any augmentation
test_gen = ImageDataGenerator()

train_generator = gen.flow(x_train, y_train, batch_size=BATCH_SIZE)
test_generator = test_gen.flow(x_test, y_test, batch_size=BATCH_SIZE)

# NOTE(review): each Conv2D already applies ReLU and is then followed by
# BatchNormalization plus a second ReLU - confirm this double activation is
# intended before changing it, since it affects the reported results.
model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))

model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))

model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
model.add(BatchNormalization())

model.add(Dense(100, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(10, activation='softmax'))

opt = Adam(lr = 0.01)
model.compile(loss = 'categorical_crossentropy',
             optimizer = opt,
             metrics = ['accuracy'])

# number of full batches per pass, derived from the arrays instead of
# hard-coding the dataset sizes (60000 // 64 and 10000 // 64 for MNIST)
model.fit_generator(train_generator,
                    steps_per_epoch=len(x_train) // BATCH_SIZE,
                    epochs=5,
                    validation_data=test_generator,
                    validation_steps=len(x_test) // BATCH_SIZE)

# evaluate() returns [loss, accuracy] given the metrics compiled above
score = model.evaluate(x_test, y_test, batch_size = BATCH_SIZE)
print(score)

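Using more filters in the second Conv2D layer (64 instead of 32), removing batch normalization, adding dropout after pooling, widening the dense layer and switching to the Adadelta optimizer to improve results.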

In [2]:
# Convolutional network with 32 then 64 filters, dropout regularisation and
# the Adadelta optimizer, trained on augmented MNIST images.

# single source of truth for the batch size used by the generators,
# the step counts and the final evaluation
BATCH_SIZE = 64

# read data from dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# reshape in 2D, 28*28, with a trailing channel axis
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

# convert to float and normalize
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# reshape y to 10-dim bit instead of int
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

# random rotations, shifts, shear and zoom are applied to training images only
gen = ImageDataGenerator(rotation_range=11, width_shift_range=0.14, shear_range=0.25,
                         height_shift_range=0.14, zoom_range=0.12)

# the test images go through a generator configured without any augmentation
test_gen = ImageDataGenerator()

train_generator = gen.flow(x_train, y_train, batch_size=BATCH_SIZE)
test_generator = test_gen.flow(x_test, y_test, batch_size=BATCH_SIZE)

model = Sequential()

# feature extractor: two ReLU convolutions, pooling, dropout
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

# classifier head
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

# Adadelta with its default settings
opt = Adadelta()
model.compile(loss = 'categorical_crossentropy',
             optimizer = opt,
             metrics = ['accuracy'])

# number of full batches per pass, derived from the arrays instead of
# hard-coding the dataset sizes (60000 // 64 and 10000 // 64 for MNIST)
model.fit_generator(train_generator,
                    steps_per_epoch=len(x_train) // BATCH_SIZE,
                    epochs=10,
                    validation_data=test_generator,
                    validation_steps=len(x_test) // BATCH_SIZE)

# evaluate() returns [loss, accuracy] given the metrics compiled above
score = model.evaluate(x_test, y_test, batch_size = BATCH_SIZE)
print(score)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[0.039691976184088706, 0.9883]


In [None]:
# Display the first misclassified test digits of the most recently trained model.

# y_test was one-hot encoded by the training cell, so both the network output
# and the labels are reduced to class indices with argmax before comparison.
predicted = np.argmax(model.predict(x_test), axis=1)
actual = np.argmax(y_test, axis=1)

# np.nonzero returns a tuple with one index array per axis; take the only one
failed = np.nonzero(predicted != actual)[0]
print("%d misclassified test images out of %d" % (len(failed), len(x_test)))

# show at most 25 failures, one figure per image
for fail in failed[:25]:
    img = x_test[fail].reshape(28, 28)
    plt.imshow(img)
    print("predicted %s ; real value is %s" % (predicted[fail], actual[fail]))
    plt.show()

In [None]:
# Scratch cell: print a literal string. The bare name `hello` is not defined
# in any visible cell, so the unquoted form raised a NameError.
print("hello")