In [1]:
# Here is where we import all the things we need
# This example is based on Abhishek's Machine Learning Demystified
# https://github.com/shekit/machine-learning-demystified/

import keras

from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, MaxPooling2D, Conv2D, Dropout
from keras.layers import Flatten

# For getting the MNIST data set
from keras.datasets import mnist

# This is for image processing and displaying results right here in jupyter
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Fetch MNIST; load_data() hands back a training pair and a testing pair,
# each of which is unpacked into its image matrix (x) and its labels (y)
train_pair, test_pair = mnist.load_data()
x_train, y_train = train_pair
x_test, y_test = test_pair
# Drop the temporaries so they don't hold extra references to the arrays
del train_pair, test_pair

In [5]:
# Look at the "shape" of the training image matrix as it was loaded.
# Being the last expression in the cell, the shape tuple is displayed
# as the cell's output.
x_train.shape

(60000, 28, 28)

In [9]:
# Reshape and normalise the input images.
def _prepare_images(images):
    """Return `images` shaped (n, 28, 28, 1) as float32 values in [0, 1].

    The trailing 1 is the grayscale channel (it would be 3 for RGB).
    The sample count is inferred with -1 instead of being hard-coded, so
    the same code serves the training set, the test set, or any subset.

    Scaling is applied only while the pixels are still integers (the raw
    0-255 form). Once converted to float32 the array is left untouched,
    so re-running this cell does not divide by 255 a second time.
    """
    images = images.reshape(-1, 28, 28, 1)
    if images.dtype.kind in 'ui':  # still raw integer pixel values
        images = images.astype('float32') / 255
    return images


x_train = _prepare_images(x_train)
x_test = _prepare_images(x_test)

In [10]:
# Let's look at the "shape" of the input data after reshaping:
# 60,000 images of 28x28 pixels, each pixel with 1 (grayscale) channel
x_train.shape

(60000, 28, 28, 1)

In [11]:
# change y to one hot encoding
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

In [15]:
# Let's look at an example of a "one-hot encoded" digit: the label of the
# first training image. This one is a 5, so the only 1 in the vector sits
# at index 5 and every other entry is 0.
y_train[0]

array([ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.])

In [13]:
# Build the model!

# The architecture is declared in one go: Sequential stacks the layers in
# exactly the order they are listed here.
model = Sequential([
    # First convolutional stage: 32 filters of 3x3 over the 28x28x1 input,
    # followed by its activation, max pooling, and dropout
    Conv2D(32, kernel_size=(3,3), input_shape=(28,28,1)),
    Activation('relu'),
    MaxPooling2D((2,2)),
    Dropout(0.2),

    # Second convolutional stage, this time with 64 filters
    Conv2D(64, kernel_size=(3,3)),
    Activation('relu'),
    MaxPooling2D((2,2)),
    Dropout(0.2),

    # Collapse the feature maps into a 1D array so they can feed the
    # fully connected layers
    Flatten(),

    # Fully connected "hidden" layers
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),

    # Final output layer: 10 outputs, one per digit
    Dense(10, activation='softmax'),
])

In [17]:
# We can look at the model now: summary() prints one row per layer with
# its type, output shape, and parameter count
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
activation_1 (Activation)    (None, 26, 26, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
activation_2 (Activation)    (None, 11, 11, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 64)          0         
__________

In [14]:
# Finalize the model by attaching its training configuration: the
# optimizer, the loss to minimise, and the metric reported alongside it
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# A single pass over the training data for now (too few to train well)
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=1,
    verbose=1,
)

Epoch 1/20

In [15]:
# Measure the model on the held-out test images and keep the result
score = model.evaluate(x=x_test, y=y_test)



In [16]:
# Display the evaluation result. With the model compiled with
# metrics=['accuracy'] this is presumably [test loss, test accuracy].
score

[2.305835609436035, 0.075499999999999998]

In [None]:
# Persist the model so the flask server can use it!
# This writes an HDF5 file named 'model.h5'.
model.save(filepath='model.h5')