Example adapted from [this online post](https://nextjournal.com/gkoehler/digit-recognition-with-keras).

In [25]:
from shutil import copyfileobj
from six.moves import urllib
from sklearn.datasets.base import get_data_home
import os

def fetch_mnist(data_home=None):
    """Download ``mnist-original.mat`` into the ``mldata`` cache directory.

    The file is stored at ``<get_data_home(data_home)>/mldata/mnist-original.mat``.
    If a file already exists at that path, nothing is downloaded.

    The download is written to a temporary ``.part`` file and only renamed to
    its final name once it has been fully copied, so an interrupted download
    is never mistaken for a complete cached file on the next run.

    Args:
        data_home: Optional base directory, forwarded to ``get_data_home``.

    Returns:
        None.

    Raises:
        Whatever ``urlopen`` or the file operations raise (e.g. on network or
        disk errors); the partial file is removed before the error propagates.
    """
    mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
    data_home = get_data_home(data_home=data_home)
    data_home = os.path.join(data_home, 'mldata')
    # exist_ok avoids the race between an existence check and the creation.
    os.makedirs(data_home, exist_ok=True)
    mnist_save_path = os.path.join(data_home, "mnist-original.mat")
    if os.path.exists(mnist_save_path):
        return

    partial_path = mnist_save_path + ".part"
    response = urllib.request.urlopen(mnist_alternative_url)
    try:
        with open(partial_path, "wb") as matlab_file:
            copyfileobj(response, matlab_file)
        # Publish the file under its final name only after a complete copy.
        os.replace(partial_path, mnist_save_path)
    finally:
        response.close()
        # Only present here if the copy or the rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
# Make sure the .mat file is in the local cache before asking scikit-learn for it.
fetch_mnist()
# NOTE(review): fetch_mldata (and the sklearn.datasets.base import above) are only
# available in older scikit-learn releases — confirm the pinned version before re-running.
from sklearn.datasets import fetch_mldata
# presumably loaded from the file fetch_mnist() cached rather than downloaded again — verify
mnist = fetch_mldata("MNIST original")



In [26]:
# Pull the feature matrix and the label vector out of the fetched dataset.
X = mnist["data"]
y = mnist["target"]
X.shape

(70000, 784)

In [27]:
# The first 60,000 rows form the training split; the remaining rows are held out for testing.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

In [28]:
import numpy as np

# Fixed seed so the shuffled training order is the same on every
# Restart & Run All; a dedicated RandomState leaves NumPy's global RNG untouched.
SHUFFLE_SEED = 42
shuffle_index = np.random.RandomState(SHUFFLE_SEED).permutation(len(X_train))

# Apply the same permutation to features and labels so rows stay aligned.
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

In [29]:
# Sanity check: dimensions of the shuffled training split.
X_train.shape

(60000, 784)

In [30]:
# Sanity check: dimensions of the held-out test split.
X_test.shape

(10000, 784)

In [31]:
# Cast both splits to 32-bit floats.
X_train, X_test = (split.astype('float32') for split in (X_train, X_test))

In [32]:
# Divide every value by 255 (presumably mapping 8-bit pixel intensities into [0, 1]).
# Each name refers to a float array, so `/=` rescales that array in place
# rather than rebinding the loop variable to a copy.
for split in (X_train, X_test):
    split /= 255

One-hot encode the labels using the NumPy utilities that ship with Keras (`np_utils.to_categorical`).

In [33]:
from keras.utils import np_utils

n_classes = 10

# Cast the label vectors before encoding them.
y_train, y_test = y_train.astype('float32'), y_test.astype('float32')
print("Shape before one-hot encoding: ", y_train.shape)

# Expand every label into an n_classes-wide indicator row.
y_train, y_test = (
    np_utils.to_categorical(labels, n_classes) for labels in (y_train, y_test)
)
print("Shape after one-hot encoding: ", y_train.shape)

Shape before one-hot encoding:  (60000,)
Shape after one-hot encoding:  (60000, 10)


Build a linear stack of densely connected layers with the Keras `Sequential` model.

![](nn_example.png)

In [34]:
from keras.models import Sequential, load_model
from keras.layers.core import Dense, Activation

# Two 512-unit ReLU hidden layers over the 784 input features, followed by
# a 10-unit softmax output layer.
model = Sequential([
    Dense(512, input_shape=(784,)),
    Activation('relu'),
    Dense(512),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

In [35]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [36]:
# Train for 10 epochs in mini-batches of 128, logging one line per epoch
# (verbose=2) and scoring against the test split after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    batch_size=128,
    validation_data=(X_test, y_test),
    verbose=2,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 7s - loss: 0.2186 - acc: 0.9358 - val_loss: 0.0987 - val_acc: 0.9701
Epoch 2/10
 - 6s - loss: 0.0792 - acc: 0.9757 - val_loss: 0.0769 - val_acc: 0.9754
Epoch 3/10
 - 6s - loss: 0.0505 - acc: 0.9841 - val_loss: 0.0698 - val_acc: 0.9787
Epoch 4/10
 - 6s - loss: 0.0346 - acc: 0.9885 - val_loss: 0.0705 - val_acc: 0.9777
Epoch 5/10
 - 6s - loss: 0.0269 - acc: 0.9909 - val_loss: 0.0873 - val_acc: 0.9757
Epoch 6/10
 - 7s - loss: 0.0244 - acc: 0.9915 - val_loss: 0.0845 - val_acc: 0.9763
Epoch 7/10
 - 6s - loss: 0.0192 - acc: 0.9932 - val_loss: 0.0679 - val_acc: 0.9815
Epoch 8/10
 - 6s - loss: 0.0168 - acc: 0.9941 - val_loss: 0.0796 - val_acc: 0.9816
Epoch 9/10
 - 6s - loss: 0.0140 - acc: 0.9953 - val_loss: 0.0876 - val_acc: 0.9796
Epoch 10/10
 - 6s - loss: 0.0109 - acc: 0.9961 - val_loss: 0.0889 - val_acc: 0.9811


<keras.callbacks.History at 0x1d0c9251c18>

Compute the model's loss and accuracy on the 10,000 test examples.

In [37]:
# The first two entries of the evaluation result are the loss and the accuracy.
loss_and_metrics = model.evaluate(X_test, y_test, verbose=2)
test_loss, test_accuracy = loss_and_metrics[:2]

print("Test Loss", test_loss)
print("Test Accuracy", test_accuracy)

Test Loss 0.08889384464093324
Test Accuracy 0.9811


Save the model in HDF5 format (an open standard that is more efficient than Python pickle).

In [38]:
# Write the trained model to an HDF5 (.h5) file in the current working directory.
model.save("./keras_mnist_first.h5")

In [None]:
# To reload the saved model in a later session, uncomment the line below:
# mnist_model = load_model("./keras_mnist_first.h5")