In [4]:
import os
import struct
import numpy as np

# Seed NumPy's global random generator with a fixed value. This runs before
# `import keras` below -- presumably so that any randomness Keras draws from
# NumPy is repeatable between runs; confirm for the installed Keras version.
np.random.seed(1337)

import keras
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.utils import np_utils

# Directory holding the four raw MNIST files. Set the MNIST_DIR environment
# variable to load them from somewhere else; when it is unset, the original
# hard-coded location is used.
mnist_dir = os.environ.get('MNIST_DIR',
                           '/Users/matt/dev/machine-learning/datasets/mnist/')

# Full paths of the label and image files for the test and training splits.
path_test_labels = os.path.join(mnist_dir, 't10k-labels-idx1-ubyte')
path_test_images = os.path.join(mnist_dir, 't10k-images-idx3-ubyte')
path_train_labels = os.path.join(mnist_dir, 'train-labels-idx1-ubyte')
path_train_images = os.path.join(mnist_dir, 'train-images-idx3-ubyte')

class Mnist:
    """One MNIST split (labels plus images) loaded from a pair of raw files."""

    def __init__(self, labels_path, images_path):
        """Read both files fully into memory.

        Args:
            labels_path: Path to the label file (header magic number 2049).
            images_path: Path to the image file (header magic number 2051).

        Raises:
            AssertionError: If a file starts with an unexpected magic number.
            ValueError: If the number of payload bytes in a file disagrees
                with the counts declared in that file's header.
        """
        with open(labels_path, 'rb') as fh:
            # Header: two big-endian unsigned 32-bit ints (magic, item count).
            magic, num = struct.unpack(">II", fh.read(8))
            assert magic == 2049, 'unexpected label magic number: %d' % magic
            # Everything after the header is read as one unsigned byte per
            # label (these are the "-ubyte" files); a signed dtype would turn
            # any byte >= 128 into a negative number.
            labels = np.fromfile(fh, dtype=np.uint8)

        if labels.size != num:
            raise ValueError('label file declares %d labels but contains %d'
                             % (num, labels.size))
        # 1-D uint8 array with one entry per example.
        self.labels = labels

        with open(images_path, 'rb') as fh:
            # Header: magic, image count, rows per image, columns per image.
            magic, num, rows, cols = struct.unpack(">IIII", fh.read(16))
            assert magic == 2051, 'unexpected image magic number: %d' % magic
            pixels = np.fromfile(fh, dtype=np.uint8)

        expected = num * rows * cols
        if pixels.size != expected:
            raise ValueError('image file declares %d pixel bytes but contains %d'
                             % (expected, pixels.size))
        # uint8 array of shape (num, rows, cols).
        self.images = pixels.reshape(num, rows, cols)

    def flattened_images(self):
        """Return the images as a float32 matrix, one row per image.

        Each image is flattened to rows*cols values and every value is
        divided by 255. A new array is built on every call; `self.images`
        is left untouched.
        """
        (num_images, rows, cols) = self.images.shape
        flattened = self.images.reshape(num_images, rows * cols)
        return self._normalize(flattened)

    def one_hot_labels(self):
        """Return the labels encoded by `np_utils.to_categorical` with 10 classes."""
        return np_utils.to_categorical(self.labels, 10)

    def _normalize(self, nparray):
        """Return a float32 copy of `nparray` with every element divided by 255."""
        # astype() allocates a new array, so the in-place division below
        # never modifies the caller's data.
        scaled = nparray.astype('float32')
        scaled /= 255
        return scaled

# Load both dataset splits into memory up front; each one pairs a label file
# with its matching image file.
train = Mnist(labels_path=path_train_labels,
              images_path=path_train_images)
test = Mnist(labels_path=path_test_labels,
             images_path=path_test_images)


In [5]:
batch_size = 128
# NOTE(review): `nb_epoch` is the keyword this notebook passes to `fit`; newer
# Keras releases are believed to call it `epochs` -- confirm before upgrading.
nb_epoch = 20

# Build every feature / label matrix exactly once. `flattened_images()` and
# `one_hot_labels()` create new arrays on each call, so reusing the results
# avoids converting the test split separately for `fit` and `evaluate`.
x_train = train.flattened_images()
y_train = train.one_hot_labels()
x_test = test.flattened_images()
y_test = test.one_hot_labels()

# Fully connected classifier: two 512-unit ReLU layers, each followed by
# Dropout(0.2), then a softmax output layer. The input width and the number of
# output units are taken from the data instead of being hard-coded.
model = Sequential()
model.add(Dense(512, input_dim=x_train.shape[1]))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(y_train.shape[1]))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.RMSprop(),
              metrics=['accuracy'])

# NOTE: the test split is also passed as validation data, so the per-epoch
# validation figures and the final score below come from the same samples.
history = model.fit(x_train,
                    y_train,
                    batch_size=batch_size,
                    nb_epoch=nb_epoch,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test,
                       y_test,
                       verbose=0)

# `score` holds the loss first, then the metric requested in `compile`.
print('Test score:', score[0])
print('Test accuracy:', score[1])
        



Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
('Test score:', 0.11637635516840407)
('Test accuracy:', 0.98140000000000005)
