In [None]:
# example of loading the mnist dataset from keras
from keras.datasets import mnist
from matplotlib import pyplot
from keras.utils import to_categorical

# load dataset
def load_mnist_dataset():
    """Load MNIST through Keras and return it in a model-ready layout.

    Each image array is reshaped to ``(n_samples, 28, 28, 1)`` so it carries
    an explicit single (one colour) channel axis, and each label vector is
    turned into a ``float32`` one-hot matrix with ``to_categorical``.

    Returns:
        tuple: ``(train_images, train_labels, test_images, test_labels)``.
    """
    train_split, test_split = mnist.load_data()

    def _prepare(images, labels):
        # append the channel axis, then one-hot encode the labels (arrays of 0s and 1s)
        images = images.reshape((images.shape[0], 28, 28, 1))
        labels = to_categorical(labels, num_classes=None, dtype='float32')
        return images, labels

    train_images, train_labels = _prepare(*train_split)
    test_images, test_labels = _prepare(*test_split)

    return train_images, train_labels, test_images, test_labels

# summarize loaded dataset
def summarize_mnist_dataset(trainX, trainY, testX, testY):
    """Print the shapes of both splits and display the first nine training images.

    Args:
        trainX: training images; must expose ``.shape`` and be indexable 0..8.
        trainY: training labels; must expose ``.shape``.
        testX: test images; must expose ``.shape``.
        testY: test labels; must expose ``.shape``.
    """
    train_shapes = (trainX.shape, trainY.shape)
    test_shapes = (testX.shape, testY.shape)
    print('Train: X=%s, y=%s' % train_shapes)
    print('Test: X=%s, y=%s' % test_shapes)

    # lay the first nine samples out on a 3x3 grid, drawn in grayscale
    gray = pyplot.get_cmap('gray')
    for position in range(1, 10):
        pyplot.subplot(3, 3, position)
        pyplot.imshow(trainX[position - 1], cmap=gray)

    # render the assembled figure
    pyplot.show()

In [None]:
# Load MNIST once into module-level names. evaluate_model() reads these four
# globals directly, so this cell must run before any model is evaluated.
train_images, train_labels, test_images, test_labels = load_mnist_dataset()
# Print the shape of each split and preview the first nine training digits.
summarize_mnist_dataset(train_images, train_labels, test_images, test_labels)


In [None]:
# scale pixels
def prep_pixels(train, test):
    """Flatten both image sets to 784-wide rows and scale them by 1/255.

    Modifying the values of each pixel such that they range from 0 to 1
    improves the rate at which the model learns (this assumes the incoming
    pixels are 8-bit intensities in 0-255).

    Args:
        train: training images; reshaped to ``(train.shape[0], 28 * 28)``.
        test: test images; reshaped to ``(test.shape[0], 28 * 28)``.

    Returns:
        tuple: ``(train_norm, test_norm)``, both ``float32``.
    """
    def _flatten_and_scale(images):
        # one row per image, cast to float, then normalise
        flat = images.reshape(images.shape[0], 28 * 28)
        return flat.astype('float32') / 255.0

    return _flatten_and_scale(train), _flatten_and_scale(test)

In [None]:
from keras import models
from keras.layers import Dense
from keras.optimizers import SGD

#define simple NN model
def simple_model():
    """Build and compile the baseline network with one small hidden layer.

    Architecture: 784 inputs -> Dense(8, sigmoid) -> Dense(10, softmax).
    Compiled with SGD (learning rate 0.01, momentum 0.9), categorical
    cross-entropy loss and the accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = models.Sequential()
    model.add(Dense(8, activation='sigmoid', input_shape=(28 * 28,)))
    model.add(Dense(10, activation='softmax'))

    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by current Keras releases.
    opt = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

def simple_model_2_layers():
    """Build and compile the variant with two 8-unit sigmoid hidden layers.

    Architecture: 784 inputs -> Dense(8, sigmoid) -> Dense(8, sigmoid)
    -> Dense(10, softmax). Compiled with SGD (learning rate 0.01,
    momentum 0.9), categorical cross-entropy loss and the accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = models.Sequential()
    # only the first layer declares the input shape; later layers take the
    # previous layer's output (8 values here), so they must not repeat it
    model.add(Dense(8, activation='sigmoid', input_shape=(28 * 28,)))
    model.add(Dense(8, activation='sigmoid'))
    model.add(Dense(10, activation='softmax'))

    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by current Keras releases.
    opt = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

def simple_model_more_nodes():
    """Build and compile the variant with a single wide (784-unit) hidden layer.

    Architecture: 784 inputs -> Dense(784, sigmoid) -> Dense(10, softmax).
    Compiled with SGD (learning rate 0.01, momentum 0.9), categorical
    cross-entropy loss and the accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = models.Sequential()
    model.add(Dense(784, activation='sigmoid', input_shape=(28 * 28,)))
    model.add(Dense(10, activation='softmax'))

    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by current Keras releases.
    opt = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

In [None]:
def evaluate_model(model, epochs=5, batch_size=128):
    """Train ``model`` on MNIST, then print its test and train accuracy/loss.

    Reads the notebook-level ``train_images``, ``train_labels``,
    ``test_images`` and ``test_labels``, so the cell that loads the dataset
    must have been run first. The images are passed through ``prep_pixels``
    before being handed to the model.

    Args:
        model: a compiled Keras model; it is trained in place by ``fit``.
        epochs: number of training epochs (default 5).
        batch_size: mini-batch size used for training (default 128).
    """
    train_norm, test_norm = prep_pixels(train_images, test_images)

    model.fit(train_norm, train_labels, epochs=epochs, batch_size=batch_size)

    # evaluate() yields the loss first, then the metrics given to compile()
    test_loss, test_acc = model.evaluate(test_norm, test_labels)
    print('test_acc:', test_acc, 'test_loss', test_loss)

    # also score the training split so it can be compared with the test score
    train_loss, train_acc = model.evaluate(train_norm, train_labels)
    print('train_acc:', train_acc, 'train_loss', train_loss)

In [None]:
print("Lets evaluate the model")
# Keep the instance under its own name. Assigning it to `simple_model` would
# overwrite the builder function, so re-running this cell (or calling
# simple_model() again anywhere) would fail with "object is not callable".
baseline_model = simple_model()
evaluate_model(baseline_model)

*Description*: We can increase the accuracy of this model by increasing the number of activation nodes in the hidden layer. The accuracy is quite low compared with other models that exist for this dataset.
*Input*: Use 784 activation nodes in the hidden layer.
         The accuracy should increase because the model has more capacity: more activation nodes allow it to capture more features, which in turn helps the algorithm distinguish the shapes better.
*Output*: The accuracy improved from .88 to .91.
        The result is as expected.

In [None]:
# Experiment: widen the single hidden layer to 784 units, then train and score it.
model_more_nodes = simple_model_more_nodes()
evaluate_model(model_more_nodes)

In [None]:
*Description*: We can try to increase the accuracy of this model by increasing the number of hidden layers. The accuracy is quite low compared with other models that exist for this dataset.
*Input*: Use two hidden layers of 8 activation nodes each (as defined in `simple_model_2_layers`).
         The accuracy should increase because the model is more complex and has more layers with which to compute the correct prediction.
*Output*: The accuracy decreased from .88 to .84.
          The result is different from what was expected.
          A possible explanation is that the input dataset is relatively small: the added layer introduces more parameters to learn, but we do not have enough data for that.

In [None]:
# Experiment: add a second hidden layer, then train and score the deeper model.
model_2_layers = simple_model_2_layers()
evaluate_model(model_2_layers)