In [55]:
# aux imports
import numpy as np
import pandas as pd
from plotnine import ggplot, aes, geom_line, ylab

# keras imports
from keras.layers import Dense
from keras.models import Sequential
from keras.callbacks import LambdaCallback

# sklearn imports
from sklearn.datasets import fetch_mldata
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split, StratifiedKFold

In [2]:
# Load the 'MNIST original' dataset; later cells read its `.data` and
# `.target` attributes.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed
# in 0.22 (fetch_openml is the documented replacement) — confirm which
# scikit-learn version this notebook is pinned to before re-running.
mnist = fetch_mldata('MNIST original')

In [3]:
# Binarize the digit labels: fit the encoder on the targets and encode them
# in a single call. The fitted encoder stays bound to `lb`.
lb = LabelBinarizer()
y = lb.fit_transform(mnist.target)

In [4]:
# Hold out 20% of the samples for testing.
# random_state pins the shuffle so that Restart & Run All reproduces the same
# split; stratify keeps the digit-class proportions equal in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    mnist.data, y,
    test_size=0.2,
    random_state=42,
    stratify=mnist.target,
)

In [11]:
def get_model(hidden_unit_size):
    ''' Creates a fully connected neural network with a single hidden
        layer for the mnist dataset.
        The neural net's layer sizes are the following:
        [784, hidden_unit_size, 10]

    :param hidden_unit_size: int.
        the size of the hidden layer. Integer-like values (e.g. numpy
        integers taken from an array of candidate sizes) are accepted and
        converted to a built-in ``int``.
    :return: keras.models.Sequential
        the model, already compiled with categorical cross-entropy loss,
        the 'sgd' optimizer and the accuracy metric.
    :raises ValueError: if hidden_unit_size is not a positive whole number
    '''
    # Coerce to a built-in int so the layer always receives the same type
    # whatever integer type the caller iterates over, and fail here with a
    # clear message instead of somewhere inside the layer construction.
    units = int(hidden_unit_size)
    if units != hidden_unit_size or units <= 0:
        raise ValueError(
            'hidden_unit_size must be a positive integer, got %r' % (hidden_unit_size,))

    model = Sequential()

    # hidden layer: sigmoid units fed by the 784 input features
    model.add(Dense(units, activation='sigmoid', input_shape=(784,)))
    # output layer: softmax over the 10 classes
    model.add(Dense(10, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
    return model

In [60]:
# Candidate hidden-layer widths: the powers of two from 2**1 up to 2**15.
exponents = np.arange(1, 16)
hidden_unit_sizes = np.power(2, exponents)
hidden_unit_sizes

array([    2,     4,     8,    16,    32,    64,   128,   256,   512,
        1024,  2048,  4096,  8192, 16384, 32768])

In [None]:
epochs = 5
# one dict per (hidden size, epoch): the Keras epoch logs plus 'size' and 'epoch'
accs = []

for hidden_unit_size in hidden_unit_sizes:
    model = get_model(hidden_unit_size)

    # callback for retrieving training info.
    # the info will be plotted subsequently.
    # Every row is labelled with its hidden size and epoch at the moment it is
    # logged (the size is bound through the lambda default), so the table does
    # not depend on the number or order of logged rows, and a partially
    # completed sweep still yields correctly labelled results.
    # dict(...) also copies the logs so later changes cannot leak into `accs`.
    logging_callback = LambdaCallback(
        on_epoch_end=lambda epoch, logs, size=int(hidden_unit_size): accs.append(
            dict(logs or {}, size=size, epoch=epoch)),
    )

    # training neural net
    model.fit(X_train, y_train,
              epochs=epochs,
              verbose=0,
              batch_size=128,
              validation_split=0.2,
              callbacks=[logging_callback])

# pandas dataframe with the training info
# columns: the logged metrics, then 'size' and 'epoch'
results = pd.DataFrame(accs)

In [None]:
# Keep, for every hidden-layer size, only the row of its final epoch.
# transform('max') broadcasts each group's highest epoch back onto the
# original row index, so the comparison below is a boolean mask aligned with
# `results`. (A groupby(...).apply(...) result can come back indexed by
# (size, row) depending on the pandas version, and such a mask cannot be used
# to index `results`.)
final_epoch = results.groupby('size').epoch.transform('max')
last_epoch_results = results[results.epoch == final_epoch]

# Training error (1 - acc) and validation error (1 - val_acc) against the
# hidden-layer size.
(
    ggplot(last_epoch_results, aes(x='size'))
    + geom_line(aes(y='1 - acc'))
    + geom_line(aes(y='1 - val_acc'))
    + ylab('error')
)

In [58]:
# (interactive `model.fit?` docstring lookup removed: it is IPython-only
# syntax left over from development and produces nothing the analysis uses)

In [84]:
# Test-set accuracy of `model` (whichever network was trained last above):
# fraction of samples whose highest-scoring predicted class matches the
# position of the 1 in the binarized label row.
predicted_classes = np.argmax(model.predict(X_test), axis=1)
true_classes = np.argmax(y_test, axis=1)
np.mean(predicted_classes == true_classes)

0.91178571428571431

In [85]:
# Display the label-binarized test targets (one row per held-out sample).
y_test

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 1, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])