In [None]:
import tensorflow as tf
import h5py
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Fetch the MNIST digits; Keras hands them back as a (train, test) pair of (images, labels) tuples
train_split, test_split = tf.keras.datasets.mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Report how many samples each split holds and how each sample is laid out
print("x_train shape:", x_train.shape, "y_train shape:", y_train.shape)
print("x_test shape:", x_test.shape, "y_test shape:", y_test.shape)

The cell above contains the usual "magic": it imports the libraries we need, namely h5py (for HDF5 files), NumPy (for numerical arrays), Matplotlib (for plotting) and, most importantly, the TensorFlow library.

It also downloads the dataset and prints its "shape", that is, how many samples there are and how they are stored.

In [None]:
# Pick one training sample, draw its pixels, and report the label stored for it
index = 10
plt.imshow(x_train[index])
print(f"That's a {y_train[index]}")

In [None]:
# Show the raw numbers stored for the sample selected by `index`
# (the notebook displays the value of a cell's last expression).
x_train[index]

Now we have to reshape the data. First, we take some of the training pictures and put them into a separate validation set, which is not used to update the model during training.

Then we have to change the form of the data a little bit, but that's just for technical reasons.

In [None]:
# Preprocess the MNIST arrays for training:
#   1. hold out the first 5,000 training samples as a validation set,
#   2. add a trailing channel axis and scale the pixel values to [0, 1],
#   3. one-hot encode the digit labels.

# Guard against running this cell twice on the same kernel: a second run would slice the
# training set again, rescale already-scaled pixels and one-hot encode labels that are
# already one-hot, silently corrupting the data.
if x_train.ndim != 3 or y_train.ndim != 1:
    raise RuntimeError("Data is already preprocessed; re-run the cell that loads MNIST first.")

num_valid = 5000   # samples moved from the training set into the validation set
num_classes = 10   # one class per digit, 0-9

# Further break training data into train / validation sets
# (put 5000 into validation set and keep remaining 55,000 for train)
(x_train, x_valid) = x_train[num_valid:], x_train[:num_valid]
(y_train, y_valid) = y_train[num_valid:], y_train[:num_valid]

# Reshape input data from (28, 28) to (28, 28, 1) so every image carries an explicit channel axis,
# and convert the raw 0-255 integer pixels to float32 values in [0, 1]: small, uniformly
# scaled inputs are what gradient-based training of a network expects.
w, h = 28, 28
x_train = x_train.reshape(x_train.shape[0], w, h, 1).astype('float32') / 255
x_valid = x_valid.reshape(x_valid.shape[0], w, h, 1).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], w, h, 1).astype('float32') / 255

# One-hot encode the labels (digit d becomes a length-10 vector with a 1 at position d)
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_valid = tf.keras.utils.to_categorical(y_valid, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

# Print training set shape
print("x_train shape:", x_train.shape, "y_train shape:", y_train.shape)

# Print the number of training, validation, and test datasets
print(x_train.shape[0], 'train set')
print(x_valid.shape[0], 'validation set')
print(x_test.shape[0], 'test set')

Now comes the magic: we build the neural network. It consists of a few convolution layers with maxpool and a couple of dense (aka "normal") layers in between. That's a big network!

In [None]:
# Start from an empty sequential model; the layers of the network are meant to be added below.
model = tf.keras.Sequential()

# Add layers here :)
# In the following examples your main task will be to set up the network.
# I will give a sample, and you can check https://www.tensorflow.org/api_docs/python/tf/keras/layers
# for all the possible layers and ways they can be connected.
# NOTE(review): summary() presumably needs a built model, i.e. at least one layer whose
# input shape is known (e.g. input_shape=(28, 28, 1) on the first layer) - confirm against
# the TensorFlow version in use.


# Print an overview of the layers that make up the model
model.summary()

We can quickly set up what kind of error ("loss") function we want and which optimization algorithm we use. Adam is similar to our gradient descent from before, but it's adaptive: it adjusts the step size for each weight (not to be confused with the stride of a convolution) based on how the gradient of the error has behaved recently.

In [None]:
# Configure training: cross-entropy loss over the one-hot encoded labels, with accuracy reported as a metric.
# NOTE(review): no optimizer is passed yet; depending on the TensorFlow version compile() then
# either falls back to a default or raises - set it explicitly below.
model.compile(loss='categorical_crossentropy',
              # Set the optimizer here, e.g. optimizer='adam',
              # See https://www.tensorflow.org/api_docs/python/tf/keras/optimizers for the optimizers available in tf.keras.
             metrics=['accuracy'])

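This is also pretty nifty: we use the training and validation data from before to train our model. Caution: each epoch takes about 1.5 minutes! After each epoch, the checkpoint callback saves the model whenever it is the best one so far on the validation data. Note that `ModelCheckpoint` judges "best" by the validation loss unless a different `monitor` is given.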

In [None]:
# Checkpoint callback: writes the model to disk only when it beats the best one seen so far
# ("best" is judged by the callback's monitored quantity; `monitor` is left at its default here)
checkpointer = tf.keras.callbacks.ModelCheckpoint(
    filepath='model.weights.best.hdf5',
    verbose=0,
    save_best_only=True,
)

# Train on the training split and score on the validation split, with the checkpoint callback attached
model.fit(
    x_train,
    y_train,
    batch_size=32,
    # choose the number of passes over the training data here: epochs=2,
    validation_data=(x_valid, y_valid),
    callbacks=[checkpointer],
)

We load the weights of that best model back into memory.

In [None]:
# Restore the weights from the checkpoint file written by the ModelCheckpoint callback during training.
# NOTE(review): "best" is whatever that callback monitored; its `monitor` argument was left at the
# default (presumably validation loss rather than validation accuracy - confirm against the Keras docs).
model.load_weights('model.weights.best.hdf5')

And evaluate it on the test data (the test set is separate from the validation set and was never seen during training)!

In [None]:
# Score the restored model on the held-out test split (silently, verbose=0)
score = model.evaluate(x_test, y_test, verbose=0)

# Entry 1 of the returned scores is the accuracy metric requested at compile time
test_accuracy = score[1]
print('\n', 'Test accuracy:', test_accuracy)

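Let's look at some random samples and check if the model predicted them correctly. Each title shows the predicted digit followed by the true digit in parentheses; a green title means the prediction is correct, a red one means it is wrong.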

In [None]:
# Run the model on every test image; y_hat[k] holds the model's output scores for test image k
y_hat = model.predict(x_test)

# Draw 25 distinct test samples at random and lay them out on a 5 x 5 grid
figure = plt.figure(figsize=(20, 10))
chosen_samples = np.random.choice(x_test.shape[0], size=25, replace=False)
title_template = "{} ({})"  # predicted label, then the true label in parentheses

for position, sample in enumerate(chosen_samples, start=1):
    ax = figure.add_subplot(5, 5, position, xticks=[], yticks=[])

    # Drop the size-1 channel axis so imshow receives a plain 2-D image
    ax.imshow(np.squeeze(x_test[sample]))

    # The position of the largest entry is the class: predicted from y_hat, true from the one-hot label
    predict_index = np.argmax(y_hat[sample])
    true_index = np.argmax(y_test[sample])

    # Green title for a correct prediction, red for a mistake
    if predict_index == true_index:
        title_color = "green"
    else:
        title_color = "red"
    ax.set_title(title_template.format(predict_index, true_index), color=title_color)

