This notebook reuses our self-made neural net from [a previous notebook][1] and trains it on a more realistic (larger) dataset of digits. The dataset is split into a training and a test sample to obtain an independent measurement of the accuracy on data that the neural network has not seen before.

[1]: NN_from_scratch_digits.py

First we define the functions that stay unchanged.

In [None]:
import math, random
import matplotlib
import matplotlib.pyplot as plt

In [None]:
def dot(v, w):
    """Return the dot product v_1 * w_1 + ... + v_n * w_n.

    Elements are paired with ``zip``, so surplus elements of the longer
    sequence are ignored.
    """
    products = [left * right for left, right in zip(v, w)]
    return sum(products)

def sigmoid(t):
    """Return the logistic function 1 / (1 + exp(-t)).

    For very negative `t` the intermediate ``math.exp(-t)`` exceeds the
    float range and raises OverflowError; the mathematical limit in that
    case is 0, so 0.0 is returned instead of letting the error propagate.
    All other inputs are computed exactly as before.
    """
    try:
        return 1 / (1 + math.exp(-t))
    except OverflowError:
        # exp(-t) is too large to represent, i.e. the result is 0 to float precision
        return 0.0

def argmax(l):
    """Return the index of the first occurrence of the largest element of `l`."""
    largest = max(l)
    return l.index(largest)

def reshape(array, xs, ys):
    """Cut a flat sequence into consecutive rows of `xs` elements each.

    Raises ValueError if the length of `array` is not ``xs * ys``.
    """
    size = len(array)
    if size != xs * ys:
        raise ValueError("Wrong size in reshape")

    rows = []
    for start in range(0, size, xs):
        rows.append(array[start:start + xs])
    return rows

In [None]:
def neuron_output(weights, inputs):
    """Return the sigmoid of the weighted sum of `inputs` for one neuron."""
    weighted_sum = dot(weights, inputs)
    return sigmoid(weighted_sum)

def feed_forward(neural_network, input_vector):
    """Forward-propagate `input_vector` through the network.

    `neural_network` is a list of layers, each layer a list of neurons,
    each neuron a list of weights whose last entry multiplies the bias.
    Returns a list holding one list of neuron outputs per layer, in layer
    order (so the last entry is the output of the final layer).
    """
    activations = []
    signal = input_vector

    for layer in neural_network:
        # every neuron additionally sees a constant bias input of 1
        biased_signal = signal + [1]
        # the outputs of this layer are the inputs of the next one
        signal = [neuron_output(neuron_weights, biased_signal)
                  for neuron_weights in layer]
        activations.append(signal)

    return activations

def predict(network, input):
    """Feed `input` through `network` and return only the last layer's outputs."""
    layer_outputs = feed_forward(network, input)
    return layer_outputs[-1]

Also the backpropagation function is mostly unchanged but we introduce a learning-rate parameter (`rate`), which is just a multiplicative factor for the adjustments we make to the weights in each call:

In [None]:
def backpropagate(network, input_vector, target, rate = 1.0):
    """Perform one gradient step on a two-layer network, in place.

    Runs `input_vector` through the network, computes the error terms
    (deltas) of the output and hidden layer with respect to `target`, and
    subtracts ``delta * input * rate`` from every weight. The weights in
    `network` are modified directly; nothing is returned.
    """
    hidden_outputs, outputs = feed_forward(network, input_vector)
    hidden_neurons = network[0]
    output_neurons = network[-1]

    # error term of each output neuron
    output_deltas = []
    for k, activation in enumerate(outputs):
        output_deltas.append(activation * (1 - activation) * (activation - target[k]))

    # propagate the errors back: error term of each hidden neuron, computed
    # from the output-layer weights *before* they are adjusted below
    hidden_deltas = []
    for k, activation in enumerate(hidden_outputs):
        outgoing_weights = [neuron[k] for neuron in output_neurons]
        hidden_deltas.append(activation * (1 - activation) *
                             dot(output_deltas, outgoing_weights))

    # adjust the output layer; its inputs are the hidden outputs plus the bias
    hidden_with_bias = hidden_outputs + [1]
    for delta, neuron in zip(output_deltas, output_neurons):
        for j, activation in enumerate(hidden_with_bias):
            neuron[j] -= delta * activation * rate

    # adjust the hidden layer; its inputs are the network inputs plus the bias
    inputs_with_bias = input_vector + [1]
    for delta, neuron in zip(hidden_deltas, hidden_neurons):
        for j, value in enumerate(inputs_with_bias):
            neuron[j] -= delta * value * rate

# Now with the digits dataset

We import the digits dataset from sklearn that [we have used before][1].

[1]: ScilearnIntro.ipynb

In [None]:
#import numpy as np
from sklearn.datasets import load_digits
# load scikit-learn's digits dataset; the cells below read `digits.images` and `digits.target`
digits = load_digits()
# bare expression: in a notebook its value (the shape of the image array) is shown as the cell output
digits.images.shape

The next steps consist in preprocessing the data to have it in a compatible format for our network:

In [None]:
# Flatten every image to a plain list of pixel values and scale it down by
# 1/16 and again by 1/8 -- note we're normalizing to a mean value << 1
# (presumably the raw pixel values range up to 16 -- TODO confirm)
images = []
for image in digits.images:
    scaled_pixels = image.flatten() / 16 / 8
    images.append(list(scaled_pixels))

In [None]:
#np.mean(images)

In [None]:
# One-hot encode the targets: a list of ten 0/1 entries per label, with the
# single 1 at the position given by the label
numbers = [[int(position == label) for position in range(10)]
           for label in digits.target]

In [None]:
# Preview the first images of the dataset on a 5 x 10 grid of subplots.
fig, axes = plt.subplots(figsize=(20, 8), ncols=10, nrows=5)
# axes.flat walks the 2-D grid of subplots row by row; zip stops at whichever
# of the two (subplots or images) runs out first, so no manual counter or
# break is needed
for ax, figure in zip(axes.flat, images):
    ax.imshow(reshape(figure, 8, 8))

Here we define our network. The size of the digits is 8*8 pixels. We use 10 neurons in the hidden layer:

In [None]:
random.seed(0)    # fixed seed, so every run starts from the same weights
input_size  = 64  # length of each input vector (one entry per pixel)
num_hidden  = 10  # neurons in the hidden layer
output_size = 10  # neurons in the output layer (one per input class)


def _random_layer(num_neurons, num_inputs):
    """Return `num_neurons` weight lists, each holding one random weight
    per input plus one additional bias weight."""
    return [[random.random() for __ in range(num_inputs + 1)]
            for __ in range(num_neurons)]


# the hidden layer is fed by the inputs, the output layer by the hidden neurons
hidden_layer = _random_layer(num_hidden, input_size)
output_layer = _random_layer(output_size, num_hidden)

# the network starts out with random weights
network = [hidden_layer, output_layer]

Another helper function computes the accuracy:

In [None]:
# compute accuracy
def accuracy(network, X, y):
    total = float(len(y))
    correct = sum([argmax(predict(network, input)) == argmax(y[idx]) for idx, input in enumerate(X)])
    return correct / total

Then we can start the training of the neural network. We will manually adjust the learning rate, starting with 1 and decreasing it later on. The `train_frac` variable determines which fraction of the digits dataset is used for training; the remainder is held out as the test sample.

In [None]:
# The first `train_frac` of the samples is used for training, the rest for testing.
train_frac = 0.7
len_train  = int(len(images) * train_frac)

# cut images and targets at the same position so that they stay aligned
images_train,  images_test  = images[:len_train],  images[len_train:]
numbers_train, numbers_test = numbers[:len_train], numbers[len_train:]

Sizes of the training, test and total sample of images:

In [None]:
# (training, test, total) number of images -- bare expression, shown as the cell output in a notebook
len(images_train), len(images_test), len(images)

Run the training and print the accuracy both on the training and test sample:

In [None]:
learn_rate  = 1  # learning rate handed to backpropagate()
batch_size  = 50 # number of training samples drawn in each round
num_epochs  = 10 # number of epochs to run

# number of batches that (roughly) add up to one pass over the training sample
one_epoch = math.ceil(len(images_train) / batch_size)

# pair every image with its target once, instead of rebuilding the list for each batch
training_pairs = list(zip(images_train, numbers_train))

for x in range(one_epoch * num_epochs):
    # pick a random subsample (drawn without replacement within the batch)
    train_on = random.sample(training_pairs, batch_size)
    for input_vector, target_vector in train_on:
        backpropagate(network, input_vector, target_vector, learn_rate) # 65 µs/loop (4 hidden), 134 µs/loop (10 hidden), 380 µs/loop (32 hidden)

    # report after every completed epoch; counting the batch that has just been
    # trained keeps the printed number truthful and includes the final state
    batches_done = x + 1
    if batches_done % one_epoch == 0:
        print("Batches processed: %d, accuracy (train): %.3f, accuracy (test): %.3f" %
              (batches_done,
               accuracy(network, images_train, numbers_train),
               accuracy(network, images_test , numbers_test),
              ))

These functions show the weights of the neurons in the hidden layer as before:

In [None]:
def patch(x, y, hatch, color):
    """Build an unfilled 1 x 1 matplotlib Rectangle centred on (x, y),
    drawn with the given crosshatch pattern and color."""
    lower_left = (x - 0.5, y - 0.5)
    width = height = 1
    return matplotlib.patches.Rectangle(lower_left, width, height,
                                        hatch=hatch, fill=False, color=color)


def show_weights(neuron_idx, ax):
    """Draw the input weights of one hidden neuron as an 8x8 image.

    Reads the weights of hidden neuron `neuron_idx` from the global
    `network`, plots the 64 pixel weights on the matplotlib axes `ax` and
    writes the bias weight into the x-axis label. Returns the image object
    created by ``ax.imshow``.
    """
    weights = network[0][neuron_idx]

    # the first 64 weights belong to the 8x8 input pixels, row by row
    grid = [weights[row:(row+8)]        # [weights[0:8], ..., weights[56:64]]
            for row in range(0, 64, 8)]

    pos = ax.imshow(grid,
                    cmap=matplotlib.cm.coolwarm,
                    interpolation='none', # plot blocks as blocks
                    vmin = -8, vmax = 8) # define a unique range for all subplots

    # the bias weight is the last entry of the neuron's weight list
    # (feed_forward appends the constant bias input at the end)
    ax.set_xlabel("bias = %.2f" % weights[-1])
    return pos

# one subplot per hidden neuron, all in a single row
fig, ax = plt.subplots(figsize=(15, 3), ncols=num_hidden)
for idx, neuron_axes in enumerate(ax):
    pos = show_weights(idx, neuron_axes)
    #fig.colorbar(pos, ax = ax[0])


In [None]:
# heat map of the output layer: one row per output neuron, one column per weight
# (the weights for the hidden neurons followed by the bias weight)
plt.imshow(output_layer, cmap=matplotlib.cm.coolwarm)
plt.xlabel("Weight of hidden neuron (and bias)")
# NOTE(review): the trailing semicolon presumably suppresses the notebook's echo of the return value
plt.ylabel("Output label");

At this point you can continue the training by rerunning the cells above a few times and play with the `learn_rate` and `batch_size` parameters.

Test the network for an image:

In [None]:
# Show one test image together with the network's verdict on it.
idx = 42
sample = images_test[idx]
plt.imshow(reshape(sample, 8, 8))

probs = predict(network, sample)
print("Probabilities:", probs)

# both the prediction and the one-hot target are decoded with argmax
predicted_label = argmax(probs)
true_label      = argmax(numbers_test[idx])
is_correct      = predicted_label == true_label
print("i.e. prediction: %d (%s)" % (predicted_label, is_correct))
if not is_correct:
    print("     true label: %d" % true_label)

(I'd have a hard time recognizing this as a "2", but apparently both the network and the target label agree on this.)