# Exercise 7: Code a simple neural network

In this exercise you will code up a simple neural network from scratch and test it on the well-known MNIST dataset.

In [1]:
# First import all important libraries for the network class
import numpy as np
# And scipy for the sigmoid function expit()
import scipy as sp
# Import matplotlib so we can look at the data
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline

### Write a neural network from scratch

In [3]:
# Define the neural network class
class neuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Attributes:
        inodes, hnodes, onodes: number of nodes in the input, hidden and output layer.
        wih: input-to-hidden weight matrix of shape (hnodes, inodes).
        who: hidden-to-output weight matrix of shape (onodes, hnodes).
        lr: learning rate applied to every weight update.
        af: activation function (the logistic sigmoid).
    """

    # Initialise the neural network
    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        """Store the layer sizes and learning rate and draw random initial weights.

        Args:
            inputnodes: number of input nodes.
            hiddennodes: number of hidden nodes.
            outputnodes: number of output nodes.
            learningrate: factor applied to each weight update in train().
        """
        # Import the submodule explicitly: with older SciPy releases a bare
        # `import scipy as sp` does not load `scipy.special`, so `sp.special`
        # only resolves if some other package happened to import it first.
        from scipy.special import expit

        # Set the number of nodes for each layer
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes

        # Link weight matrices, drawn from a normal distribution centred on zero
        # with standard deviation 1/sqrt(number of nodes feeding into the layer)
        self.wih = np.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = np.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))

        # Learning rate
        self.lr = learningrate

        # Activation function: the sigmoid, used for both layers
        self.af = expit

    def _forward(self, inputs):
        """Propagate a column of inputs through both layers.

        Args:
            inputs: 2d array with the input values along the first axis.

        Returns:
            Tuple (hidden_outputs, final_outputs) holding the activations of the
            hidden layer and of the output layer.
        """
        # Input -> hidden: weighted sum followed by the activation function
        hidden_outputs = self.af(np.dot(self.wih, inputs))
        # Hidden -> output: weighted sum followed by the activation function
        final_outputs = self.af(np.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    # Now we need a function to train the neural network
    def train(self, inputs_list, targets_list):
        """Run one forward pass and update both weight matrices in place.

        Args:
            inputs_list: input values for one training instance.
            targets_list: desired output values for that instance.

        Returns:
            None. self.who and self.wih are modified in place.
        """
        # Convert the lists to 2d column arrays
        inputs = np.array(inputs_list, ndmin=2).T
        targets = np.array(targets_list, ndmin=2).T

        hidden_outputs, final_outputs = self._forward(inputs)

        # Output layer error is (target - output)
        output_errors = targets - final_outputs
        # Hidden layer error is the output error split by the hidden-output weights
        # and recombined at the hidden nodes. This must use the weights from
        # before the update below.
        hidden_errors = np.dot(self.who.T, output_errors)

        # Update the hidden-output weights; out * (1 - out) is the sigmoid derivative
        self.who += self.lr * np.dot(output_errors * final_outputs * (1.0 - final_outputs), hidden_outputs.T)

        # Update the input-hidden weights
        self.wih += self.lr * np.dot(hidden_errors * hidden_outputs * (1.0 - hidden_outputs), inputs.T)

    # And finally a function to query the neural network
    def query(self, inputs_list):
        """Return the output-layer activations for the given inputs.

        Args:
            inputs_list: input values for one instance.

        Returns:
            2d array with one row per output node.
        """
        # Convert inputs list to 2d column array
        inputs = np.array(inputs_list, ndmin=2).T
        _, final_outputs = self._forward(inputs)
        return final_outputs

In [4]:
# Layer sizes: 784 inputs (28 x 28 pixels), 200 hidden nodes, 10 outputs (one per digit)
input_nodes, hidden_nodes, output_nodes = 784, 200, 10

# Factor applied to every weight update during training
learning_rate = 0.1

# Build the network from the settings above
n = neuralNetwork(
    inputnodes=input_nodes,
    hiddennodes=hidden_nodes,
    outputnodes=output_nodes,
    learningrate=learning_rate,
)

### Get the MNIST data

In [5]:
# Get the MNIST dataset from scikit learn
# Make sure you upgrade sklearn to version 0.20 (or higher)
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)
# Define the features and the labels as float64 NumPy arrays.
# np.asarray(..., dtype=np.float64) replaces np.asfarray (removed in NumPy 2.0) and
# also turns the pandas objects returned by newer scikit-learn releases into plain
# arrays, so positional indexing such as X[some_index] keeps working.
X = np.asarray(mnist['data'], dtype=np.float64)
y = np.asarray(mnist['target'], dtype=np.float64)

In [6]:
# Sanity check: shapes first, then dtypes, of features and labels
print(f"{X.shape} {y.shape} {X.dtype} {y.dtype}")

(70000, 784) (70000,) float64 float64


In [None]:
# Pick one sample, view its flat pixel vector as a 28 x 28 image, and draw it
some_index = 4
some_digit = X[some_index]
some_digit_image = some_digit.reshape(28, 28)
fig, ax = plt.subplots()
ax.imshow(some_digit_image, cmap=cm.jet, interpolation='nearest')
ax.axis('off')
# Show the matching label as the cell output
y[some_index]

In [7]:
# The first 60000 samples form the training set, the remaining ones the test set
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]
# Apply one random permutation to features and labels so each pair stays aligned
shuffle_index = np.random.permutation(60000)
X_train = X_train[shuffle_index]
y_train = y_train[shuffle_index]

### Train the Network

In [None]:
# Train the from-scratch network on the shuffled training set

# Number of full passes over the training data
epochs = 5

for e in range(epochs):
    # Report progress once per pass
    print('Epoch', e)
    # Walk through features and labels in lockstep
    for pixels, digit in zip(X_train, y_train):
        # Rescale the inputs so that 0 maps to 0.01 and 255 maps to 1.0
        inputs = (pixels / 255.0 * 0.99) + 0.01
        # Target vector: 0.01 everywhere except 0.99 at the index of the true label
        targets = np.full(output_nodes, 0.01)
        targets[int(digit)] = 0.99
        n.train(inputs, targets)

In [None]:
# Spot-check the trained network on a single test sample
some_index = 111
raw_pixels = X_test[some_index]
# Unscaled copy for plotting, rescaled version as network input
some_digit_image = raw_pixels.copy().reshape(28, 28)
some_digit = (raw_pixels / 255.0 * 0.99) + 0.01
target_label = np.int64(y_test[some_index])

# The output node with the highest activation gives the predicted label
network_output = n.query(some_digit)
predicted_label = np.argmax(network_output)
print('True label:',target_label,' - Predicted label:',predicted_label)

fig, ax = plt.subplots()
ax.imshow(some_digit_image, cmap=cm.jet, interpolation='nearest')
ax.axis('off')

### Test the model

In [None]:
# Evaluate the from-scratch network on the test set

# One entry per test sample: 1 if the prediction was right, 0 otherwise
scorecard = []

# Walk through test features and labels in lockstep
for pixels, true_value in zip(X_test, y_test):
    correct_label = np.int64(true_value)
    # Same scaling and shifting as used for training
    inputs = (pixels / 255.0 * 0.99) + 0.01
    outputs = n.query(inputs)
    # The index of the highest output value is the predicted label
    label = np.argmax(outputs)
    scorecard.append(1 if label == correct_label else 0)

In [None]:
# Performance = number of correct predictions divided by the number of test samples
scorecard_array = np.asarray(scorecard)
n_correct = scorecard_array.sum()
print("Performance = ", n_correct / scorecard_array.size)

### And now we try it with Keras (TensorFlow)

In [8]:
# First we need to import TensorFlow
# You can install it via 'pip install --upgrade tensorflow'
import tensorflow as tf
# print(tf.__version__)

In [11]:
# Define a simple MLP with Keras by handing the layer stack to Sequential
model = tf.keras.models.Sequential([
    # Hidden layer; input_shape on the first layer fixes the input size,
    # so no separate input layer is added
    tf.keras.layers.Dense(hidden_nodes, activation='sigmoid', input_shape=(input_nodes,)),
    # Output layer with one sigmoid unit per class
    tf.keras.layers.Dense(output_nodes, activation='sigmoid'),
])
# Print the layer and parameter overview
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 200)               157000    
_________________________________________________________________
dense_4 (Dense)              (None, 10)                2010      
Total params: 159,010
Trainable params: 159,010
Non-trainable params: 0
_________________________________________________________________


In [19]:
# Plain stochastic gradient descent with the same learning rate as the from-scratch network.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated and
# rejected by newer Keras releases.
sgd  = tf.keras.optimizers.SGD(learning_rate=learning_rate)
# Mean squared error as the loss; additionally report classification accuracy
model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy'])

In [16]:
# One-hot encode the labels: each label becomes a vector of length output_nodes
y_train_cat = tf.keras.utils.to_categorical(y_train, num_classes=output_nodes)
y_test_cat = tf.keras.utils.to_categorical(y_test, num_classes=output_nodes)

In [20]:
# Train the Keras model for 5 epochs with mini-batches of 10 samples.
# NOTE(review): X_train is passed in unscaled here, whereas the from-scratch
# network rescales its inputs before training; the evaluate/predict cells
# below also use unscaled data, so the Keras part is consistent with itself.
model.fit(
    X_train,
    y_train_cat,
    epochs=5,
    batch_size=10,
    verbose=1,
)

Train on 60000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f8a613cfc50>

In [21]:
# Evaluate on the test set; index 1 of the result is printed as the accuracy
# (the model was compiled with metrics=['accuracy'])
score = model.evaluate(X_test, y_test_cat, verbose=1)
test_accuracy = score[1]
print(test_accuracy)

0.9375


In [22]:
# Run the whole test set through the Keras model to get its output for every sample
network_output = model.predict(x=X_test)

In [23]:
# Compare the shape of the test inputs with the shape of the predictions
print(f"{X_test.shape} {network_output.shape}")

(10000, 784) (10000, 10)


In [24]:
# Spot-check a single Keras prediction against its true label
some_index = 540
target_label = np.int64(y_test[some_index])
# The position of the highest output value is the predicted label
sample_scores = network_output[some_index]
predicted_label = np.argmax(sample_scores)
print('True label:',target_label,' - Predicted label:',predicted_label)

True label: 5  - Predicted label: 5


### Some more things to do

Test if you can get a better performance. Some things to vary:

* Change the network architecture, i.e. number of hidden layers and nodes in the hidden layers
* Change the activation function. See: https://keras.io/activations/
* Change the optimizer. See: https://keras.io/optimizers/
* Change the loss function (e.g. try categorical_crossentropy). See: https://keras.io/losses/
* Change the number of epochs and the batch size