In [1]:
from __future__          import division
from scipy.stats         import zscore
import matplotlib.pyplot as plt
import numpy             as np
import array
import math
import struct

%matplotlib inline
%pdb

Automatic pdb calling has been turned ON


In [2]:
def read_mnist(images_file, labels_file): 
    f1 = open(labels_file, 'rb')
    magic_number, size = struct.unpack(">II", f1.read(8))
    labels = array.array("b", f1.read())
    f1.close()
    
    f2 = open(images_file, 'rb')
    magic_number, size, rows, cols = struct.unpack(">IIII", f2.read(16))
    raw_images = array.array("B", f2.read())
    f2.close()

    N = len(labels)
    images = np.zeros((N, rows*cols), dtype=np.uint8)
    for i in range(N):
        images[i] = np.array(raw_images[ i*rows*cols : (i+1)*rows*cols ])

    return images, labels

# Read Training data.
TRAIN_IMAGES  = "C:\\Users\\oop\\Desktop\\Winter 2016\\train-images.idx3-ubyte"
TRAIN_LABELS  = "C:\\Users\\oop\\Desktop\\Winter 2016\\train-labels.idx1-ubyte"
images_train, labels_train = read_mnist(TRAIN_IMAGES, TRAIN_LABELS)
#images_train, labels_train = images_train[:20000], labels_train[:20000]

# Read Test data.
TEST_IMAGES = "C:\\Users\\oop\\Desktop\\Winter 2016\\t10k-images.idx3-ubyte"
TEST_LABELS  = "C:\\Users\\oop\\Desktop\\Winter 2016\\t10k-labels.idx1-ubyte"
images_test, labels_test = read_mnist(TEST_IMAGES, TEST_LABELS)
#images_test, labels_test = images_test[:2000], labels_test[:2000]

In [3]:
def sigmoid(x):
    if type(x) is np.ndarray or type(x) is list:
        return np.array([sigmoid(ele) for ele in x])
    else:
        return 1.0 / (1.0 + math.exp(-x))

def sigmoid_derivate(x):
    if type(x) is np.ndarray or type(x) is list:
        return np.array([sigmoid_derivate(ele) for ele in x])
    else:
        a = sigmoid(x)
        return a * (1-a)

In [4]:
X = zscore(images_train, axis=1)
Y = [np.array([1 if i == label else 0 for i in range(10)]) for label in labels_train]
X_test = zscore(images_test, axis=1)
Y_test = [np.array([1 if i == label else 0 for i in range(10)]) for label in labels_test]

In [25]:
class MultiLayerNeuralNetwork:
    
    def __init__(self, inputs, outputs, learning_rate, layers, 
                 activation_fn, activation_derivative_fn, *args, **kwargs):
        """
        TODO: Add doc.
        """
        self.inputs = inputs
        self.outputs = outputs
        self.learning_rate = learning_rate
        self.layers = layers
        self.activation_fn = activation_fn
        self.activation_derivative_fn = activation_derivative_fn
    
    def get_random_weights(self):
        weights = []
        for i in range(len(self.layers)-1):
            weights.append(np.random.random((self.layers[i]+1, 
                                             self.layers[i+1])))
        return weights
    
    def get_gradients(self, X, Y, weights):
        # Forward propogation.
        a,z = self.get_network_output(X, weights)
        
        # Backward error propogation.
        deltas = []
        deltas.append((a[-1] - Y))
        for l in reversed(range(1, len(self.layers)-1)):
            deltas.append(np.dot(weights[l], deltas[-1])*self.activation_derivative_fn(z[l]))
        deltas.reverse()
        
        gradients = []
        for i in range(len(weights)):
            if i != (len(weights)-1):
                deltas[i] = deltas[i][1:]
            gradients.append(np.dot(np.atleast_2d(a[i]).transpose(), 
                                    np.atleast_2d(deltas[i])))
        return gradients
            
    def train(self):
        """
        Trains the data using multilayered.
        """
        weights = self.get_random_weights()
        
        # 5 iterations.
        for i in range(100):
            for i in range(len(self.inputs)):
                X = np.insert(self.inputs[i], 0, 1)
                Y = self.outputs[i]
                gradients = self.get_gradients(X,Y,weights)
                # Use gradient descent algorithm to update
                # accordingly due to error derivates.
                for i in range(len(weights)):
                    weights[i] = weights[i] - self.learning_rate * gradients[i]

            print self.test(self.inputs, self.outputs, weights)
        
        self.weights = weights
        return weights
        
    def get_network_output(self, X, weights):
        """
        Calculates the output at each layer of the network.
        """
        a = [X]
        z = [X]
        for l in range(len(self.layers)-1):
            zl = np.dot(weights[l].transpose(), z[l])
            if l == (len(self.layers)-2):
                output = np.vectorize(math.exp)(zl)
                output = output / output.sum()
                a.append(output)
                z.append(zl)
            else:
                z.append(np.insert(zl,0,1))
                a.append(np.insert(self.activation_fn(zl),0,1))
        return a,z
    
    def cross_entropy(self, weights):
        entropy = 0
        for i in range(len(self.inputs)):
            a,z = self.get_network_output(np.insert(self.inputs[i], 0, 1), 
                                          weights)
            y = a[-1]
            t = self.outputs[i]
            entropy += np.dot(t, np.vectorize(math.log)(y))
        return -entropy
    
    def test(self, test_input, test_output, weights):
        error = 0
        for i in range(len(test_input)):
            X = np.insert(test_input[i], 0, 1)
            T = test_output[i]
            a,z = self.get_network_output(X, weights)
            predicted_digit = a[-1].argmax()
            if T[predicted_digit] != 1:
                error += 1
        print "Error is %.2f" %(error*100/len(test_input))

In [28]:
network = MultiLayerNeuralNetwork(inputs=X[:20000],
                                  outputs=Y[:20000],
                                  learning_rate=0.001,
                                  layers=[784,100,10],
                                  activation_fn=sigmoid,
                                  activation_derivative_fn=sigmoid_derivate)

In [29]:
weights = network.train()

Error is 83.50
None
Error is 68.10
None
Error is 54.20
None
Error is 45.20
None
Error is 40.20
None
Error is 37.90
None
Error is 35.40
None
Error is 33.10
None
Error is 31.30
None
Error is 29.80
None
Error is 27.80
None
Error is 26.10
None
Error is 25.30
None
Error is 24.90
None
Error is 23.40
None
Error is 23.00
None
Error is 22.40
None
Error is 21.50
None
Error is 21.00
None
Error is 20.50
None
Error is 20.10
None
Error is 19.70
None
Error is 19.30
None
Error is 19.00
None
Error is 19.00
None
Error is 18.10
None
Error is 17.30
None
Error is 16.80
None
Error is 16.40
None
Error is 16.40
None
Error is 16.00
None
Error is 15.60
None
Error is 15.10
None
Error is 14.80
None
Error is 14.60
None
Error is 14.40
None
Error is 13.90
None
Error is 13.80
None
Error is 13.50
None
Error is 13.40
None
Error is 13.30
None
Error is 12.90
None
Error is 12.60
None
Error is 12.20
None
Error is 12.20
None
Error is 12.00
None
Error is 11.90
None
Error is 11.70
None
Error is 11.60
None
Error is 11.50
None


In [30]:
network.test(X_test[:100], Y_test[:100], weights)

Error is 32.00
