In [1]:
from __future__          import division
from scipy.stats         import zscore
import matplotlib.pyplot as plt
import numpy             as np
import array
import math
import struct

%matplotlib inline
#%pdb

In [2]:
def read_mnist(images_file, labels_file): 
    f1 = open(labels_file, 'rb')
    magic_number, size = struct.unpack(">II", f1.read(8))
    labels = array.array("b", f1.read())
    f1.close()
    
    f2 = open(images_file, 'rb')
    magic_number, size, rows, cols = struct.unpack(">IIII", f2.read(16))
    raw_images = array.array("B", f2.read())
    f2.close()

    N = len(labels)
    images = np.zeros((N, rows*cols), dtype=np.uint8)
    for i in range(N):
        images[i] = np.array(raw_images[ i*rows*cols : (i+1)*rows*cols ])

    return images, labels

# Read Training data.
TRAIN_IMAGES  = "C:\\Users\\oop\\Desktop\\Winter 2016\\train-images.idx3-ubyte"
TRAIN_LABELS  = "C:\\Users\\oop\\Desktop\\Winter 2016\\train-labels.idx1-ubyte"
images_train, labels_train = read_mnist(TRAIN_IMAGES, TRAIN_LABELS)
#images_train, labels_train = images_train[:20000], labels_train[:20000]

# Read Test data.
TEST_IMAGES = "C:\\Users\\oop\\Desktop\\Winter 2016\\t10k-images.idx3-ubyte"
TEST_LABELS  = "C:\\Users\\oop\\Desktop\\Winter 2016\\t10k-labels.idx1-ubyte"
images_test, labels_test = read_mnist(TEST_IMAGES, TEST_LABELS)
#images_test, labels_test = images_test[:2000], labels_test[:2000]

In [3]:
def sigmoid(x):
    if type(x) is np.ndarray or type(x) is list:
        return np.array([sigmoid(ele) for ele in x])
    else:
        return 1.0 / (1.0 + math.exp(-x))

def sigmoid_derivate(x):
    if type(x) is np.ndarray or type(x) is list:
        return np.array([sigmoid_derivate(ele) for ele in x])
    else:
        a = sigmoid(x)
        return a * (1-a)

In [4]:
X_train = np.matrix(zscore(images_train, axis=1))
Y_train = np.matrix([np.array([1 if i == label else 0 for i in range(10)]) for label in labels_train])
X_test = np.matrix(zscore(images_test, axis=1))
Y_test = np.matrix([np.array([1 if i == label else 0 for i in range(10)]) for label in labels_test])

In [125]:
class MultiLayerNeuralNetwork:
    
    def __init__(self, inputs, outputs, learning_rate, layers, 
                 activation_fn, activation_derivative_fn, 
                 validation_size, *args, **kwargs):
        """
        TODO: Add doc.
        """
        train_data_size = len(inputs) - validation_size
        self.inputs = inputs[:train_data_size]
        self.outputs = outputs[:train_data_size]
        self.cross_validation_inputs = inputs[train_data_size:]
        self.cross_validation_outputs = outputs[train_data_size:]
        self.learning_rate = learning_rate
        self.layers = layers
        self.activation_fn = activation_fn
        self.activation_derivative_fn = activation_derivative_fn
    
    def get_random_weights(self):
        weights = []
        for i in range(len(self.layers)-1):
            weights.append(np.random.random((self.layers[i]+1, 
                                             self.layers[i+1])))
        return weights
    
    def get_gradients(self, weights):
        # Forward propogation.
        
        i = random.randint(0,len(self.inputs)-1)
        X = self.inputs
        Y = self.outputs
        a,z = self.forward_prop(X, Y, weights)
            
        # Backward error propogation.
        deltas = []
        deltas.append((Y - a[-1]))
        for l in reversed(range(1, len(self.layers)-1)):
            g = np.apply_along_axis(self.activation_derivative_fn, axis=1, arr=z[l])
            delta = np.matrix(np.array(np.matmul(deltas[-1], weights[l].transpose()))*np.array(g))
            deltas.append(np.delete(delta, 0, axis=1)) # Note that we remove deltas calculated for bias node.
        deltas.reverse()
            
        gradients = []
        for i in range(len(weights)):
            gradients.append(np.matmul(a[i].transpose(), deltas[i]))
        return gradients
            
    def train(self, weights=None, iterations=100):
        """
        Trains the data using multilayered.
        """
        weights     = weights or self.get_random_weights()
        plot_points = [0,50,100,200,300]
        train_error = []
        test_error  = []
        k = 0
        # On-line Learning.
        for itr in range(0, iterations+1):
            
            if itr != 0:
                # Use gradient descent algorithm to update
                # accordingly due to error derivatives.
                gradients = self.get_gradients(weights)
                for i in range(len(weights)):
                    weights[i] = weights[i] + self.learning_rate * gradients[i]
                    
            if itr in plot_points:
                error = self.test(self.inputs, 
                                  self.outputs,
                                  weights)
                print error
                train_error.append(error)
                error = self.test(self.cross_validation_inputs, 
                                  self.cross_validation_outputs,
                                  weights)
                print error
                test_error.append(error)
                
        self.weights     = weights
        self.train_error = train_error
        self.test_error  = test_error
        
        return weights
        
    def forward_prop(self, inputs, outputs, weights):
        """
        Calculates the output at each layer of the network.
        """
        a = [np.insert(inputs, 0, 1, axis=1)]
        z = [np.insert(inputs, 0, 1, axis=1)]
        for l in range(len(self.layers)-1):
            zl = np.matmul(z[l], weights[l])
            if l == (len(self.layers)-2):
                output = np.exp(zl)
                output = output / output.sum(axis=1)
                a.append(output)
                z.append(zl)
            else:
                z.append(np.insert(zl, 0, 1, axis=1))
                al = np.apply_along_axis(self.activation_fn, axis=1, arr=zl)
                a.append(np.insert(al, 0, 1, axis=1))
        return a,z
    
    def test(self, test_inputs, test_outputs, weights):
        a,z = self.forward_prop(test_inputs, test_outputs, weights)
        predicted_digits = a[-1].argmax(axis=1)
        actual_digits = test_outputs.argmax(axis=1)
        error = (predicted_digits != actual_digits).sum()
        return error * 100 / len(test_inputs)

In [126]:
network = MultiLayerNeuralNetwork(inputs=X_train,
                                  outputs=Y_train,
                                  learning_rate=0.001,
                                  layers=[784,150,10],
                                  activation_fn=sigmoid,
                                  activation_derivative_fn=sigmoid_derivate,
                                  validation_size=10000)

In [127]:
weights = network.train(iterations=300)

87.848
88.09




KeyboardInterrupt: 