In [1]:
from __future__          import division
from scipy.stats         import zscore
import matplotlib.pyplot as plt
import numpy             as np
import array
import math
import struct

%matplotlib inline
%pdb

Automatic pdb calling has been turned ON


In [2]:
def read_mnist(images_file, labels_file): 
    f1 = open(labels_file, 'rb')
    magic_number, size = struct.unpack(">II", f1.read(8))
    labels = array.array("b", f1.read())
    f1.close()
    
    f2 = open(images_file, 'rb')
    magic_number, size, rows, cols = struct.unpack(">IIII", f2.read(16))
    raw_images = array.array("B", f2.read())
    f2.close()

    N = len(labels)
    images = np.zeros((N, rows*cols), dtype=np.uint8)
    for i in range(N):
        images[i] = np.array(raw_images[ i*rows*cols : (i+1)*rows*cols ])

    return images, labels

# Read Training data.
TRAIN_IMAGES  = "C:\\Users\\oop\\Desktop\\Winter 2016\\train-images.idx3-ubyte"
TRAIN_LABELS  = "C:\\Users\\oop\\Desktop\\Winter 2016\\train-labels.idx1-ubyte"
images_train, labels_train = read_mnist(TRAIN_IMAGES, TRAIN_LABELS)
#images_train, labels_train = images_train[:20000], labels_train[:20000]

# Read Test data.
TEST_IMAGES = "C:\\Users\\oop\\Desktop\\Winter 2016\\t10k-images.idx3-ubyte"
TEST_LABELS  = "C:\\Users\\oop\\Desktop\\Winter 2016\\t10k-labels.idx1-ubyte"
images_test, labels_test = read_mnist(TEST_IMAGES, TEST_LABELS)
#images_test, labels_test = images_test[:2000], labels_test[:2000]

In [3]:
def sigmoid(x):
    if type(x) is np.ndarray or type(x) is list:
        return np.array([sigmoid(ele) for ele in x])
    else:
        return 1.0 / (1.0 + math.exp(-x))

def sigmoid_derivate(x):
    if type(x) is np.ndarray or type(x) is list:
        return np.array([sigmoid_derivate(ele) for ele in x])
    else:
        a = sigmoid(x)
        return a * (1-a)

In [4]:
X = zscore(images_train, axis=1)
Y = [[1 if i == label else 0 for i in range(10)] for label in labels_train]
X_test = zscore(images_test, axis=1)
Y_test = [[1 if i == label else 0 for i in range(10)] for label in labels_test]

In [13]:
class MultiLayerNeuralNetwork:
    
    def __init__(self, inputs, outputs, learning_rate, hidden_layers, 
                 activation_fn, activation_derivative_fn, *args, **kwargs):
        """
        TODO: Add doc.
        """
        self.inputs = inputs
        self.outputs = outputs
        self.learning_rate = learning_rate
        self.hidden_layers = hidden_layers
        self.activation_fn = activation_fn
        self.activation_derivative_fn = activation_derivative_fn
    
    def get_random_weights(self):
        weights = []
        # Input layer to first hidden layer.
        weights.append(np.random.random((len(self.inputs[0])+1, self.hidden_layers[0])))
        # Add weights between hidden layer.
        for i in range(0, len(self.hidden_layers)-1):
            weights.append(np.random.random((self.hidden_layers[i]+1, self.hidden_layers[i+1])))
        # Add weights between last hidden layer and output layer.
        weights.append(np.random.random((self.hidden_layers[-1]+1, len(self.outputs[0]))))
        return weights
    
    def get_gradients(self, X, Y, weights):
        # Forward propogation.
        a,z = self.get_network_output(X, weights)
        # Backward error propogation.
        deltas = []
        deltas.append((a[-1] - Y)) #*self.activation_derivative_fn(z[-1]))
        for l in reversed(range(1, len(self.hidden_layers)+1)):
            deltas.append(np.dot(weights[l], deltas[-1])*self.activation_derivative_fn(z[l]))
        deltas.reverse()
        
        gradients = []
        for i in range(len(weights)):
            if i != (len(weights)-1):
                deltas[i] = deltas[i][1:]
            gradients.append(np.matmul(np.matrix(a[i]).transpose(), np.matrix(deltas[i])))
        return gradients
    
    def test_gradient(self):
        weights = self.get_random_weights()
        
        # One train loop.
        gradients = []
        for i in range(len(weights)):
            gradients.append(np.zeros(weights[i].shape))
            
        for i in range(len(self.inputs)):
            X = np.insert(self.inputs[i], 0, 1)
            Y = self.outputs[i]
            batch_gradients = self.get_gradients(X,Y,weights)
            for i in range(len(gradients)):
                gradients[i] += batch_gradients[i]
        
        epsilon = 2*10**-5
        for l in range(len(self.hidden_layers)+1):
            for i in range(len(weights[l])):
                for j in range(len(weights[l][i])):
                    w = weights[l][i][j]
                    weights[l][i][j] = w + epsilon
                    val1 = self.cross_entropy(weights)
                    weights[l][i][j] = w - epsilon
                    val2 = self.cross_entropy(weights)
                    weights[l][i][j] = w
                    print (val1 - val2) / (2*epsilon)
                    print (gradients[l][i][j])
            
    def train(self):
        """
        Trains the data using multilayered.
        """
        
        weights = self.get_random_weights()
        print self.cross_entropy(weights)
        
        # 5 iterations.
        for i in range(50):
            gradients = []
            for i in range(len(weights)):
                gradients.append(np.zeros(weights[i].shape))

            for i in range(len(self.inputs)):
                X = np.insert(self.inputs[i], 0, 1)
                Y = self.outputs[i]
                batch_gradients = self.get_gradients(X,Y,weights)
                for i in range(len(gradients)):
                    if weights[i].shape != batch_gradients[i].shape:
                        raise Exception("FUCK YIU")
                        
                    weights[i] += batch_gradients[i]
            
            # Use gradient descent algorithm to update
            # accordingly due to error derivates.
            #for i in range(len(weights)):
            #    weights[i] = weights[i] + self.learning_rate * gradients[i]

        print self.cross_entropy(weights)
        
        self.weights = weights
        return weights
        
    def get_network_output(self, X, weights):
        """
        Calculates the output at each layer of the network.
        """
        a = [X]
        z = [X]
        for l in range(len(self.hidden_layers)+1):
            zl = np.dot(weights[l].transpose(), z[l])
            if l == len(self.hidden_layers):
                # Softmax output function.
                output = np.vectorize(math.exp)(zl)
                output = output / output.sum()
                a.append(output)
                z.append(zl)
            else:
                z.append(np.insert(zl,0,1))
                al = self.activation_fn(zl)
                # Add bias.
                a.append(np.insert(al,0,1))
        return a,z
    
    def cross_entropy(self, weights):
        entropy = 0
        for i in range(len(self.inputs)):
            a,z = self.get_network_output(np.insert(self.inputs[i], 0, 1), weights)
            y = a[-1]
            t = self.outputs[i]
            entropy = np.dot(t, np.vectorize(math.log)(y))
        return -entropy
    
    def test(self, test_input, test_output):
        weights = self.weights
        error = 0
        for i in range(len(test_input)):
            X = np.insert(test_input[i], 0, 1)
            T = test_output[i]
            a,z = self.get_network_output(X, weights)
            predicted_digit = a[-1].argmax()
            if T[predicted_digit] != 1:
                error += 1
        print "Error is %.2f" %(error*100/len(test_input))

In [14]:
network = MultiLayerNeuralNetwork(inputs=X[:2],
                                  outputs=Y[:2],
                                  learning_rate=0.0001,
                                  hidden_layers=[2],
                                  activation_fn=sigmoid,
                                  activation_derivative_fn=sigmoid_derivate)

In [16]:
weights = network.train()

8.37609340408


OverflowError: math range error

> [1;32mc:\anaconda\lib\site-packages\numpy\lib\function_base.py[0m(1880)[0;36m_vectorize_call[1;34m()[0m
[1;32m   1879 [1;33m[1;33m[0m[0m
[0m[1;32m-> 1880 [1;33m            [0moutputs[0m [1;33m=[0m [0mufunc[0m[1;33m([0m[1;33m*[0m[0minputs[0m[1;33m)[0m[1;33m[0m[0m
[0m[1;32m   1881 [1;33m[1;33m[0m[0m
[0m
ipdb> q


In [8]:
network.test(X_test[:1000], Y_test[:1000])

Error is 88.30


In [187]:
network.get_network_output(X_test[:1][0], network.weights)[0][-1].argmax()

3

In [209]:
(2*np.random.random((3,1))-1).mean()

0.25727236773054124

In [210]:
aa = np.array([1,2])

In [216]:
np.insert(aa, 0, 1)

array([1, 1, 2])

In [242]:
aa[1:]

array([2])

<__main__.MultiLayerNeuralNetwork instance at 0x0000000019F397C8>