In [11]:
from __future__          import division
from scipy.stats         import zscore
import matplotlib.pyplot as plt
import numpy             as np
import array
import math
import struct

%matplotlib inline
%pdb

Automatic pdb calling has been turned OFF


In [12]:
def read_mnist(images_file, labels_file): 
    f1 = open(labels_file, 'rb')
    magic_number, size = struct.unpack(">II", f1.read(8))
    labels = array.array("b", f1.read())
    f1.close()
    
    f2 = open(images_file, 'rb')
    magic_number, size, rows, cols = struct.unpack(">IIII", f2.read(16))
    raw_images = array.array("B", f2.read())
    f2.close()

    N = len(labels)
    images = np.zeros((N, rows*cols), dtype=np.uint8)
    for i in range(N):
        images[i] = np.array(raw_images[ i*rows*cols : (i+1)*rows*cols ])

    return images, labels

# Read Training data.
TRAIN_IMAGES  = "C:\\Users\\oop\\Desktop\\Winter 2016\\train-images.idx3-ubyte"
TRAIN_LABELS  = "C:\\Users\\oop\\Desktop\\Winter 2016\\train-labels.idx1-ubyte"
images_train, labels_train = read_mnist(TRAIN_IMAGES, TRAIN_LABELS)
#images_train, labels_train = images_train[:20000], labels_train[:20000]

# Read Test data.
TEST_IMAGES = "C:\\Users\\oop\\Desktop\\Winter 2016\\t10k-images.idx3-ubyte"
TEST_LABELS  = "C:\\Users\\oop\\Desktop\\Winter 2016\\t10k-labels.idx1-ubyte"
images_test, labels_test = read_mnist(TEST_IMAGES, TEST_LABELS)
#images_test, labels_test = images_test[:2000], labels_test[:2000]

In [13]:
def sigmoid(x):

    if type(x) is np.ndarray or type(x) is list:
        return np.array([sigmoid(ele) for ele in x])
    else:
        return 1.0 / (1.0 + math.exp(-x))

def sigmoid_derivate(x):
    if type(x) is np.ndarray or type(x) is list:
        return np.array([sigmoid_derivate(ele) for ele in x])
    else:
        a = sigmoid(x)
        return a * (1-a)

In [14]:
X = zscore(images_train, axis=1)
Y = [np.array([1 if i == label else 0 for i in range(10)]) for label in labels_train]
X_test = zscore(images_test, axis=1)
Y_test = [np.array([1 if i == label else 0 for i in range(10)]) for label in labels_test]

In [128]:
class MultiLayerNeuralNetwork:
    
    def __init__(self, inputs, outputs, learning_rate, layers, 
                 activation_fn, activation_derivative_fn, *args, **kwargs):
        """
        TODO: Add doc.
        """
        self.inputs = inputs
        self.outputs = outputs
        self.learning_rate = learning_rate
        self.layers = layers
        self.activation_fn = activation_fn
        self.activation_derivative_fn = activation_derivative_fn
    
    def get_random_weights(self):
        weights = []
        for i in range(len(self.layers)-1):
            weights.append(np.random.random((self.layers[i]+1, self.layers[i+1])))
        return weights
    
    def get_gradients(self, X, Y, weights):
        # Forward propogation.
        a,z = self.get_network_output(X, weights)
        # Backward error propogation.
        deltas = []
        #print a[-1], Y, z[-1]
        deltas.append((a[-1] - Y)*self.activation_derivative_fn(z[-1]))
        for l in reversed(range(1, len(self.layers)-1)):
            deltas.append(np.dot(weights[l], deltas[-1])*self.activation_derivative_fn(z[l]))
        deltas.reverse()
        
        gradients = []
        for i in range(len(weights)):
            if i != (len(weights)-1):
                deltas[i] = deltas[i][1:]
            gradients.append(np.dot(np.atleast_2d(a[i]).transpose(), np.atleast_2d(deltas[i])))
        return gradients
            
    def train(self):
        """
        Trains the data using multilayered.
        """
        weights = self.get_random_weights()
        #self.cross_entropy(weights)
        
        # 5 iterations.
        for i in range(10):
            for i in range(len(self.inputs)):
                X = np.insert(self.inputs[i], 0, 1)
                Y = self.outputs[i]
                gradients = self.get_gradients(X,Y,weights)
                # Use gradient descent algorithm to update
                # accordingly due to error derivates.
                for i in range(len(weights)):
                    weights[i] = weights[i] - self.learning_rate * gradients[i]

            #self.cross_entropy(weights)
            self.testXOR(self.inputs, self.outputs, weights)
        
        self.weights = weights
        return weights
        
    def get_network_output(self, X, weights):
        """
        Calculates the output at each layer of the network.
        """
        #print "----"
        #print weights[0].shape, weights[1].shape
        a = [X]
        z = [X]
        for l in range(len(self.layers)-1):
            zl = np.dot(weights[l].transpose(), z[l])
            if l == (len(self.layers)-2):
                if self.layers[-1] == 1:
                    a.append(self.activation_fn(zl))
                    z.append(zl)
                else:
                    # Softmax output function.
                    output = np.vectorize(math.exp)(zl)
                    output = output / output.sum()
                    a.append(output)
                    z.append(zl)
                    #print zl.shape, zl
            else:
                z.append(np.insert(zl,0,1))
                al = self.activation_fn(zl)
                #print zl.shape, zl
                # Add bias.
                a.append(np.insert(al,0,1))
        return a,z
    
    def cross_entropy(self, weights):
        entropy = 0
        for i in range(len(self.inputs)):
            a,z = self.get_network_output(np.insert(self.inputs[i], 0, 1), weights)
            y = a[-1]
            t = self.outputs[i]
            entropy += np.dot(t, np.vectorize(math.log)(y))
        return -entropy
    
    def test(self, test_input, test_output):
        weights = self.weights
        error = 0
        for i in range(len(test_input)):
            X = np.insert(test_input[i], 0, 1)
            T = test_output[i]
            a,z = self.get_network_output(X, weights)
            predicted_digit = a[-1].argmax()
            if T[predicted_digit] != 1:
                error += 1
        print "Error is %.2f" %(error*100/len(test_input))
    
    def testXOR(self, test_input, test_output, weights):
        error = 0
        for i in range(len(test_input)):
            X = np.insert(test_input[i], 0, 1)
            T = test_output[i]
            a,z = self.get_network_output(X, weights)
            print a[-1][0], T
        print "--------"

In [129]:
X1 = np.array([np.array([0,0]), 
              np.array([0,1]), 
              np.array([1,0]), 
              np.array([1,1])
             ])
Y1 = np.array([0,1,1,0])
network = MultiLayerNeuralNetwork(inputs=X1,
                                  outputs=Y1,
                                  learning_rate=0.2,
                                  layers=[2,2,1],
                                  activation_fn=sigmoid,
                                  activation_derivative_fn=sigmoid_derivate
                                 )
weights = network.train()

0.813007915989 0
0.829031395016 1
0.852279713226 1
0.865494713568 0
--------
0.802818050287 0
0.818014059789 1
0.840420623852 1
0.853248475105 0
--------
0.792083814376 0
0.806317372219 1
0.827635501093 1
0.839927105443 0
--------
0.780841497057 0
0.79397559996 1
0.813942226182 1
0.825535836107 0
--------
0.769146610138 0
0.781046610722 1
0.799390016642 1
0.810115910076 0
--------
0.75707471911 0
0.767613398594 1
0.784063239282 1
0.793750060622 0
--------
0.744720648434 0
0.753783622085 1
0.768082629041 1
0.77656504271 0
--------
0.732195763708 0
0.739686719194 1
0.751603041606 1
0.758730020856 0
--------
0.719623327187 0
0.725468528459 1
0.734807353505 1
0.740450175272 0
--------
0.707132294989 0
0.711283821811 1
0.717896847486 1
0.721955781316 0
--------


In [80]:
a,z = network.get_network_output(np.insert(X1[0], 0 , 1) , network.weights)

In [81]:
math

[array([1, 0, 0]), array([ 1.        ,  0.99621497,  0.99636434]), array([ 1.])]


In [83]:
weights[0].transpose()

array([[ 5.57290832,  5.61332277],
       [ 1.95687501,  1.89989565],
       [ 1.98279642,  1.78634371]])

In [66]:
weights = network.train()

2.14302260418
2.15789843117


In [67]:
network.test(X_test[:1000], Y_test[:1000])

Error is 91.30


In [101]:
math.floor(0.920000742938)

0.0

In [6]:
np.dot(np.array([1,2]).transpose(), np.array([2]))

ValueError: shapes (2,) and (1,) not aligned: 2 (dim 0) != 1 (dim 0)

> [1;32m<ipython-input-6-da988d917687>[0m(1)[0;36m<module>[1;34m()[0m
[1;32m----> 1 [1;33m[0mnp[0m[1;33m.[0m[0mdot[0m[1;33m([0m[0mnp[0m[1;33m.[0m[0marray[0m[1;33m([0m[1;33m[[0m[1;36m1[0m[1;33m,[0m[1;36m2[0m[1;33m][0m[1;33m)[0m[1;33m.[0m[0mtranspose[0m[1;33m([0m[1;33m)[0m[1;33m,[0m [0mnp[0m[1;33m.[0m[0marray[0m[1;33m([0m[1;33m[[0m[1;36m2[0m[1;33m][0m[1;33m)[0m[1;33m)[0m[1;33m[0m[0m
[0m
ipdb> q
