**Python Version   : 2.7.11** <br>
**IPython Version  : 4.0.1**

In [125]:
import os
import six
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [126]:
# Forward and backward functions for the linear, softmax and cross-entropy nodes.

def linear_forward(x, W, b):
    """Affine map applied row-wise: returns ``x . W^T + b``.

    x is multiplied by the transpose of W, then b is added (NumPy
    broadcasting decides how b lines up with the product).
    """
    projected = np.dot(x, W.T)
    return projected + b

def linear_backward(dz, x, W, b):
    """Backward pass of ``linear_forward``.

    Given dz (gradient w.r.t. the layer output), returns the tuple
    ``(dx, dW, db)``:
      * dx = dz . W
      * dW = dz^T . x
      * db = dz summed over axis 0

    ``b`` is accepted for symmetry with the forward signature but is not read.
    """
    grad_input = np.dot(dz, W)
    grad_weight = np.dot(dz.T, x)
    grad_bias = np.sum(dz, axis=0)
    return grad_input, grad_weight, grad_bias

def cross_entropy_loss_forward(p, y):
    """Mean negative log-probability of the labelled entries.

    For each row i the entry ``p[i, y[i]]`` is selected; the result is
    minus the mean of the natural log of those entries.
    """
    rows = np.arange(len(y))
    picked = p[rows, y]
    return -np.log(picked).mean()

def cross_entropy_loss_backward(p, y):
    """Gradient of the mean cross-entropy loss with respect to ``p``.

    Returns an array shaped like ``p`` that is zero everywhere except at
    the labelled entries ``[i, y[i]]``, where it equals
    ``-(1 / len(y)) / p[i, y[i]]``.

    The division is performed only at the labelled entries. Dividing the
    whole (mostly zero) matrix by ``p`` would evaluate 0/0 = NaN wherever a
    non-labelled probability is exactly 0, and that NaN would then
    propagate through the softmax backward pass into every gradient.
    A labelled probability of exactly 0 still yields -inf, which matches
    the infinite loss reported by the forward pass.
    """
    rows = np.arange(len(y))
    grad = np.zeros_like(p)
    # Same arithmetic order as scaling first and dividing second, so the
    # values at the labelled entries are bit-identical to the dense form.
    grad[rows, y] = (-1.0 / len(y)) / p[rows, y]
    return grad

def softmax_forward(z):
    """Row-wise softmax of a 2-D array.

    Each row has its maximum subtracted before exponentiation (which leaves
    the result unchanged but keeps ``exp`` from overflowing), then the
    exponentials are normalised so every row sums to one.
    """
    shifted = z - z.max(axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=1, keepdims=True)

def softmax_backward(dp, z):
    """Backward pass of ``softmax_forward``.

    Recomputes the softmax of ``z`` and maps ``dp`` (gradient w.r.t. the
    softmax output) to the gradient w.r.t. ``z``:
    ``p * dp - p * sum(p * dp, axis=1)``.
    """
    probs = softmax_forward(z)
    weighted = probs * dp
    row_totals = weighted.sum(axis=1, keepdims=True)
    return weighted - probs * row_totals

In [127]:
def load_mnist_data(path, num_train=60000):
    """Load a pickled MNIST dictionary and split it into train/test parts.

    The pickle at ``path`` must hold a mapping with ``'data'`` and
    ``'target'`` entries. ``'data'`` is cast to float32, divided by 255 and
    reshaped to ``(N, 1, 28, 28)``; ``'target'`` is cast to int32. Both are
    then split at index ``num_train``.

    Parameters
    ----------
    path : str
        Location of the pickle file.
    num_train : int, optional
        Number of leading samples that form the training set; everything
        after that index is returned as the test set. Defaults to 60000.

    Returns
    -------
    tuple
        ``(input_train, target_train, input_test, target_test)``.
    """
    # NOTE(review): unpickling can execute arbitrary code -- only point this
    # at a file from a trusted source.
    # The context manager closes the file handle even if loading fails.
    with open(path, 'rb') as mnist_pickle:
        mnist = six.moves.cPickle.load(mnist_pickle)

    data = mnist['data'].astype(np.float32)
    data /= 255
    data = data.reshape(data.shape[0], 1, 28, 28)
    target = mnist['target'].astype(np.int32)

    input_train, input_test = np.split(data, [num_train])
    target_train, target_test = np.split(target, [num_train])
    return input_train, target_train, input_test, target_test

In [128]:
class Node(object):
    """Base class for graph nodes; keeps private copies of values by name."""

    def __init__(self):
        # Maps a name to an independent array copy of the stored value.
        self.cache = {}

    def store_cache(self, **kwargs):
        """Copy every keyword argument into the cache under its keyword name."""
        for name in kwargs:
            value = kwargs[name]
            # Allocate a same-shape/same-dtype buffer and fill it, so later
            # changes to the caller's array do not affect the cached copy.
            self.cache[name] = np.empty_like(value)
            self.cache[name][...] = value

    def read_cache(self, key):
        """Return the cached array stored under ``key`` (KeyError if absent)."""
        return self.cache[key]

In [129]:
# Z = x W^T + b  (each row of x is one sample; equivalent to Z = Wx + b per sample)
class Linear(Node):
    """Affine layer holding a weight matrix ``W`` of shape (nout, nin) and a
    bias ``b`` of shape (1, nout), plus gradient accumulators ``dW``/``db``.
    """

    def __init__(self, nin, nout):
        super(Linear, self).__init__()
        # Zero-mean Gaussian initialisation with standard deviation 1/sqrt(nin).
        weight_std = 1.0 / np.sqrt(nin)
        self.W = np.random.normal(0, weight_std, (nout, nin))
        self.b = np.zeros((1, nout))
        # Gradient buffers with the same shapes as the parameters.
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)

    def forward(self, x):
        """Cache a copy of ``x`` for the backward pass and return the layer output."""
        self.store_cache(x=x)
        return linear_forward(x, self.W, self.b)

    def backward(self, dy):
        """Add the parameter gradients to ``dW``/``db`` and return the input gradient."""
        cached_x = self.read_cache('x')
        grad_x, grad_W, grad_b = linear_backward(dy, cached_x, self.W, self.b)
        # Gradients accumulate; call zerograds() first to start from zero.
        self.dW += grad_W
        self.db += grad_b
        return grad_x

    def zerograds(self):
        """Reset both gradient buffers to zero in place."""
        for buf in (self.dW, self.db):
            buf.fill(0)

In [130]:
class SoftmaxCrossEntropyLoss(Node):
    """Loss node chaining a softmax with the mean cross-entropy loss."""

    def __init__(self):
        super(SoftmaxCrossEntropyLoss, self).__init__()

    def forward(self, x, y):
        """Return the cross-entropy of ``softmax(x)`` against labels ``y``.

        Copies of ``x``, ``y`` and the softmax output are cached so that
        ``backward`` can be called without arguments.
        """
        self.store_cache(x=x, y=y)
        probs = softmax_forward(x)
        self.store_cache(softmax=probs)
        return cross_entropy_loss_forward(probs, y)

    def backward(self):
        """Return the gradient of the loss w.r.t. the ``x`` given to ``forward``."""
        logits = self.read_cache('x')
        labels = self.read_cache('y')
        probs = self.read_cache('softmax')
        # Chain rule: loss -> softmax output -> softmax input.
        grad_probs = cross_entropy_loss_backward(probs, labels)
        return softmax_backward(grad_probs, logits)

In [131]:
def sgd(params, grads,lr=0.1):
    """Plain gradient-descent step, applied in place.

    Pairs up ``params`` and ``grads`` positionally and subtracts
    ``lr * grad`` from each parameter using in-place subtraction, so the
    caller's arrays are modified. Returns None.
    """
    for weight, gradient in zip(params, grads):
        step = lr * gradient
        weight -= step

In [132]:
# Train a single linear layer + softmax cross-entropy on MNIST with
# mini-batch SGD, making one pass over the training set.
input_train, target_train, input_test, target_test = load_mnist_data('data/mnist.pkl')
N_train = len(input_train)
N_test = len(input_test)

ninput  = 784   # flattened 1 x 28 x 28 input
noutput = 10    # number of output classes
eta     = 0.1   # learning rate passed to sgd

linear_object = Linear(ninput, noutput)
loss_func = SoftmaxCrossEntropyLoss()
# Flatten every sample to a vector of length ninput.
input_train = input_train.reshape((N_train,ninput))
batch_size = 16

for i in range(0,N_train,batch_size):
    xbatch = input_train[i:i + batch_size]
    ybatch = target_train[i:i + batch_size]

    # Forward
    out = linear_object.forward(xbatch)
    loss = loss_func.forward(out, ybatch)

    # Clear the gradient buffer (Linear.backward accumulates into it)
    linear_object.zerograds()

    # Backward
    dout = loss_func.backward()
    dx = linear_object.backward(dout)

    # Parameter update
    sgd([linear_object.W, linear_object.b],[linear_object.dW, linear_object.db], lr=eta)

    # Compute accuracy on this batch, using the outputs from before the update
    pred = np.argmax(out, axis=1)
    acc = (pred==ybatch).mean()

    # Print statistics; single-argument print(...) behaves the same on
    # Python 2 and Python 3.
    print('{} loss={} acc={}'.format(i, loss, acc))

0 loss=2.30190342217 acc=0.0625
16 loss=2.28451965302 acc=0.25
32 loss=2.32827549874 acc=0.125
48 loss=2.05519837239 acc=0.3125
64 loss=1.91817853483 acc=0.375
80 loss=1.69666259399 acc=0.4375
96 loss=1.698484436 acc=0.625
112 loss=1.37869853646 acc=0.6875
128 loss=1.85113974214 acc=0.5
144 loss=1.59664259368 acc=0.5625
160 loss=1.64161231643 acc=0.375
176 loss=1.54510842664 acc=0.5625
192 loss=1.27971847723 acc=0.6875
208 loss=1.19790216448 acc=0.875
224 loss=1.43058885359 acc=0.625
240 loss=1.30177630203 acc=0.6875
256 loss=1.39494607379 acc=0.625
272 loss=1.22127957465 acc=0.6875
288 loss=0.894078945817 acc=0.875
304 loss=1.17152359441 acc=0.75
320 loss=1.09879659856 acc=0.6875
336 loss=1.14449730276 acc=0.75
352 loss=0.903938613316 acc=0.875
368 loss=0.870642854705 acc=0.8125
384 loss=0.966169235251 acc=0.875
400 loss=0.997733473638 acc=0.8125
416 loss=1.01832977835 acc=0.75
432 loss=0.953774397609 acc=0.8125
448 loss=0.852434378494 acc=0.875
464 loss=0.93400222421 acc=0.875
480 lo

In [133]:
# Compute test accuracy: run the trained layer over the test set in
# mini-batches and count correct argmax predictions.
input_test = input_test.reshape((N_test,ninput))
test_acc = 0
for i in range(0, len(input_test), batch_size):
    xbatch = input_test[i:i + batch_size]
    ybatch = target_test[i:i + batch_size]

    # Forward
    out = linear_object.forward(xbatch)
    loss = loss_func.forward(out, ybatch)

    pred = np.argmax(out, axis=1)
    test_acc += (pred==ybatch).sum()
# Convert the count of correct predictions into a fraction of the test set.
test_acc = float(test_acc) / N_test
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Test accuracy={}".format(test_acc))

Test accuracy=0.9086
