In [1]:
import mnist_loader

In [2]:
import numpy as np

In [3]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    return sigmoid(z)*(1 - sigmoid(z))

In [4]:
def output_cost_derivative(a, y):
    return (a - y)

In [53]:
class Network(object):

    def __init__(self, inpt, hidden, output):
        self.d0 = inpt
        self.d1 = hidden
        self.d2 = output
        self.init_weights(self.d0, self.d1, self.d2)
    
    def init_weights(self, d0, d1, d2):
        self.w1 = np.zeros([d1, d0])
        self.b1 = np.zeros([d1, 1])
        self.w2 = np.zeros([d2, d1])
        self.b2 = np.zeros([d2, 1])
        #     w1 = np.random.randn(d1, d0) / np.sqrt(d0)
        #     b1 = np.random.randn(d1, 1) 
        #     w2 = np.random.randn(d2, d1) / np.sqrt(d1)
        #     b2 = np.random.randn(d2, 1)
        
    def evaluate(self, data):
        results = [(np.argmax(self.feedforward(x)), y) for (x, y) in data]
        return sum(int(x==y) for x,y in results)

    def feedforward(self, x):
        z1 = np.dot(self.w1, x) + self.b1
        a1 = sigmoid(z1)
        z2 = np.dot(self.w2, a1) + self.b2
        a2 = sigmoid(z2)
        return a2

def SGD(net, data, epochs, eta):

    for i in range(epochs):
        dw1 = np.zeros([net.d1, net.d0])
        db1 = np.zeros([net.d1, 1])
        dw2 = np.zeros([net.d2, net.d1])
        db2 = np.zeros([net.d2, 1])
        for x,y in data:
            # forward pass
            z1 = np.dot(net.w1, x) + net.b1
            a1 = sigmoid(z1)
            z2 = np.dot(net.w2, a1) + net.b2
            a2 = sigmoid(z2)

            # backprop
            delta2 = output_cost_derivative(a2, y) * sigmoid_prime(z2)
            dw2 += np.dot(delta2, a1.transpose())
            db2 += delta2

            delta1 = np.dot(net.w2.transpose(), delta2) * sigmoid_prime(z1)
            dw1 += np.dot(delta1, x.transpose())
            db1 += delta1

        # update weights
        net.w2 -= (eta/float(n)) * dw2
        net.b2 -= (eta/float(n)) * db2
        net.w1 -= (eta/float(n)) * dw1
        net.b1 -= (eta/float(n)) * db1
    
#         print net.evaluate(data)

In [7]:
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

In [57]:
# %%timeit
n=1000
epochs = 30
data = training_data[:n]
eta = 3.0

data = training_data[:n]
inpt = data[0][0].shape[0]
hidden = 10
output = 10

net = Network(inpt, hidden, output)

In [58]:
# %%timeit 
SGD(net, data, epochs, eta)

In [59]:
net.w2

array([[-0.22113814, -0.22113814, -0.22113814, -0.22113814, -0.22113814,
        -0.22113814, -0.22113814, -0.22113814, -0.22113814, -0.22113814],
       [-0.20590532, -0.20590532, -0.20590532, -0.20590532, -0.20590532,
        -0.20590532, -0.20590532, -0.20590532, -0.20590532, -0.20590532],
       [-0.21865228, -0.21865228, -0.21865228, -0.21865228, -0.21865228,
        -0.21865228, -0.21865228, -0.21865228, -0.21865228, -0.21865228],
       [-0.22631396, -0.22631396, -0.22631396, -0.22631396, -0.22631396,
        -0.22631396, -0.22631396, -0.22631396, -0.22631396, -0.22631396],
       [-0.21259947, -0.21259947, -0.21259947, -0.21259947, -0.21259947,
        -0.21259947, -0.21259947, -0.21259947, -0.21259947, -0.21259947],
       [-0.22743271, -0.22743271, -0.22743271, -0.22743271, -0.22743271,
        -0.22743271, -0.22743271, -0.22743271, -0.22743271, -0.22743271],
       [-0.22506704, -0.22506704, -0.22506704, -0.22506704, -0.22506704,
        -0.22506704, -0.22506704, -0.22506704