In [2]:
import random
import numpy as np

In [37]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)); applied element-wise when ``z`` is a NumPy array."""
    decay = np.exp(-z)
    return 1.0 / (1.0 + decay)

def sigmoid_prime(z):
    """Derivative of the logistic sigmoid at ``z``: s * (1 - s) with s = sigmoid(z)."""
    s = sigmoid(z)
    return s * (1 - s)

In [None]:
class Network(object):
    """Fully connected feed-forward network of sigmoid neurons stored as plain lists.

    ``biases[l][i]`` is the bias of neuron ``i`` in layer ``l + 1`` and
    ``weights[l][i][j]`` is the weight of the connection from neuron ``j`` in
    layer ``l`` to neuron ``i`` in layer ``l + 1``.  The input layer has no
    parameters.  A single sample is a flat sequence of ``sizes[0]`` numbers.
    """

    def __init__(self, sizes):
        """Create a network with ``sizes[k]`` neurons in layer ``k``.

        Every bias and weight is drawn from a Gaussian distribution with
        mean 0 and standard deviation 1.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # One bias per neuron of every non-input layer.
        self.biases = [
            [random.normalvariate(0, 1) for _ in range(n_out)]
            for n_out in sizes[1:]
        ]
        # One (n_out x n_in) matrix per pair of consecutive layers.
        self.weights = [
            [[random.normalvariate(0, 1) for _ in range(n_in)]
             for _ in range(n_out)]
            for n_in, n_out in zip(sizes[:-1], sizes[1:])
        ]

    def _check_input_size(self, x):
        """Raise ValueError unless ``x`` has one entry per input neuron."""
        if len(x) != self.sizes[0]:
            raise ValueError(
                "expected {0} input values, got {1}".format(self.sizes[0], len(x)))

    @staticmethod
    def _weighted_inputs(w, b, activation):
        """Return z = w . activation + b for one layer and one flat sample."""
        return [
            sum(w_ij * a_j for w_ij, a_j in zip(w_row, activation)) + b_i
            for w_row, b_i in zip(w, b)
        ]

    @staticmethod
    def _label_of(values):
        """Reduce an output/target to a class label.

        A non-sequence is returned unchanged, a one-element sequence is
        thresholded at 0.5, and a longer sequence yields the index of its
        largest entry.
        """
        if not hasattr(values, "__len__"):
            return values
        if len(values) == 1:
            return int(values[0] >= 0.5)
        return max(range(len(values)), key=lambda i: values[i])

    def feedforward(self, a):
        """Return the output of the network for input ``a``.

        ``a`` is either one sample given as a flat sequence of ``sizes[0]``
        numbers (the result is a flat list of ``sizes[-1]`` activations), or a
        2-D sequence with one row per input neuron and one column per sample
        (the result then has one row per output neuron and the same columns).

        Raises:
            ValueError: if ``a`` does not have ``sizes[0]`` entries/rows.
        """
        self._check_input_size(a)
        if len(a) == 0 or not hasattr(a[0], "__len__"):
            activation = list(a)
            for b, w in zip(self.biases, self.weights):
                activation = [
                    sigmoid(z) for z in self._weighted_inputs(w, b, activation)]
            return activation
        # Matrix input: run every column as its own sample, then lay the
        # results out again as rows = output neurons, columns = samples.
        outputs = [self.feedforward(list(column)) for column in zip(*a)]
        return [[out[i] for out in outputs] for i in range(self.sizes[-1])]

    def SGD(self, training_data, epoches, mini_batch_size, eta, test_data = None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            training_data: iterable of ``(x, y)`` pairs; ``x`` is a flat input
                sample and ``y`` the desired output (see ``backprop``).
            epoches: number of passes over the training data.
            mini_batch_size: number of samples per gradient step (>= 1).
            eta: learning rate.
            test_data: optional iterable of ``(x, y)`` pairs.  When it is
                non-empty, the result of ``evaluate`` is printed after every
                epoch; otherwise only a completion message is printed.

        Raises:
            ValueError: if ``mini_batch_size`` is smaller than 1.
        """
        if mini_batch_size < 1:
            raise ValueError("mini_batch_size must be at least 1")
        # Work on a copy so shuffling never reorders the caller's list; this
        # also allows iterators such as zip(...) to be passed in.
        training_data = list(training_data)
        test_data = list(test_data) if test_data is not None else []
        n = len(training_data)
        n_test = len(test_data)
        for j in range(epoches):
            random.shuffle(training_data)
            for k in range(0, n, mini_batch_size):
                self.update_mini_batch(training_data[k:k + mini_batch_size], eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(
                    j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step using the samples in ``mini_batch``.

        The gradients returned by ``backprop`` are summed over the batch and
        every parameter is moved by ``-(eta / len(mini_batch))`` times that
        sum.  An empty batch leaves the network unchanged.
        """
        if len(mini_batch) == 0:
            return
        # Accumulators with the same shapes as the parameters.
        nabla_b = [[0.0] * len(b) for b in self.biases]
        nabla_w = [[[0.0] * len(w_row) for w_row in w] for w in self.weights]

        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [
                [nb + dnb for nb, dnb in zip(layer, d_layer)]
                for layer, d_layer in zip(nabla_b, delta_nabla_b)
            ]
            nabla_w = [
                [[nw + dnw for nw, dnw in zip(row, d_row)]
                 for row, d_row in zip(layer, d_layer)]
                for layer, d_layer in zip(nabla_w, delta_nabla_w)
            ]

        # float() keeps an integer eta from being truncated by integer division.
        step = float(eta) / len(mini_batch)
        self.biases = [
            [b_i - step * nb_i for b_i, nb_i in zip(b, nb)]
            for b, nb in zip(self.biases, nabla_b)
        ]
        self.weights = [
            [[w_ij - step * nw_ij for w_ij, nw_ij in zip(w_row, nw_row)]
             for w_row, nw_row in zip(w, nw)]
            for w, nw in zip(self.weights, nabla_w)
        ]

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the gradient of the cost for one sample.

        Args:
            x: flat sequence of ``sizes[0]`` input values (not modified).
            y: desired output; a sequence of ``sizes[-1]`` values, or a bare
                number when the network has a single output neuron.

        Returns:
            Two nested lists shaped like ``self.biases`` and ``self.weights``.
            For a network without any weight layer both lists are empty.

        Raises:
            ValueError: if ``x`` or ``y`` has the wrong number of entries.
        """
        self._check_input_size(x)
        if not self.weights:
            return [], []

        # Forward pass, remembering every weighted input z and activation.
        activation = list(x)  # copy: the caller's sample must stay untouched
        activations = [activation]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = self._weighted_inputs(w, b, activation)
            zs.append(z)
            activation = [sigmoid(z_i) for z_i in z]
            activations.append(activation)

        nabla_b = [None] * len(self.biases)
        nabla_w = [None] * len(self.weights)

        # Error of the output layer.
        cost_gradient = self.cost_derivative(activations[-1], y)
        delta = [c * sigmoid_prime(z_i) for c, z_i in zip(cost_gradient, zs[-1])]
        nabla_b[-1] = delta
        nabla_w[-1] = [[d_i * a_j for a_j in activations[-2]] for d_i in delta]

        # Propagate the error backwards; l = 2 is the last hidden layer.
        for l in range(2, self.num_layers):
            downstream_w = self.weights[-l + 1]
            downstream_delta = delta
            delta = [
                sum(w_row[i] * d_k
                    for w_row, d_k in zip(downstream_w, downstream_delta))
                * sigmoid_prime(z_i)
                for i, z_i in enumerate(zs[-l])
            ]
            nabla_b[-l] = delta
            nabla_w[-l] = [
                [d_i * a_j for a_j in activations[-l - 1]] for d_i in delta]

        return nabla_b, nabla_w

    def evaluate(self, test_data):
        """Return how many ``(x, y)`` pairs in ``test_data`` are classified correctly.

        The predicted label is the index of the largest output activation, or
        the output thresholded at 0.5 when the network has one output neuron.
        ``y`` may be that label directly or an output vector, which is reduced
        to a label the same way.  Each ``x`` must be a flat sample.
        """
        correct = 0
        for x, y in test_data:
            if self._label_of(self.feedforward(x)) == self._label_of(y):
                correct += 1
        return correct

    def cost_derivative(self, output_activations, y):
        """Return the derivative of the quadratic cost w.r.t. the output activations.

        For list/tuple activations the result is the element-wise difference
        ``output_activations - y`` as a list (a bare number ``y`` is accepted
        for a single output).  Any other operand type is subtracted directly.

        Raises:
            ValueError: if the activations and ``y`` differ in length.
        """
        if not isinstance(output_activations, (list, tuple)):
            return output_activations - y
        targets = list(y) if hasattr(y, "__len__") else [y]
        if len(targets) != len(output_activations):
            raise ValueError(
                "expected {0} target values, got {1}".format(
                    len(output_activations), len(targets)))
        return [a_i - t_i for a_i, t_i in zip(output_activations, targets)]
            
        

In [39]:
# Build a small 2-3-1 network and show what the constructor produced.
network = Network([2, 3, 1])
for label, value in (
    ('num_lyers', network.num_layers),
    ('sizes', network.sizes),
    ('biases', network.biases),
    ('weight', network.weights),
):
    print(label, value)

num_lyers 3
sizes [2, 3, 1]
biases [[0.3136151086017989, -0.9122345572748317, 1.0197929133010748], [0.7405629148534872]]
weight [[[0.27413635865047253, 1.7816434819105802], [0.10477069505515149, 0.22159893944248585], [-1.3535738028265307, 0.5548549279188865]], [[0.6182975632480122, -0.8184378096645144, -0.04042369345630952]]]


In [20]:
# Draw one sample from a Gaussian with mean 0 and standard deviation 1.
random.normalvariate(mu=0, sigma=1)

0.4102644554342979

In [16]:
# Draw a 1x2 array of standard-normal samples from NumPy's global generator.
np.random.standard_normal(size=(1, 2))

array([[-0.03286934, -0.07088019]])

In [45]:
# Ragged nested list: the inner lists do not have to share a length.
a = [[1, 2], list(range(1, 6)), [1, 2, 3]]
len(a[1])

5