In [1]:
import numpy as np
import random, math
from sklearn.datasets import load_digits

In [2]:
# http://neuralnetworksanddeeplearning.com/chap1.html

def sigmoid(x):
    return 1/(1+np.exp(-x))

def sigmoid_deriv(x):
    return np.exp(x)/(1+np.exp(x))**2

def cost_deriv(outputs, cost):
    return outputs-cost

def nnet_setup(node_layout):
    weights = []
    biases = []
    for i in range(1, len(node_layout)):
        weights.append(np.random.randn(node_layout[i], node_layout[i-1]))
        biases.append(np.random.randn(node_layout[i], 1))
    for i in range(0, len(biases)):
        for j in range(0, len(biases[i])):
            biases[i][j] = [0]
    return weights, biases

def nnet_prop(weights, biases, inputs):
    for w, b in zip(weights, biases):
        inputs = sigmoid(np.dot(w, inputs) + b)
    return inputs

In [14]:
def split_set(dataset, point):
    temp1 = list(zip(dataset.images[:point], dataset.target[:point]))
    temp2 = list(zip(dataset.images[point:], dataset.target[point:]))
    random.shuffle(temp1)
    random.shuffle(temp2)
    training_set, training_sols = zip(*temp1)
    test_set, test_sols = zip(*temp2)
    return list(zip(training_set, training_sols)), list(zip(test_set, test_sols))

def split_to_batch(trainset, size):
    return [trainset[n*size:(n+1)*size] for n in range(0, math.floor(len(trainset)/size))]

def conv_to_col(vec):
    return np.rot90([vec.reshape((64))])

def create_tgt_vec(pos):
    tmp = np.zeros(10)
    tmp[pos] = 1
    return np.rot90([tmp])

In [39]:
digits = load_digits()
train_set, test_set = split_set(digits, 5)
train_set = split_to_batch(train_set, 1)

wts, bias = nnet_setup([64, 15, 25, 10])

def backprop(train_set):
    sum_del_w = [np.zeros(wt.shape) for wt in w]
    sum_del_b = [np.zeros(bt.shape) for bt in b]
    del_w = [np.zeros(wt.shape) for wt in w]
    del_b = [np.zeros(bt.shape) for bt in b]
    
    set_0 = train_set[0]

    pre_sig = []; post_sig = []
    for test, sol in set_0:
        inp = conv_to_col(test)
        outp = create_tgt_vec(sol)
        for wt, bt in zip(wts, bias):
            #print(np.dot(wt, inp))
            inp = np.dot(wt, inp) + bt
            pre_sig.append(inp)
            post_sig.append(sigmoid(inp))
            inp = sigmoid(inp)
            
        delta = cost_deriv(post_sig[-1], outp) * sigmoid_deriv(pre_sig[-1])
        del_b[-1] = delta
        del_w[-1] = np.dot(delta, post_sig[-2].transpose())
        
        for i in range(2, len(w)):
            pre_sig_vec = pre_sig[-i]
            sig_deriv = sigmoid_deriv(pre_sig_vec)
            delta = np.dot(w[-i+1].transpose(), delta) * sig_deriv
            del_b[-i] = delta
            del_w[-i] = np.dot(delta, post_sig[-i-1].transpose())

    return;

backprop(train_set);

[array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., 