In [1]:
import numpy as np
import random, math
from sklearn.datasets import load_digits

In [2]:
# http://neuralnetworksanddeeplearning.com/chap1.html

def sigmoid(x):
    """Logistic function ``1 / (1 + e**-x)``, applied element-wise.

    `x` may be a scalar or a NumPy array; the result has the same shape.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

def sigmoid_deriv(x):
    """Derivative of the logistic function, evaluated element-wise at `x`.

    Mathematically this is ``e**x / (1 + e**x)**2``, which is symmetric in
    `x`. It is evaluated through ``e**-|x|`` so that large-magnitude inputs
    cannot overflow ``np.exp`` and turn the result into ``nan``.

    `x` may be a scalar or a NumPy array; the result has the same shape.
    """
    # exp(-|x|) always lies in (0, 1], so neither the numerator nor the
    # squared denominator can overflow.
    decay = np.exp(-np.abs(x))
    return decay / (1 + decay) ** 2

def cost_deriv(outputs, cost):
    """Return the element-wise difference ``outputs - cost``.

    NOTE(review): despite its name, `cost` is presumably the target/expected
    output vector -- no caller is visible here, so confirm before relying on it.
    """
    difference = outputs - cost
    return difference

def nnet_setup(node_layout):
    """Create the initial parameters of a fully connected network.

    Parameters
    ----------
    node_layout : sequence of int
        Number of nodes in each layer, input layer first.

    Returns
    -------
    (weights, biases) : tuple of two lists
        ``weights[k]`` has shape ``(node_layout[k+1], node_layout[k])`` and is
        drawn from a standard normal distribution; ``biases[k]`` is a zero
        column of shape ``(node_layout[k+1], 1)``. Both lists are empty when
        `node_layout` has fewer than two entries.
    """
    weights = [
        np.random.randn(n_out, n_in)
        for n_in, n_out in zip(node_layout[:-1], node_layout[1:])
    ]
    # Biases start at zero; allocate them directly rather than drawing random
    # values and overwriting every entry afterwards.
    biases = [np.zeros((n_out, 1)) for n_out in node_layout[1:]]
    return weights, biases

def nnet_prop(weights, biases, inputs):
    """Feed `inputs` forward through the network and return the final activation.

    Each layer computes ``sigmoid(w . a + b)`` on the previous layer's
    activation. With no layers, `inputs` is returned unchanged.
    """
    activation = inputs
    for layer_w, layer_b in zip(weights, biases):
        weighted_input = np.dot(layer_w, activation) + layer_b
        activation = sigmoid(weighted_input)
    return activation

In [14]:
def split_set(dataset, point):
    """Split `dataset` at index `point` into shuffled training and test lists.

    Parameters
    ----------
    dataset : object with parallel ``images`` and ``target`` sequences
    point : int
        Samples before this index go to the training list, the rest to the
        test list.

    Returns
    -------
    (training, test) : tuple of two lists of ``(image, target)`` tuples, each
        shuffled independently with the `random` module. Either list may be
        empty (e.g. ``point == 0``).
    """
    training_pairs = list(zip(dataset.images[:point], dataset.target[:point]))
    test_pairs = list(zip(dataset.images[point:], dataset.target[point:]))
    random.shuffle(training_pairs)
    random.shuffle(test_pairs)
    # Return the pair lists directly: unzipping and re-zipping them adds
    # nothing and fails to unpack when a partition is empty.
    return training_pairs, test_pairs

def split_to_batch(trainset, size):
    """Cut `trainset` into consecutive batches of exactly `size` items.

    Only full batches are produced: any leftover items at the end that do not
    fill a whole batch are dropped.
    """
    batch_count = math.floor(len(trainset) / size)
    batches = []
    for batch_idx in range(batch_count):
        start = batch_idx * size
        batches.append(trainset[start:start + size])
    return batches

def conv_to_col(vec):
    """Flatten `vec` into a column vector of shape ``(n, 1)``.

    Elements keep their row-major order, so row ``i`` of the result is
    element ``i`` of the flattened input. (Rotating a 1 x n row with
    ``np.rot90`` would instead yield the column in reversed order.) Any input
    size is accepted, not only 64 elements.

    The result is a fresh array; the input is not aliased.
    """
    # np.array copies, so callers can modify the column without touching the
    # source image.
    return np.array(vec).reshape(-1, 1)

def create_tgt_vec(pos, n_classes=10):
    """Build a one-hot target column of shape ``(n_classes, 1)``.

    Row `pos` is set to 1 and every other row is 0, so the row index of the
    hot entry equals the class label. (Building the vector as a row and
    rotating it with ``np.rot90`` would reverse it, putting the 1 at row
    ``n_classes - 1 - pos``.)

    Parameters
    ----------
    pos : int
        Class label / index of the hot row.
    n_classes : int, optional
        Length of the column; defaults to 10.
    """
    target = np.zeros((n_classes, 1))
    target[pos, 0] = 1
    return target

In [27]:
# Load the scikit-learn digits dataset.
digits = load_digits()
# Samples before index 5 form the training partition; everything from index 5
# onwards becomes the test partition.
train_set, test_set = split_set(digits, 5)
# Regroup the training pairs into mini-batches of one (image, label) pair each.
# NOTE: this rebinds `train_set` from a flat list of pairs to a list of batches.
train_set = split_to_batch(train_set, 1)

# Network layout: 64 inputs (the length of the column built by conv_to_col),
# hidden layers of 15 and 25 nodes, and 10 outputs (the length of the target
# vector built by create_tgt_vec).
wts, bias = nnet_setup([64, 15, 25, 10])

def backprop(train_set):
    """Run the forward pass over the first mini-batch of `train_set`.

    Work in progress: only the forward pass is implemented. For every
    ``(image, label)`` pair in ``train_set[0]`` the per-layer weighted inputs
    and activations are collected, but no gradients are computed yet and the
    function returns ``None``.

    Reads the module-level network parameters `wts` and `bias`.

    Parameters
    ----------
    train_set : sequence of mini-batches, each an iterable of
        ``(image, label)`` pairs. Only the first mini-batch is used.
    """
    # These previously iterated over the undefined names `w` / `b`, which
    # raises NameError on a fresh kernel; the parameters live in `wts` / `bias`.
    sum_nabla_w = [np.zeros(wt.shape) for wt in wts]
    sum_nabla_b = [np.zeros(bt.shape) for bt in bias]

    set_0 = train_set[0]

    for test, sol in set_0:
        inp = conv_to_col(test)
        outp = create_tgt_vec(sol)
        # Per-sample records: entry k holds layer k's weighted input
        # (pre_sig) and its activation (post_sig).
        pre_sig = []
        post_sig = []
        for wt, bt in zip(wts, bias):
            weighted = np.dot(wt, inp) + bt
            activation = sigmoid(weighted)
            pre_sig.append(weighted)
            post_sig.append(activation)
            inp = activation
        # TODO: backward pass -- combine `outp`, `pre_sig` and `post_sig` into
        # per-layer gradients and accumulate them in sum_nabla_w / sum_nabla_b.

    return

# Smoke-run backprop on the prepared mini-batches; the trailing `;` keeps the
# cell from echoing a result.
backprop(train_set);

[array([[ -20.19094092],
       [ -23.37969498],
       [  12.97597003],
       [ -18.96899521],
       [   2.69229635],
       [  12.41895858],
       [ 143.9520522 ],
       [ 117.01614585],
       [  28.57284842],
       [  44.54718814],
       [  14.23720962],
       [ -19.74788425],
       [  -2.12503747],
       [  -6.04173383],
       [   3.54304577]]), array([[ 0.05698156],
       [ 2.43515591],
       [-0.1911002 ],
       [ 0.88774487],
       [ 5.99109535],
       [-1.29723444],
       [-1.08640838],
       [-1.72552695],
       [ 7.20413052],
       [ 1.22747477],
       [-1.92654559],
       [ 2.25204527],
       [-5.46694298],
       [ 4.97399716],
       [ 1.35646873],
       [ 0.04793163],
       [ 1.65720446],
       [-0.04610317],
       [ 2.1413044 ],
       [-3.74998252],
       [-3.63850771],
       [-3.49792644],
       [ 2.72953198],
       [-3.3424208 ],
       [-5.81905897]]), array([[-0.44609753],
       [-5.86544064],
       [ 1.29947947],
       [-4.47965781