In [117]:
%matplotlib inline
import numpy as np
import random, math
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
from matplotlib.pylab import cm
from IPython.html.widgets import interact

In [212]:
# http://neuralnetworksanddeeplearning.com/chap1.html

def sig(x):
    return 1.0/(1.0+np.exp(-x))
sigmoid = np.vectorize(sig)

def sig_deriv(x):
    return sig(x)*(1-sig(x))
sigmoid_deriv = np.vectorize(sig_deriv)

def cost_deriv(outputs, cost):
    return outputs-cost

def nnet_setup(node_layout):
    weights = []
    biases = []
    for i in range(1, len(node_layout)):
        weights.append(np.random.randn(node_layout[i], node_layout[i-1])-0.5)
        biases.append(np.random.randn(node_layout[i], 1))
        #weights.append((np.random.rand(node_layout[i], node_layout[i-1])-0.5)/100.0)
        #biases.append(-1.0*np.random.rand(node_layout[i], 1)/100.0)
    return weights, biases

def nnet_prop(weights, biases, inputs):
    for w, b in zip(weights, biases):
        inputs = sigmoid(np.dot(w, inputs) + b)
    return inputs

In [213]:
def split_set(dataset, point):
    temp1 = list(zip(dataset.images[:point], dataset.target[:point]))
    temp2 = list(zip(dataset.images[point:], dataset.target[point:]))
    random.shuffle(temp1)
    random.shuffle(temp2)
    training_set, training_sols = zip(*temp1)
    test_set, test_sols = zip(*temp2)
    return list(zip(training_set, training_sols)), list(zip(test_set, test_sols))

def split_to_batch(trainset, size):
    return [trainset[n*size:(n+1)*size] for n in range(0, math.floor(len(trainset)/size))]

def conv_to_col(vec):
    return np.rot90([vec.reshape((64))])

def create_tgt_vec(pos):
    tmp = np.zeros(10)
    tmp[pos] = 1
    return np.rot90([tmp])

digits = load_digits()
train_set, test_set = split_set(digits, 1500)
train_set = split_to_batch(train_set, 50)

In [214]:
def backprop(train_set, wts, bias, eta):
    learning_coef = eta / len(train_set[0])
    
    for next_set in train_set:
        sum_del_w = [np.zeros(w.shape) for w in wts]
        sum_del_b = [np.zeros(b.shape) for b in bias]
        
        del_w = [np.zeros(wt.shape) for wt in wts]
        del_b = [np.zeros(bt.shape) for bt in bias]
        pre_sig = []; post_sig = []
        for test, sol in next_set:
            next_input = conv_to_col(test)
            outp = create_tgt_vec(sol)
            for w, b in zip(wts, bias):
                next_input = np.dot(w, next_input) + b
                pre_sig.append(next_input)
                post_sig.append(sigmoid(next_input))
                next_input = sigmoid(next_input)
            
            delta = cost_deriv(post_sig[-1], outp) * sigmoid_deriv(pre_sig[-1])
            del_b[-1] = delta
            del_w[-1] = np.dot(delta, post_sig[-2].transpose())
            
            for i in range(2, len(wts)):
                pre_sig_vec = pre_sig[-i]
                sig_deriv = sigmoid_deriv(pre_sig_vec)
                delta = np.dot(wts[-i+1].transpose(), delta) * sig_deriv
                del_b[-i] = delta
                del_w[-i] = np.dot(delta, post_sig[-i-1].transpose())
            
            sum_del_w = [dw + sdw for dw, sdw in zip(del_w, sum_del_w)]
            sum_del_b = [db + sdb for db, sdb in zip(del_b, sum_del_b)]
        
        wts = [wt - learning_coef * dw for wt, dw in zip(wts, sum_del_w)]
        bias = [bt - learning_coef * db for bt, db in zip(bias, sum_del_b)]
    
    return wts, bias

wts, bias = nnet_setup([64, 25, 10])
backprop(train_set, wts, bias, 0.1);

In [215]:
def eval_output(output):
    output = np.squeeze(np.rot90(np.rot90(np.rot90(output))))
    ind = output.argmax()
    conf = 0.5 #temporary
    return ind, conf

def check_effectiveness(test_set, wts, bias):
    correct = 0
    count = 0
    for test in test_set:
        inp = conv_to_col(test[0])
        tgt = test[1]
        out, _ = eval_output(nnet_prop(wts, bias, inp))
        count += 1
        if tgt == out: correct += 1
    return correct, count

wts, bias = nnet_setup([64, 15, 25, 10])

inp1 = conv_to_col(test_set[0][0])
print(nnet_prop(wts, bias, inp1))
print(check_effectiveness(test_set, wts, bias), "\nBREAK")

wts, bias = backprop(train_set, wts, bias, 0.1)

print(nnet_prop(wts, bias, inp1))
print(check_effectiveness(test_set, wts, bias))

[[  3.76427359e-05]
 [  3.00285097e-04]
 [  9.59958063e-04]
 [  3.29781522e-01]
 [  4.10127341e-04]
 [  2.64720494e-02]
 [  6.10517623e-04]
 [  2.36924422e-04]
 [  4.34063796e-04]
 [  2.34207980e-04]]
(30, 297) 
BREAK
[[  3.79516026e-05]
 [  2.87776748e-04]
 [  9.59685090e-04]
 [  1.52684269e-01]
 [  4.04697145e-04]
 [  2.75537765e-02]
 [  6.07969941e-04]
 [  2.45551269e-04]
 [  4.35394590e-04]
 [  2.32272324e-04]]
(30, 297)
