In [1]:
%matplotlib inline
import numpy as np
import random, math
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
from matplotlib.pylab import cm
from IPython.html.widgets import interact



In [2]:
# http://neuralnetworksanddeeplearning.com/chap1.html

# Sigmoid and cost function derivatives
def sig(x):
    return 1.0/(1.0+np.exp(-x))
sigmoid = np.vectorize(sig)

def sig_deriv(x):
    return sig(x)*(1-sig(x))
sigmoid_deriv = np.vectorize(sig_deriv)

def cost_deriv(outputs, cost):
    return outputs-cost

# Weights initialization and feedforward
def nnet_setup(node_layout):
    weights = []
    biases = []
    for i in range(1, len(node_layout)):
        weights.append(np.random.randn(node_layout[i], node_layout[i-1])-0.5)
        biases.append(np.random.randn(node_layout[i], 1))
    return weights, biases

def nnet_prop(weights, biases, inputs):
    for w, b in zip(weights, biases):
        inputs = sigmoid(np.dot(w, inputs) + b)
    return inputs

In [16]:
# The following functions split data into randomized subsets

def split_set(dataset, point):
    temp1 = list(zip(dataset.images[:point], dataset.target[:point]))
    temp2 = list(zip(dataset.images[point:], dataset.target[point:]))
    random.shuffle(temp1)
    random.shuffle(temp2)
    training_set, training_sols = zip(*temp1)
    test_set, test_sols = zip(*temp2)
    return list(zip(training_set, training_sols)), list(zip(test_set, test_sols))

def split_to_batch(trainset, size):
    return [trainset[n*size:(n+1)*size] for n in range(0, math.floor(len(trainset)/size))]

def conv_to_col(vec):
    return np.rot90([vec.reshape((64))])

def create_tgt_vec(pos):
    tmp = np.ones(10)*-1
    tmp[pos] = 1 #temporary; nominal 1
    return np.rot90([tmp])

digits = load_digits()
train_set, test_set = split_set(digits, 10)
train_set = split_to_batch(train_set, 1)

In [20]:
# http://stackoverflow.com/questions/30491307/why-does-this-backpropagation-implementation-fail

# Trains the network using backpropagation
def backprop(train_set, wts, bias, eta):
    # Learning coeficient is the training rate over the number of elements
    learning_coef = eta / len(train_set[0])
    
    # Iterates over each test batch
    for next_set in train_set:
        # Aggregates the delta values
        sum_del_w = [np.zeros(w.shape) for w in wts]
        sum_del_b = [np.zeros(b.shape) for b in bias]
        
        # Backpropagates each training set
        for test, sol in next_set:
            del_w = [np.zeros(shape=wt.shape) for wt in wts]
            del_b = [np.zeros(shape=bt.shape) for bt in bias]
            #next_input = conv_to_col(test)
            #outp = create_tgt_vec(sol)
            next_input = test
            outp = [[1]]
            
            # Feedforward step
            pre_sig = []; post_sig = []
            for w, b in zip(wts, bias):
                next_input = np.dot(w, next_input) + b
                pre_sig.append(next_input)
                post_sig.append(sigmoid(next_input))
                next_input = sigmoid(next_input)
            
            # Backpropagation step
            delta = cost_deriv(post_sig[-1], outp) * sigmoid_deriv(pre_sig[-1])
            del_b[-1] = delta
            del_w[-1] = np.dot(delta, post_sig[-2].transpose())
            
            for i in range(2, len(wts)):
                pre_sig_vec = pre_sig[-i]
                sig_deriv = sigmoid_deriv(pre_sig_vec)
                delta = np.dot(wts[-i+1].transpose(), delta) * sig_deriv
                del_b[-i] = delta
                del_w[-i] = np.dot(delta, post_sig[-i-1].transpose())
                        
            sum_del_w = [dw + sdw for dw, sdw in zip(del_w, sum_del_w)]
            sum_del_b = [db + sdb for db, sdb in zip(del_b, sum_del_b)]
        
        # Weights adjustment step
        wts = [wt - learning_coef * dw for wt, dw in zip(wts, sum_del_w)]
        bias = [bt - learning_coef * db for bt, db in zip(bias, sum_del_b)]
    
    return wts, bias

wts, bias = nnet_setup([2,3,2])
for i in range(0, 5):
    #print(wts)
    print(nnet_prop(wts, bias, [1, 1]))
    wts, bias = backprop([[(np.array([1, 1]), [0, 0])]], wts, bias, 0.2);
    print(nnet_prop(wts, bias, [1, 1]), "\n\n")

[[ 0.51981043  0.49938199  0.50269545]
 [ 0.5571393   0.69954905  0.74070146]]
[[ 0.53994686  0.53414568  0.54201098]
 [ 0.57003114  0.71476338  0.75585017]] 


[[ 0.53994686  0.53414568  0.54201098]
 [ 0.57003114  0.71476338  0.75585017]]
[[ 0.55864627  0.56605229  0.57789221]
 [ 0.58211769  0.72845477  0.76933031]] 


[[ 0.55864627  0.56605229  0.57789221]
 [ 0.58211769  0.72845477  0.76933031]]
[[ 0.57588614  0.59492482  0.61010768]
 [ 0.5934649   0.74081865  0.78137732]] 


[[ 0.57588614  0.59492482  0.61010768]
 [ 0.5934649   0.74081865  0.78137732]]
[[ 0.59171706  0.62081986  0.63873554]
 [ 0.60413369  0.75202389  0.79219046]] 


[[ 0.59171706  0.62081986  0.63873554]
 [ 0.60413369  0.75202389  0.79219046]]
[[ 0.60623266  0.64393564  0.66403765]
 [ 0.61417978  0.76221559  0.80193773]] 




In [21]:
def eval_output(output):
    output = np.squeeze(np.rot90(np.rot90(np.rot90(output))))
    ind = output.argmax()
    conf = 0.5 #temporary
    return ind, conf

def check_effectiveness(test_set, wts, bias):
    correct = 0
    count = 0
    for test in test_set:
        inp = conv_to_col(test[0])
        tgt = test[1]
        out, _ = eval_output(nnet_prop(wts, bias, inp))
        count += 1
        if tgt == out: correct += 1
    return correct, count

wts, bias = nnet_setup([64, 15, 25, 10])

inp1 = conv_to_col(test_set[0][0])
print(nnet_prop(wts, bias, inp1))
print(check_effectiveness(test_set, wts, bias), "\nBREAK")

wts, bias = backprop(train_set, wts, bias, 0.2)

print(nnet_prop(wts, bias, inp1))
print(check_effectiveness(test_set, wts, bias))

[[  1.52966065e-04]
 [  3.44240678e-02]
 [  2.41531231e-02]
 [  3.98510265e-02]
 [  1.34682214e-02]
 [  2.68861370e-03]
 [  8.12855392e-05]
 [  8.07388740e-04]
 [  2.02381912e-05]
 [  1.10774687e-03]]
(180, 1787) 
BREAK


ValueError: shapes (15,64) and (8,8) not aligned: 64 (dim 1) != 8 (dim 0)