Materijal je pripremljen po uzoru na knjigu http://neuralnetworksanddeeplearning.com/chap1.html

In [1]:
import numpy as np
import random

In [2]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [3]:
sigmoid(0)

0.5

In [4]:
def sigmoid_prime(z):
    return sigmoid(z)* (1-sigmoid(z))

In [5]:
sigmoid_prime(0)

0.25

Algoritam koji cemo implementirati se sastoji od sledecih koraka: 
<img src='backpropagation_algorithm_summary.png'>

In [80]:
class Network:
    # mreza ocekuje listu koja sadrzi brojeve neurona pojedinacnih slojeva
    def __init__(self, sizes):
        self.number_of_layers = len(sizes)
        self.sizes = sizes
        self.biases = [ np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [ np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        
        
    # prolaz unapred kroz mrezu    
    def feedforward(self, network_input):
        for b, w in zip(self.biases, self.weights):
            network_input = sigmoid(np.dot(w,network_input) + b)
            
        return network_input
    
    
    def SGD(self, training_inputs, training_results, epochs, mini_batch_size, alpha, test_data = None):
        n = len(training_inputs)
        
        for epoch in range(0, epochs): 
            # data randomization
#             training_data = list(zip(training_inputs, training_results))
#             random.shuffle(training_data)
#             training_inputs = training_data[0]
#             training_results = training_data[1]
            
            mini_batches = [(training_inputs[k: k+mini_batch_size], \
                             training_results[k: k+mini_batch_size] ) \
                            for k in range(0, n, mini_batch_size)]
        
            for mini_batch in mini_batches: 
                self.update_mini_batch(mini_batch, alpha)
            
            if test_data: 
                print("Epoche {0}: {1}/{2}".\
                      format(epoch, self.evaluate(test_data[0], test_data[1]), n)) 
            else:
                print("Epoche {0} completed".format(epoch))
            
            
    def backgropagation(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        
        activations = [x]
        activation = x
        
        zs = []
        
        #forward pass
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
       
        #backward pass
        delta = self.cost_derivate(activations[-1], y)*sigmoid_prime(zs[-1])
       
        nabla_b[-1] = delta 
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        
        
        for layer in range(2, self.number_of_layers):
            z = zs[-layer]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-layer+1].transpose(), delta)*sp
            nabla_b[-layer] = delta
            nabla_w[-layer] = np.dot(delta, activations[-layer-1].transpose())
            
            
        return (nabla_b, nabla_w)

    
    def cost_derivate(self, activation, y):
        return activation - y
    
    def update_mini_batch(self, mini_batch, alpha):
        n = len(mini_batch)
        
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        
        
        for x, y in zip(mini_batch[0], mini_batch[1]):
            delta_nabla_b, delta_nabla_w = self.backgropagation(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]


        self.weights = [w - alpha/n*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - alpha/n*nb for b, nb in zip(self.biases, nabla_b)]


    def evaluate(self, test_inputs, test_results):
        results = [(np.argmax(self.feedforward(x)), np.argmax(y)) for x, y in zip(test_inputs, test_results)] 
        return sum([ int(y_predicted ==y) for y_predicted, y in results]) 
        
        

In [67]:
# results = [ (np.argmax(network.feedforward(x)), y) for x, y in zip(test_inputs, test_results)] 

In [68]:
# results[0:10]

In [69]:
network = Network([784, 30, 10])

In [70]:
[w.shape for w in network.weights]

[(30, 784), (10, 30)]

Ucitavanje i priprema podataka:

In [71]:
from keras.datasets import mnist

In [72]:
(x_train, y_train), (x_test, y_test)= mnist.load_data()

In [73]:
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

In [74]:
x_train = x_train/255
x_test = x_test/255

In [75]:
training_inputs = [ np.reshape(x, (784, 1)) for x in x_train]
test_inputs = [ np.reshape(x, (784, 1)) for x in x_test]

In [76]:
def vectorize(digit):
    v = np.zeros((10, 1))
    v[digit] = 1
    
    return v

In [77]:
training_results = [vectorize(y) for y in y_train]
test_results = [vectorize(y) for y in y_test]

In [78]:
network.SGD(training_inputs, training_results, 10, 128, 0.1,(training_inputs, training_results))

Epoche 0: 49054/60000
Epoche 1: 52761/60000
Epoche 2: 54013/60000
Epoche 3: 54701/60000
Epoche 4: 55112/60000
Epoche 5: 55434/60000
Epoche 6: 55676/60000
Epoche 7: 55887/60000
Epoche 8: 56053/60000
Epoche 9: 56195/60000


In [79]:
56195/60000

0.9365833333333333

### Za vezbu: 
* eksperimentisati sa razlicitim brojem slojeva u mrezi
* eksperimentisati sa drugacijim inicijalizacijama matrice koeficijenata
* eksperimentisati sa drugim aktivacionim funkcijama