<a href="https://colab.research.google.com/github/riccardocappi/Machine_Learning_Course/blob/main/Lec_9_Back_Propagation_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Back-propagation from scratch
**Riccardo Cappi, 2073768**

In this notebook I implement the back-propagation algorithm shown in Lecture 9, extending the derivation to more than one hidden layer.

I tested the algorithm by trying to replicate the network given on slide 15 (an encoder for the identity function).

In [None]:
import numpy as np

### Network components


In [None]:
def sigmoid(x):
  """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

  Args:
    x: A real scalar or array-like of real numbers.

  Returns:
    A NumPy float scalar for scalar input, otherwise an array with the same
    shape as ``x``. Every value lies between 0 and 1.
  """
  x = np.asarray(x, dtype=float)
  # exp is only ever evaluated on non-positive arguments, so it cannot
  # overflow; the naive 1 / (1 + exp(-x)) overflows for large negative x.
  e = np.exp(-np.abs(x))
  result = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
  # Indexing with () turns a 0-d result into a scalar and leaves arrays as-is.
  return result[()]

class HiddenUnit:
  """A single neuron: a vector of input weights plus a bias term.

  The unit also stores the quantities produced by back-propagation
  (``delta`` and ``delta_w``) so they can be applied by ``update_weights``.
  """

  def __init__(self, weights):
    """Create a unit from a flat parameter vector.

    Args:
      weights: Sequence whose first element is the bias and whose remaining
        elements are the input weights.
    """
    # Work on a private float copy: slicing the caller's array would alias it
    # (updates would leak back to the caller), and an integer array would
    # silently truncate fractional weight updates.
    params = np.array(weights, dtype=float)
    self.bias = params[0]
    self.weights = params[1:]
    self.delta_w = np.zeros(len(self.weights))
    self.delta = 0

  def net(self, input):
    """Return the weighted sum of ``input`` plus the bias (no activation)."""
    return np.dot(self.weights, input) + self.bias

  def update_weights(self, lr):
    """Apply the stored corrections scaled by the learning rate ``lr``.

    Each weight moves by ``lr * delta_w[i]`` and the bias by ``lr * delta``.
    """
    self.weights += lr * np.asarray(self.delta_w)
    self.bias += lr * self.delta


class HiddenLayer:
  """A layer holding ``dim`` units that all read the same input vector."""

  def __init__(self, input_size, dim):
    """Build ``dim`` units with small random parameters.

    Args:
      input_size: Number of inputs each unit receives.
      dim: Number of units in the layer.
    """
    # One draw per unit: input_size weights plus one bias, uniform in
    # [-0.05, 0.05).
    self.hidden_units = [
      HiddenUnit(np.random.uniform(low=-0.05, high=0.05, size=input_size + 1))
      for _ in range(dim)
    ]

  def get_hidden_layer_nets(self, input):
    """Return an array with the ``net`` value of every unit for ``input``."""
    return np.asarray([unit.net(input) for unit in self.hidden_units])


def backpropagation(network, output_of_each_layer, lr, t, x):
  """Run one back-propagation step for a single training example.

  Args:
    network: List of layers, ordered from the first layer to the output layer.
    output_of_each_layer: Activations of every layer for input ``x``, in the
      same order as ``network``.
    lr: Learning rate passed to each unit's ``update_weights``.
    t: Target vector, indexed by output unit.
    x: Input vector fed to the first layer.

  Returns:
    The same ``network`` list, whose units have been updated in place.
  """
  last = len(network) - 1

  # Walk the layers from the output back to the first one, so that the deltas
  # of layer i+1 are already available when layer i is processed.
  for i in range(last, -1, -1):
    activations = output_of_each_layer[i]
    # The first layer reads the raw input; every other layer reads the
    # activations of the layer before it.
    if i == 0:
      layer_input = x
    else:
      layer_input = output_of_each_layer[i - 1]
    is_output_layer = i == last

    for j, unit in enumerate(network[i].hidden_units):
      o_j = activations[j]
      err = get_error(o_j, is_output_layer, network, i, t, j)
      # o * (1 - o) is the derivative of the sigmoid at this unit's output.
      unit.delta = o_j * (1.0 - o_j) * err
      for k in range(len(unit.delta_w)):
        unit.delta_w[k] = unit.delta * layer_input[k]

  # Weights are changed only after every delta has been computed, so the
  # error terms above are all based on the pre-update weights.
  for layer in network:
    for unit in layer.hidden_units:
      unit.update_weights(lr)

  return network


def get_error(output, is_output_layer, network, i,t,j):
  """Return the error term for unit ``j`` of layer ``i``.

  Args:
    output: Activation of the unit.
    is_output_layer: Whether layer ``i`` is the last layer of ``network``.
    network: List of layers; only ``network[i + 1]`` is read, and only when
      ``is_output_layer`` is false.
    i: Index of the unit's layer in ``network``.
    t: Target vector; only read when ``is_output_layer`` is true.
    j: Index of the unit inside its layer.

  Returns:
    ``t[j] - output`` for an output unit; otherwise the sum, over the units
    of the next layer, of their weight on input ``j`` times their delta.
  """
  if is_output_layer:
    return t[j] - output

  downstream = network[i + 1].hidden_units
  return sum((unit.weights[j] * unit.delta for unit in downstream), 0.0)


In [None]:
def forward(network, input_layer):
  """Propagate ``input_layer`` through ``network`` and collect activations.

  Every layer's net values are passed through the sigmoid, and the result
  becomes the input of the following layer.

  Args:
    network: List of layers, ordered from the first to the output layer.
    input_layer: Input vector for the first layer.

  Returns:
    A list with one entry per layer holding that layer's activations.
  """
  activations = []
  signal = input_layer
  for layer in network:
    signal = sigmoid(layer.get_hidden_layer_nets(signal))
    activations.append(signal)
  return activations

def train(network, X, y, lr, steps):
  """Train ``network`` one example at a time.

  Args:
    network: List of layers to train.
    X: Iterable of input vectors.
    y: Iterable of target vectors, paired with ``X`` by position.
    lr: Learning rate handed to ``backpropagation``.
    steps: Number of full passes over the (input, target) pairs.

  Returns:
    The network returned by the last ``backpropagation`` call, or the
    ``network`` argument itself if no update was performed.
  """
  # Materialize the pairs once so they can be traversed on every pass.
  samples = list(zip(X, y))

  print('Learning started')
  for _epoch in range(steps):
    for x, t in samples:
      activations = forward(network, x)
      network = backpropagation(network, activations, lr, t, x)
  print('Learning ended\n')

  return network


### Test on the identity function

I built a network following the architecture of the encoder shown in Lecture 9 (slide 15) and tried to learn the identity function (8x8).

In [None]:

# Training set: the 8 one-hot vectors, each of which must be reproduced at the output.
X = np.identity(8)
y = np.identity(8)

# Architecture: 8 inputs -> 3 hidden units -> 8 output units.
h1 = HiddenLayer(8,3)
out = HiddenLayer(3, 8)
network = [h1,out]

# Learning rate 0.2, 5000 passes over the 8 samples.
network = train(network,X,y,0.2,5000)
print('Input\t\t\t\t\t', 'Hidden Values\t\t\t\t','Output')
for x in X:
  # forward returns one activation vector per layer: hidden first, then output.
  # NOTE(review): this rebinds the module-level `y` (the targets defined above)
  # to the hidden activations of the current sample.
  y,z = forward(network,x)
  # Hidden activations are rounded to 2 decimals; outputs are thresholded at 0.5.
  print(x, '\t\t'+ str([round(y_i,2) for y_i in y])+'\t\t\t', [0 if z_i <0.5 else 1 for z_i in z])




Learning started
Learning ended

Input					 Hidden Values				 Output
[1. 0. 0. 0. 0. 0. 0. 0.] 		[0.81, 0.01, 0.95]			 [1, 0, 0, 0, 0, 0, 0, 0]
[0. 1. 0. 0. 0. 0. 0. 0.] 		[0.88, 0.98, 0.02]			 [0, 1, 0, 0, 0, 0, 0, 0]
[0. 0. 1. 0. 0. 0. 0. 0.] 		[0.97, 0.11, 0.03]			 [0, 0, 1, 0, 0, 0, 0, 0]
[0. 0. 0. 1. 0. 0. 0. 0.] 		[0.01, 0.64, 0.01]			 [0, 0, 0, 1, 0, 0, 0, 0]
[0. 0. 0. 0. 1. 0. 0. 0.] 		[0.05, 0.99, 0.74]			 [0, 0, 0, 0, 1, 0, 0, 0]
[0. 0. 0. 0. 0. 1. 0. 0.] 		[0.04, 0.01, 0.19]			 [0, 0, 0, 0, 0, 1, 0, 0]
[0. 0. 0. 0. 0. 0. 1. 0.] 		[0.98, 0.95, 0.99]			 [0, 0, 0, 0, 0, 0, 1, 0]
[0. 0. 0. 0. 0. 0. 0. 1.] 		[0.01, 0.27, 0.98]			 [0, 0, 0, 0, 0, 0, 0, 1]


### Perceptron as a particular case of a multilayer network
I created a perceptron using the previous code and trained it to learn the OR function.

In [None]:
from itertools import product
# Three-input logical OR.
OR = lambda x1, x2, x3: x1 or x2 or x3
# All 8 combinations of three binary inputs, i.e. the full truth table.
data = list(product((0,1),repeat=3))
data = np.array(data)
t = [OR(*s) for s in data]

t = [[t_i] for t_i in t] # The target must be a list of the same length of the output layer (1 in this case)

lr = 0.1
steps = 1000
# A single layer with one unit and 3 inputs.
out = HiddenLayer(3,1) # Perceptron
network = [out]

network = train(network, data, t, lr, steps)
print('Expected:')
for d in data:
  print(d, '->', OR(*d))

print('\nPredicted:')
for d in data:
  # res holds one activation array per layer; res[0] is the only layer's
  # output (a single value here), thresholded at 0.5 to get a binary prediction.
  res = forward(network, d)
  print(d, '->', 0 if res[0] < 0.5 else 1)


Learning started
Learning ended

Expected:
[0 0 0] -> 0
[0 0 1] -> 1
[0 1 0] -> 1
[0 1 1] -> 1
[1 0 0] -> 1
[1 0 1] -> 1
[1 1 0] -> 1
[1 1 1] -> 1

Predicted:
[0 0 0] -> 0
[0 0 1] -> 1
[0 1 0] -> 1
[0 1 1] -> 1
[1 0 0] -> 1
[1 0 1] -> 1
[1 1 0] -> 1
[1 1 1] -> 1
