<a href="https://colab.research.google.com/github/riccardocappi/Machine-Learning-From-Scratch/blob/main/Back_Propagation_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Back-propagation from scratch
An implementation of backpropagation from scratch. The algorithm is tested by training a 2-layer neural network with a sigmoid activation function for the hidden layer and a softmax output layer. The network learns the identity function on 8 one-hot inputs through a 3-unit hidden layer, so the hidden activations form a compact encoding of each input.

In [1]:
import numpy as np

### Network components


In [2]:
def sigmoid(x):
  """Element-wise logistic function 1 / (1 + exp(-x)).

  Args:
    x: a scalar or NumPy array (anything supporting unary minus and np.exp).

  Returns:
    Value(s) in the open interval (0, 1), same shape as `x`.
  """
  decay = np.exp(-x)
  return 1 / (1 + decay)

def softmax(x):
  """Numerically stable softmax over ALL elements of `x`.

  The maximum is subtracted before exponentiating. This leaves the result
  mathematically unchanged (the factor exp(-max) cancels in the ratio) but
  keeps np.exp from overflowing to inf, which would otherwise turn the
  output into nan for large inputs.

  Args:
    x: scalar, sequence or NumPy array of scores.

  Returns:
    Array of the same shape as `x` whose entries are positive and sum to 1.
    The normalisation runs over the whole array, not per row. An empty
    input yields an empty array.
  """
  x = np.asarray(x)
  if x.size == 0:
    # Nothing to normalise; np.max would raise on an empty array.
    return np.exp(x)
  ex = np.exp(x - np.max(x))
  return ex / ex.sum()



class HiddenLayer:
  """Fully connected layer that accumulates its own gradients.

  The activation is `softmax` when the layer is flagged as the last one and
  `sigmoid` otherwise. Gradients are summed into `Delta_W` / `Delta_b` by
  `compute_gradients` and consumed (then reset) by `update_weights`.
  """

  def __init__(self, input_size, dim, is_last_layer):
    """Create a layer with `input_size` inputs and `dim` units.

    Weights and biases are drawn uniformly from [-0.05, 0.05) using the
    global NumPy RNG (weights first, then biases).
    """
    init_range = 0.05
    self.W = np.random.uniform(-init_range, init_range, size=(input_size, dim))
    self.b = np.random.uniform(-init_range, init_range, size=dim)
    if is_last_layer:
      self.af = softmax
    else:
      self.af = sigmoid
    self.is_last_layer = is_last_layer
    # Cached by the forward pass for use in the backward pass.
    self.last_input = None
    self.h = None
    # Gradient accumulators, same shapes as the parameters.
    self.Delta_W = np.zeros_like(self.W)
    self.Delta_b = np.zeros_like(self.b)

  def compute_neurons_activations(self, input):
    """Forward pass: return af(input . W + b) and cache input and output."""
    pre_activation = np.dot(input, self.W) + self.b
    activation = self.af(pre_activation)
    self.last_input = input
    self.h = activation
    return activation

  def compute_gradients(self, t_i, return_delta):
    """Backward pass for the most recent forward call.

    Args:
      t_i: for the last layer, the target vector; for any other layer, the
        delta returned by the layer that follows it.
      return_delta: when true, return the delta to hand to the preceding
        layer; otherwise return None.

    The last layer uses `h - t_i` as its error term (the form the gradient
    takes for softmax combined with cross-entropy loss); other layers scale
    the incoming delta by the sigmoid derivative `h * (1 - h)`.
    """
    if self.is_last_layer:
      local_error = self.h - t_i
    else:
      sigmoid_slope = self.h * (1 - self.h)
      local_error = sigmoid_slope * t_i
    self.Delta_b += local_error
    self.Delta_W += np.outer(self.last_input, local_error)
    if not return_delta:
      return None
    # Row-wise sum of W * error: the error propagated back to each input.
    return (self.W * local_error).sum(axis=1)

  def update_weights(self, lr):
    """Take one gradient-descent step of size `lr`, then clear the accumulators."""
    self.W -= lr * self.Delta_W
    self.b -= lr * self.Delta_b
    self.Delta_W[...] = 0
    self.Delta_b[...] = 0


In [3]:
def train(network, X, y, lr, steps):
  """Train `network` in place with per-sample gradient descent.

  Args:
    network: ordered list of layer objects exposing
      `compute_neurons_activations`, `compute_gradients` and `update_weights`.
    X: iterable of input samples.
    y: iterable of targets, paired with `X` by position.
    lr: learning rate passed to every layer's `update_weights`.
    steps: number of full passes over the (input, target) pairs.

  Returns:
    The same `network` list that was passed in.
  """
  samples = list(zip(X, y))
  print('Learning started')
  for _epoch in range(steps):
    for sample, target in samples:
      # Forward pass: feed each layer's output into the next one.
      activation = sample
      for layer in network:
        activation = layer.compute_neurons_activations(activation)

      # Backward pass: the last layer receives the target, every earlier
      # layer receives the delta produced by the layer after it. The first
      # layer has nothing upstream, so it is not asked for a delta.
      delta = network[-1].compute_gradients(target, True)
      for idx in range(len(network) - 2, -1, -1):
        delta = network[idx].compute_gradients(delta, idx != 0)

      # Apply the accumulated gradients after every single sample.
      for layer in network:
        layer.update_weights(lr)
  return network


In [4]:
# Fix the global NumPy RNG so the weight initialisation is reproducible.
np.random.seed(42)

LEARNING_RATE = 0.2
EPOCHS = 6000

# Eight one-hot patterns; the targets are the inputs themselves.
X = np.identity(8)
y = np.identity(8)

# 8 inputs -> 3 sigmoid units -> 8 softmax outputs.
h1 = HiddenLayer(8,3, False)
out = HiddenLayer(3, 8, True)
network = [h1,out]

network = train(network,X,y,LEARNING_RATE,EPOCHS)
print('Input\t\t\t\t\t', 'Hidden Values\t\t\t\t','Output')
for x in X:
  h = x
  activations = []
  for hl in network:
    h = hl.compute_neurons_activations(h)
    activations.append(h)
  # Use dedicated names here: rebinding `y` would silently replace the
  # training targets with the last sample's hidden activations.
  hidden_values = activations[0]
  output_probs = activations[-1]
  # Cast to plain float so the list prints as [0.15, ...] on every NumPy
  # version (NumPy >= 2 would otherwise show np.float64(0.15)).
  hidden_repr = str([float(round(v, 2)) for v in hidden_values])
  predicted = [0 if p < 0.5 else 1 for p in output_probs]
  print(x, '\t\t' + hidden_repr + '\t\t\t', predicted)




Learning started
Input					 Hidden Values				 Output
[1. 0. 0. 0. 0. 0. 0. 0.] 		[0.15, 0.99, 0.99]			 [1, 0, 0, 0, 0, 0, 0, 0]
[0. 1. 0. 0. 0. 0. 0. 0.] 		[0.98, 0.02, 0.01]			 [0, 1, 0, 0, 0, 0, 0, 0]
[0. 0. 1. 0. 0. 0. 0. 0.] 		[0.0, 0.98, 0.06]			 [0, 0, 1, 0, 0, 0, 0, 0]
[0. 0. 0. 1. 0. 0. 0. 0.] 		[0.99, 0.81, 0.89]			 [0, 0, 0, 1, 0, 0, 0, 0]
[0. 0. 0. 0. 1. 0. 0. 0.] 		[0.03, 0.02, 0.01]			 [0, 0, 0, 0, 1, 0, 0, 0]
[0. 0. 0. 0. 0. 1. 0. 0.] 		[0.01, 0.11, 0.97]			 [0, 0, 0, 0, 0, 1, 0, 0]
[0. 0. 0. 0. 0. 0. 1. 0.] 		[0.9, 0.99, 0.01]			 [0, 0, 0, 0, 0, 0, 1, 0]
[0. 0. 0. 0. 0. 0. 0. 1.] 		[0.89, 0.01, 0.99]			 [0, 0, 0, 0, 0, 0, 0, 1]
