In [None]:
# Define some activation functions and their derivatives

import math
def sigmoid(x):
  """Return the logistic sigmoid ``1 / (1 + e**(-x))`` of ``x``.

  The result is computed with whichever algebraically equivalent form keeps
  the argument of ``math.exp`` non-positive, so very large magnitudes of
  ``x`` saturate to 0.0 or 1.0 instead of raising ``OverflowError``.
  """
  if x >= 0:
    # exp(-x) <= 1 here, so it can only underflow towards 0, never overflow.
    return 1 / (1 + math.exp(-x))
  # For negative x, exp(-x) overflows a float once x drops below roughly
  # -709, so use exp(x) / (1 + exp(x)) where exp(x) < 1.
  exp_x = math.exp(x)
  return exp_x / (1 + exp_x)

def sigmoid_derivative(x):
  """Return the sigmoid derivative expressed through the sigmoid's output.

  ``x`` is expected to already be ``sigmoid(y)``, not the raw input ``y``.
  """
  complement = 1 - x
  return x * complement

def relu(x):
  """Return ``x`` when it is strictly positive, otherwise the integer 0."""
  if x > 0:
    return x
  return 0

def relu_derivative(x):
  """Return the ReLU slope (1 or 0) given the ReLU output.

  ``x`` is expected to already be ``relu(y)``; the slope is 1 only when that
  output is strictly positive.
  """
  if x > 0:
    return 1
  return 0

def tanh(x):
  """Return the hyperbolic tangent of ``x`` (delegates to ``math.tanh``)."""
  result = math.tanh(x)
  return result

def tanh_derivative(x):
  """Return the tanh derivative expressed through the tanh output.

  ``x`` is expected to already be ``tanh(y)``, not the raw input ``y``.
  """
  squared = x ** 2
  return 1 - squared

# Lookup table from an activation's name to the function computing its
# derivative (each derivative takes the activation's output as argument).
derivative_functions = dict(
    sigmoid=sigmoid_derivative,
    tanh=tanh_derivative,
    relu=relu_derivative,
)

def dot_product(vec1, vec2):
  """Return the sum of element-wise products of ``vec1`` and ``vec2``.

  Elements are paired with ``zip``, so any surplus elements of the longer
  sequence are ignored.
  """
  pairwise_products = [a * b for a, b in zip(vec1, vec2)]
  return sum(pairwise_products)

def binary_decision(val):
  """Threshold ``val`` at 0.5: return 1 when ``val >= 0.5``, otherwise 0."""
  return 1 if val >= 0.5 else 0

In [None]:
import math
import random

class Neuron:
    """A single node of the network that stores one numeric value.

    Parameters:
        id: identifier of the neuron; intended to be a tuple indexing the
            neuron within its layer.
        value: initial value. When omitted (``None``), a fresh
            ``random.random()`` draw is used for this instance.
    """

    def __init__(self,id = None, value = None):
        # id should be a tuple indexing the neuron in each layer
        self.id = id
        # A ``random.random()`` call placed in the signature is evaluated only
        # once, when the function is defined, so every neuron created without
        # an explicit value would share the same number. Draw it per instance.
        self.value = random.random() if value is None else value

# Special kind of neuron whose value is always 1 and which is not attached to any neuron of the previous layer
class Bias(Neuron):
    """Bias neuron: a ``Neuron`` created with the constant value 1."""

    def __init__(self,id = None):
        # id should be a tuple indexing the neuron in each layer
        super().__init__(id, 1)

class Layer:
    """An ordered group of neurons that always starts with a bias neuron.

    ``size`` (constructor argument) counts only the regular neurons, while
    ``len(layer)`` also counts the bias neuron stored at index 0.
    """

    def __init__(self, id, size, activation_func = sigmoid):
        self.id = id
        self.activation_func = activation_func
        # Regular neurons are numbered 1..size inside the layer; position 0 is
        # reserved for the bias. Every layer gets a bias neuron, the output
        # layer included, purely for convenience.
        regular_neurons = [Neuron((self.id, position)) for position in range(1, size + 1)]
        self.neurons = [Bias((self.id, 0))] + regular_neurons

    def __len__(self):
        """Return the number of neurons, bias neuron included."""
        return len(self.neurons)

    def forward(self, layer_link):
        """Push values across ``layer_link`` into its destination layer.

        For every destination neuron, the values of ``layer_link.from_layer``
        are combined with the incoming weights by a dot product and passed
        through this layer's ``activation_func``. All results are computed
        first and then written to the non-bias neurons of
        ``layer_link.to_layer``.
        """
        source_values = [node.value for node in layer_link.from_layer.neurons]
        activate = self.activation_func

        # One entry of layer_link.links per destination neuron, each holding
        # the links coming from every source neuron.
        new_values = [
            activate(dot_product(source_values, [link.weight for link in incoming_links]))
            for incoming_links in layer_link.links
        ]

        # Destination indices start at 1 so the bias neuron is left untouched.
        destination = layer_link.to_layer.neurons
        for position, new_value in enumerate(new_values, start=1):
            destination[position].value = new_value


class Link:
    """A weighted connection from one neuron to another.

    Parameters:
        source: neuron the link starts from.
        desti: neuron the link points to.
        weight: initial weight. When omitted (``None``), a fresh
            ``random.random()`` draw is used for this link.
    """

    def __init__(self,source,desti,weight = None):
        # source and destination should be neurons
        self.source = source
        self.desti = desti
        # A ``random.random()`` call placed in the signature is evaluated only
        # once, at definition time, which would give every link the very same
        # starting weight. Draw a new weight for each link instead.
        self.weight = random.random() if weight is None else weight


class LayerLink:
    """All weighted links between two consecutive layers.

    ``self.links`` is a list with one entry per non-bias neuron of
    ``to_layer``; each entry is the list of links arriving at that neuron
    from every neuron of ``from_layer`` (bias included). This layout lets a
    forward pass take one dot product per destination neuron.

    Parameters:
        from_layer: layer the links start from.
        to_layer: layer the links point to.
        weights: optional nested sequence of initial weights laid out like
            ``self.links``; see ``set_weights``.
    """

    def __init__(self, from_layer, to_layer, weights = None):
        self.from_layer = from_layer
        self.to_layer = to_layer
        self.links = []
        # Start at 1: the bias neuron of the destination layer receives no links.
        for i in range(1,len(to_layer)):
            nodelinks = [
                Link(from_layer.neurons[j], to_layer.neurons[i])
                for j in range(len(from_layer))
            ]
            self.links.append(nodelinks)
        # Identity check rather than ``!=``: comparing an array-like object to
        # None with ``!=`` may yield an element-wise result instead of a bool.
        if weights is not None:
            self.set_weights(weights)

    def set_weights(self,list_list_weights):
        """Overwrite link weights from a nested sequence shaped like ``self.links``.

        ``list_list_weights[i][j]`` becomes the weight of ``self.links[i][j]``.
        Only the positions present in the argument are updated.
        """
        for i, list_weights in enumerate(list_list_weights):
            for j, weight in enumerate(list_weights):
                self.links[i][j].weight = weight


class NeuralNetwork:
    """Fully connected feed-forward network assembled one layer at a time.

    ``layers[n]`` is the n-th layer (index 0 is the input layer) and
    ``layer_links[n]`` holds the links from layer ``n`` to layer ``n + 1``.

    Parameters:
        learn_rate: factor applied to each weight gradient when
            ``back_propagation`` updates the weights.
    """

    def __init__(self, learn_rate = 0.001):
        # Layer 0,1,2,3...n
        self.layers = []
        self.depth = 0
        # Layer link 0-->1, 1-->2, ..., n-1 -->n
        self.layer_links = []

        # Gradients from the most recent back_propagation() call:
        #   dlossda[(layer, neuron)]                       -> dloss / d(neuron value)
        #   dlossdw[(from_layer, from_neuron, to_neuron)]  -> dloss / d(weight)
        self.dlossda = dict()
        self.dlossdw = dict()
        self.learn_rate =  learn_rate

    def add_layer(self, size):
        """Append a layer of ``size`` neurons (bias not counted) and link it to the previous layer."""
        new_layer = Layer(self.depth,size)
        self.layers.append(new_layer)
        self.depth += 1
        # The input layer has no previous layer to connect to.
        if self.depth > 1:
            # depth - 2 is the index of the layer added just before this one
            # (depth == 1 corresponds to index 0 in self.layers).
            layer_link = LayerLink(self.layers[self.depth-2], new_layer)
            self.layer_links.append(layer_link)

    def _link(self, from_layer_id, from_neuron_id, to_neuron_id):
        """Return the link between two neurons of consecutive layers.

        Neuron ids are positions inside their layer (0 is the bias). The
        links of a LayerLink are grouped per destination neuron and skip the
        destination bias, hence the ``to_neuron_id - 1`` offset.
        """
        return self.layer_links[from_layer_id].links[to_neuron_id - 1][from_neuron_id]

    def get_weight(self,from_layer_id,from_neuron_id,to_neuron_id):
        """Return the weight of the link from a neuron of layer ``from_layer_id`` to a neuron of the next layer."""
        return self._link(from_layer_id, from_neuron_id, to_neuron_id).weight

    def set_weight(self,from_layer_id,from_neuron_id,to_neuron_id,val):
        """Set the weight of the link from a neuron of layer ``from_layer_id`` to a neuron of the next layer."""
        self._link(from_layer_id, from_neuron_id, to_neuron_id).weight = val

    def forward(self, inputs):
        """Run a forward pass and return the values of the output layer.

        Parameters:
            inputs: one value per non-bias neuron of the input layer.

        Returns:
            list of the output layer's neuron values, bias excluded.

        Raises:
            ValueError: if ``len(inputs)`` does not match the input layer size.
        """
        # -1 because the bias neuron takes no input
        expected = len(self.layers[0]) - 1
        if len(inputs) != expected:
            raise ValueError(
                f"invalid input: expected {expected} values, got {len(inputs)}"
            )

        # Load the inputs into the input layer; start at 1 to skip the bias.
        for position, input_value in enumerate(inputs, start=1):
            self.layers[0].neurons[position].value = input_value

        # Each layer except the last pushes its values into the next one.
        for layer in self.layers[:-1]:
            layer.forward(self.layer_links[layer.id])

        # remember to ignore the bias neuron
        return [neuron.value for neuron in self.layers[-1].neurons[1:]]

    def back_propagation(self,loss_function, step_size = 0.01):
        """Compute the gradients for the last forward pass and update every weight once.

        Notation: ``w`` is a link weight and ``a`` is a neuron value.

        Parameters:
            loss_function: either a key of ``derivative_functions`` or a
                callable; it is called with the output value and must return
                dloss/doutput.
            step_size: currently unused; kept so existing calls keep working.
                The update is scaled by ``self.learn_rate``.

        Raises:
            ValueError: if the output layer does not hold exactly one
                non-bias neuron (the algorithm treats dloss/doutput as a
                single number).
        """
        self.dlossda = dict() # clear the gradients of the previous call
        self.dlossdw = dict()

        if callable(loss_function):
            loss_deri = loss_function
        else:
            loss_deri = derivative_functions[loss_function]

        output_layer = self.layers[-1]
        final_output = [neuron.value for neuron in output_layer.neurons[1:]]
        if len(final_output) != 1:
            raise ValueError("back_propagation supports exactly one output neuron")
        dlossdoutput = loss_deri(*final_output)
        self.dlossda[output_layer.neurons[1].id] = dlossdoutput

        # Walk from the output layer down to layer 1; the input layer has no
        # incoming weights.
        for i in range(self.depth - 1, 0, -1):
            to_layer = self.layers[i]
            from_layer = self.layers[i - 1]
            # Layer.forward applies the *source* layer's activation when it
            # fills the next layer, so the matching derivative is looked up
            # from the source layer. derivative_functions is keyed by the
            # activation's name.
            activation_deri = derivative_functions[from_layer.activation_func.__name__]

            for j in range(1, len(to_layer)): # the bias of to_layer has no incoming links
                # dloss/dz_j; the derivative helpers take the neuron's output.
                delta = self.dlossda[(i, j)] * activation_deri(to_layer.neurons[j].value)

                for k in range(len(from_layer)): # every from_layer neuron, bias included
                    # dloss/dw_kj = dloss/dz_j * a_k
                    self.dlossdw[(i - 1, k, j)] = delta * from_layer.neurons[k].value
                    # dloss/da_k accumulates over all destination neurons j.
                    contribution = delta * self.get_weight(i - 1, k, j)
                    self.dlossda[(i - 1, k)] = self.dlossda.get((i - 1, k), 0) + contribution

        # Update each weight once, only after all gradients were computed from
        # the old weights. Subtract the gradient to move towards a lower loss.
        for key, gradient in self.dlossdw.items():
            self.set_weight(*key, self.get_weight(*key) - gradient * self.learn_rate)
