In [1]:
# Colab-only: mount Google Drive into the runtime's filesystem at /content/drive.
# NOTE(review): nothing in the visible cells reads from the mount -- confirm later cells need it.
from google.colab import drive
drive.mount("/content/drive")


Mounted at /content/drive


In [2]:
from __future__ import print_function  # no-op on Python 3; makes print a function on Python 2
import numpy as np  # array math for the layer implementations below
np.random.seed(42)  # seed NumPy's global RNG so np.random draws (e.g. Dense weight init) repeat across runs

In [3]:
# Every layer has a forward-pass and a backward-pass implementation. Let's create a base class, Layer,
# that exposes the forward pass as .forward() and the backward pass as .backward().
class Layer:
  """Base building block of the network; on its own it acts as an identity layer.

  Every layer supports two operations:
    - output = layer.forward(input): transform a batch of inputs.
    - grad_input = layer.backward(input, grad_output): push the loss gradient
      back through the layer using the chain rule.
  Layers with learnable parameters are expected to update them inside
  backward().
  """

  def __init__(self):
    """Nothing to set up: the identity layer has no parameters or state."""

  def forward(self, input):
    """Return `input` unchanged.

    In general a layer maps data of shape [batch, input_units] to
    [batch, output_units]; the dummy layer simply echoes what it receives.
    """
    return input

  def backward(self, input, grad_output):
    """Return d loss / d input, given `grad_output` = d loss / d layer output.

    Chain rule: d loss / d x = (d loss / d layer) * (d layer / d x).
    For the identity layer, d layer / d x is the identity matrix, so the
    result equals `grad_output`; the product is still written out to show
    the pattern that real layers follow.
    """
    # Identity Jacobian with one row/column per input unit (axis 1 of `input`).
    identity_jacobian = np.eye(input.shape[1])
    return np.dot(grad_output, identity_jacobian)  # chain rule

In [4]:
class ReLU(Layer):
  """Elementwise rectified linear unit, f(x) = max(0, x); has no parameters."""

  def __init__(self):
    """Nothing to initialise: ReLU keeps no state."""

  def forward(self, input):
    """Clamp the negative entries of a [batch, input_units] matrix to zero."""
    return np.maximum(0, input)

  def backward(self, input, grad_output):
    """Return the loss gradient with respect to the ReLU input.

    The incoming gradient is kept where the input was strictly positive and
    zeroed everywhere else (including where the input is exactly 0).
    """
    positive_mask = input > 0
    return grad_output * positive_mask

In [None]:
class Dense(Layer):
  def __init__(self, input_units, output_units, lr=0.1):
    """Create a dense layer holding the parameters of f(x) = x . W + b.

    Args:
      input_units: number of features in each input row.
      output_units: number of features in each output row.
      lr: learning rate, stored as `self.lr`; presumably consumed by the
        parameter update in the backward pass (not visible here -- confirm).

    Weights have shape (input_units, output_units) and are drawn from a
    zero-mean normal with standard deviation
    sqrt(2 / (input_units + output_units)); biases start at zero.
    """
    self.lr = lr
    # Use 2.0 (not 2) so this is a true division even under Python 2 semantics,
    # where 2 / (a + b) on ints floors to 0 and would make every weight zero.
    init_scale = np.sqrt(2.0 / (input_units + output_units))
    self.weights = np.random.normal(
        loc=0.0, scale=init_scale, size=(input_units, output_units))
    self.biases = np.zeros(output_units)

  def forward(self, input):
    """Apply the affine transformation f(x) = x . W + b.

    `input` has shape [batch, input_units]; the result has shape
    [batch, output_units], with the bias vector added to every row.
    """
    projected = np.dot(input, self.weights)
    return projected + self.biases

  def backwards(self, input, grad_output):
    # compute d f / d x = d f / d dense * d dense / d x where d dense / d x = weights transposed
    grad_input = np.dot(grad_output, self.weights.T)

    # compute gradient wrt weights and biases
    grad_weights = np.dot(input.T, grad_output)