In [1]:
import numpy as np

# Backpropagation

## 1. Gradient of loss with respect to weights

In [2]:
# Layer inputs: 3 rows x 4 columns (treated here as 3 samples with 4 input values each).
inputs = np.array([
    [1, 2, 3, 2.5],
    [2, 5, -1, 2],
    [-1.5, 2.7, 3.3, -0.8],
])

# Example upstream gradient dL/dz: 3 rows x 3 columns, one row per row of `inputs`.
dL_dz = np.array([
    [1, 1, 1],
    [2, 2, 2],
    [3, 3, 3],
])

# Gradient of the loss with respect to the weights:
# (4, 3) = inputs^T (4, 3-rows) times dL/dz (3-rows, 3).
dL_dw = inputs.T @ dL_dz
print(dL_dw)

[[ 0.5  0.5  0.5]
 [20.1 20.1 20.1]
 [10.9 10.9 10.9]
 [ 4.1  4.1  4.1]]


## 2. Gradient of loss with respect to biases

In [3]:
# Gradient of the loss with respect to the biases: sum dL/dz down the rows
# (axis 0); keepdims=True keeps the result 2-D, i.e. a single row.
dL_db = dL_dz.sum(axis=0, keepdims=True)
print(dL_db)

[[6 6 6]]


## 3. Gradient of loss with respect to inputs

In [4]:
# The literal is written as 3 rows of 4 values and then transposed,
# so `weights` ends up with shape (4, 3).
weights = np.array([
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]).transpose()

# Gradient of the loss with respect to the inputs: dL/dz times weights^T.
dL_dX = dL_dz @ weights.T
print(dL_dX)

[[ 0.44 -0.38 -0.07  1.37]
 [ 0.88 -0.76 -0.14  2.74]
 [ 1.32 -1.14 -0.21  4.11]]


## 4. Adding the "backward" method to the Layer_Dense class

In [6]:
class Layer_Dense:
    """Fully connected layer computing ``output = inputs . weights + biases``.

    Attributes set by ``__init__``:
        n_inputs, n_neurons: the sizes passed to the constructor.
        weights: array of shape (n_inputs, n_neurons), drawn from a standard
            normal distribution and scaled by 0.01.
        biases: array of zeros with shape (1, n_neurons).
    """

    def __init__(self, n_inputs, n_neurons):
        """Store the layer sizes and create the initial weights and biases."""
        self.n_inputs, self.n_neurons = n_inputs, n_neurons
        # Small random starting weights; biases start at zero.
        random_weights = np.random.randn(n_inputs, n_neurons)
        self.weights = 0.01 * random_weights
        self.biases = np.zeros(shape=(1, n_neurons))

    def forward(self, inputs):
        """Compute the layer output for ``inputs`` and store it in ``self.output``.

        ``inputs`` is also kept on the instance because ``backward`` needs it
        to compute the weight gradient.
        """
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

    def backward(self, dL_dz):
        """Compute the gradients from the upstream gradient ``dL_dz``.

        Sets:
            dL_dw: gradient w.r.t. the weights, ``inputs^T . dL_dz``.
            dL_db: gradient w.r.t. the biases, ``dL_dz`` summed over axis 0
                (kept 2-D via ``keepdims=True``).
            dL_dX: gradient w.r.t. the inputs, ``dL_dz . weights^T``.
        """
        inputs_transposed = self.inputs.T
        weights_transposed = self.weights.T
        self.dL_dX = np.dot(dL_dz, weights_transposed)
        self.dL_db = np.sum(dL_dz, axis=0, keepdims=True)
        self.dL_dw = np.dot(inputs_transposed, dL_dz)

## 5. Adding the "backward" method to the ReLU activation class (backpropagating through ReLU)

In [12]:
class Activation_ReLU:
    """ReLU activation, a = ReLU(z) = max(0, z), with a backward pass."""

    def forward(self, inputs):
        """Apply ReLU element-wise to ``inputs`` and store it in ``self.output``.

        ``inputs`` (the pre-activation values z) is kept on the instance
        because ``backward`` needs to know which entries were <= 0.
        """
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dL_da):
        """Backpropagate through ReLU and store the result in ``self.dL_dz``.

        ``dL_da`` is the gradient of the loss with respect to a, the output
        of this activation (a = ReLU(z)). ``self.dL_dz`` is the gradient of
        the loss with respect to z, the input of this activation: equal to
        ``dL_da`` where z > 0 and 0 where z <= 0.
        """
        # Work on a copy: assigning dL_da directly would alias the caller's
        # array, and the masking below would then overwrite the upstream
        # gradient in place.
        self.dL_dz = dL_da.copy()
        self.dL_dz[self.inputs <= 0] = 0