In [7]:
import numpy as np


## Activation function

In [8]:
# The ReLU activation function
def ReLu_forward(x):
    """Apply the rectified linear unit elementwise: x where x > 0, else 0.

    Parameters
    ----------
    x : array_like
        Pre-activation values. Scalars, nested lists and ndarrays are
        accepted; the input is converted with ``np.asarray`` first so that
        plain Python sequences work with the ``>`` comparison.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` with non-positive entries set to 0.
    """
    x = np.asarray(x)
    return np.where(x > 0, x, 0)

# The derivative of the ReLU activation function
def ReLu_diff(x):
    """Return the elementwise derivative of ReLU evaluated at ``x``.

    The derivative is 1 where ``x > 0`` and 0 elsewhere; in particular the
    value at exactly ``x == 0`` is taken to be 0.

    Parameters
    ----------
    x : array_like
        Pre-activation values. Converted with ``np.asarray`` so that plain
        Python sequences work with the ``>`` comparison.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 values with the same shape as ``x``.
    """
    x = np.asarray(x)
    return np.where(x > 0, 1, 0)

## Simple neural network

In [9]:
class Simple_network(object):
    """A small fully connected regression network with hard-coded weights.

    Layout: 1 input -> 2 ReLU units -> 2 ReLU units -> 1 linear output.
    Every weight matrix stores the bias in its first column, so each layer
    prepends a row of ones to its input before the matrix product.

    Samples are stored as columns: an input of shape ``(1, N)`` is treated
    as a batch of ``N`` samples. The single-sample case ``(1, 1)`` is simply
    a batch of one.

    The class relies on the module-level helpers ``ReLu_forward`` and
    ``ReLu_diff``.
    """

    def __init__(self):
        # The weights for the 3 layers, the first column is the bias
        self.w1 = np.array([[-0.1, 0.7],
                            [0.3, -0.2]])

        self.w2 = np.array([[-0.2, 1.1, 0.2],
                            [-0.1, 0.5, -0.3]])

        self.w3 = np.array([[0.1, 0.5, 2.3]])

    @staticmethod
    def _with_bias(a):
        """Prepend a row of ones to ``a`` (one per column) for the bias weights."""
        return np.vstack([np.ones((1, a.shape[1])), a])

    def forward(self, x):
        """Run forward propagation, input -> output.

        The bias-augmented inputs of every layer and the pre-activation sums
        are cached on the instance because ``backward`` needs them.

        Parameters
        ----------
        x : array_like
            Input of shape ``(1, N)`` with one sample per column. Scalars and
            1-D sequences are promoted to 2-D with ``np.atleast_2d``.

        Returns
        -------
        numpy.ndarray
            Predictions of shape ``(1, N)``.
        """
        x = np.atleast_2d(np.asarray(x))

        # First layer
        self.x_b = self._with_bias(x)  # add the bias term to the input vector
        self.b1 = np.matmul(self.w1, self.x_b)  # weighted sum of the inputs
        a1 = ReLu_forward(self.b1)  # the activation function

        # Second layer
        self.a1_b = self._with_bias(a1)  # add the bias term to the vector
        self.b2 = np.matmul(self.w2, self.a1_b)  # weighted sum of the previous layer's output
        a2 = ReLu_forward(self.b2)  # the activation function

        # Third layer: no activation function on the last layer
        self.a2_b = self._with_bias(a2)  # add the bias term to the vector
        y_hat = np.matmul(self.w3, self.a2_b)
        return y_hat

    def backward(self, part_L_y_hat):
        """Backpropagate to compute the gradient of the loss for every weight.

        Uses the values cached by the most recent ``forward`` call, so
        ``forward`` must have been called first.

        Parameters
        ----------
        part_L_y_hat : array_like
            Partial derivative of the loss with respect to the prediction,
            shape ``(1, N)`` matching the last forward pass.

        Returns
        -------
        tuple of numpy.ndarray
            ``(grad_w1, grad_w2, grad_w3)`` with the same shapes as
            ``w1``, ``w2`` and ``w3``. For ``N > 1`` the gradients are summed
            over the samples in the batch.
        """
        # Third layer (linear output, so delta is the loss derivative itself)
        delta_3 = np.atleast_2d(part_L_y_hat)
        # A matrix product, like the other layers, sums the per-sample outer
        # products; for a single sample it equals the elementwise product.
        grad_w3 = np.matmul(delta_3, self.a2_b.T)

        # Second layer; the bias column of w3 is skipped because the bias
        # input is constant and receives no gradient from the layer below.
        delta_2 = np.matmul(self.w3[:, 1:].T, delta_3) * ReLu_diff(self.b2)
        grad_w2 = np.matmul(delta_2, self.a1_b.T)

        # First layer
        delta_1 = np.matmul(self.w2[:, 1:].T, delta_2) * ReLu_diff(self.b1)
        grad_w1 = np.matmul(delta_1, self.x_b.T)

        return grad_w1, grad_w2, grad_w3

    def update_weights(self, g1, g2, g3, gamma):
        """Take one gradient-descent step.

        Parameters
        ----------
        g1, g2, g3 : numpy.ndarray
            Gradients for ``w1``, ``w2`` and ``w3``.
        gamma : float
            Step size (learning rate) that scales the gradients.
        """
        self.w1 = self.w1 - gamma*g1
        self.w2 = self.w2 - gamma*g2
        self.w3 = self.w3 - gamma*g3
        
        

## Forward

In [10]:
# One input sample, shaped (1, 1), pushed through a freshly built network
# that starts from its hard-coded initial weights.
x = np.array([[2]])
net = Simple_network()
y_hat = net.forward(x)
print('Prediction by network:',  y_hat[0,0])

Prediction by network: 1.9799999999999998


## Loss

In [11]:
# Target value for the regression task: y = x^2 + 3 at the current input
y = x * x + 3
print('Ground truth:', y[0,0])

# Halved squared-error loss (regression) between target and prediction
residual = y - y_hat
L = 0.5 * residual**2
print('Loss:', L[0,0])

Ground truth: 7
Loss: 12.600200000000003


## Backward

In [12]:
# dL/dy_hat for the halved squared-error loss L = 1/2*(y - y_hat)^2
part_L_y_hat = np.negative(y - y_hat)

# Backpropagate once and show the gradient of every layer
g1, g2, g3 = net.backward(part_L_y_hat)
for title, grad in (('Gradients layer 1:', g1),
                    ('Gradients layer 2:', g2),
                    ('Gradients layer 3:', g3)):
    print(title)
    print(grad)
    print('')

# Take a single gradient-descent step with a small step size
learning_rate = 0.001
net.update_weights(g1, g2, g3, learning_rate)

# Re-evaluate the same input with the updated weights
y_hat = net.forward(x)
print('Updated prediciton', y_hat[0,0])

# The loss function (regression), recomputed after the update
L = 0.5 * (y - y_hat)**2
print('Loss:', L[0,0])


Gradients layer 1:
[[ -8.534 -17.068]
 [  0.      0.   ]]

Gradients layer 2:
[[ -2.51    -3.263    0.    ]
 [-11.546  -15.0098   0.    ]]

Gradients layer 3:
[[-5.02   -6.1746 -2.761 ]]

Updated prediciton 2.1435048475206897
Loss: 11.792772583027517
