In [1]:
import numpy as np
from scipy import signal

In [2]:
class Layer:
    """Common interface shared by every layer of the network.

    The methods here are placeholders that do nothing and return ``None``;
    concrete layers override ``forward`` and ``backward``.
    """

    def __init__(self):
        # Both slots start empty; subclasses fill them in while propagating.
        self.output = None
        self.input = None

    def forward(self, input):
        """Hook for the forward pass: subclasses return the layer output."""
        return None

    def backward(self, output_gradient, learning_rate):
        """Hook for the backward pass.

        Subclasses update their parameters and return the input gradient
        (dE/dX).

        Args:
            output_gradient: dE/dY, the error gradient w.r.t. the layer output.
            learning_rate: Step size (alpha) for the parameter update.
        """
        return None

In [3]:
class Dense(Layer):
    """Fully connected layer computing ``Y = W . X + B``."""

    def __init__(self, input_size, output_size):
        """Create randomly initialised parameters.

        Args:
            input_size: Number of input features (columns of the weight matrix).
            output_size: Number of output features (rows of the weight matrix).
        """
        # Initialise the `input`/`output` slots declared by the base class.
        super().__init__()
        self.weights = np.random.randn(output_size, input_size)
        self.biases = np.random.randn(output_size, 1)

    def forward(self, input):
        """Return ``weights . input + biases`` and remember ``input`` for backward.

        Args:
            input: Array whose first dimension is ``input_size``
                (an ``(input_size, 1)`` column vector in the single-sample case).
        """
        self.input = input
        return np.dot(self.weights, self.input) + self.biases

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return dE/dX.

        Args:
            output_gradient: dE/dY; must have the same shape as ``biases``
                (``(output_size, 1)``) for the in-place bias update to work.
            learning_rate: Step size for the parameter update.

        Returns:
            The gradient of the error w.r.t. this layer's input, ``W^T . dE/dY``.
        """
        # dE/dW = dE/dY . X^T
        weight_gradient = np.dot(output_gradient, self.input.T)
        # dE/dX must be computed with the weights that produced the forward
        # output, i.e. BEFORE they are modified below.
        input_gradient = np.dot(self.weights.T, output_gradient)
        self.weights -= learning_rate * weight_gradient
        # dE/dB = dE/dY
        self.biases -= learning_rate * output_gradient
        return input_gradient

In [4]:
class Activation(Layer):
    """Layer that applies an element-wise function and its derivative.

    It holds no trainable parameters, so ``backward`` ignores ``learning_rate``.
    """

    def __init__(self, activation, activation_prime):
        """Store the activation callable and the callable for its derivative."""
        self.activation_prime = activation_prime
        self.activation = activation

    def forward(self, input):
        """Remember ``input`` and return ``activation(input)``."""
        self.input = input
        activated = self.activation(self.input)
        return activated

    def backward(self, output_gradient, learning_rate):
        """Return ``output_gradient`` multiplied element-wise by f'(input)."""
        local_slope = self.activation_prime(self.input)
        return np.multiply(output_gradient, local_slope)

In [5]:
class Tanh(Activation):
    """Hyperbolic-tangent activation layer."""

    def __init__(self):
        def tanh(x):
            return np.tanh(x)

        def tanh_prime(x):
            # d/dx tanh(x) = 1 - tanh(x)^2
            return 1 - (np.tanh(x) ** 2)

        super().__init__(tanh, tanh_prime)

In [6]:
class Convolutional(Layer):
    """Convolutional layer built on "valid" 2-D cross-correlation.

    For every output channel ``i`` the forward pass computes
    ``Y[i] = B[i] + sum_j correlate2d(X[j], K[i, j], "valid")``
    where ``j`` runs over the input channels.
    """

    def __init__(self, input_shape, kernel_size, depth):
        """Create randomly initialised kernels and biases.

        Args:
            input_shape: ``(input_depth, input_height, input_width)``, e.g.
                ``(3, 24, 24)`` for a 24x24 RGB image.
            kernel_size: Side length of each (square) kernel.
            depth: Number of kernels, which is also the number of output channels.
        """
        # Initialise the `input`/`output` slots declared by the base class.
        super().__init__()
        input_depth, input_height, input_width = input_shape
        self.depth = depth
        self.input_shape = input_shape
        self.input_depth = input_depth
        # "valid" cross-correlation shrinks each spatial dimension: Y = I - K + 1.
        self.output_shape = (depth, input_height - kernel_size + 1, input_width - kernel_size + 1)
        # One 2-D kernel per (output channel, input channel) pair.
        self.kernels_shape = (depth, input_depth, kernel_size, kernel_size)
        self.kernels = np.random.randn(*self.kernels_shape)
        # One bias value per output element.
        self.biases = np.random.randn(*self.output_shape)

    def forward(self, input):
        """Return the layer output for ``input`` and remember ``input``.

        Args:
            input: Array indexed as ``input[channel]`` with one 2-D map per
                input channel (expected shape: ``input_shape``).

        Returns:
            Array of shape ``output_shape``.
        """
        self.input = input
        # Start from the biases so each channel ends up as B[i] + sum of correlations.
        self.output = np.copy(self.biases)
        for i in range(self.depth):  # output channels / kernels
            for j in range(self.input_depth):  # input channels
                # Accumulate: a plain assignment would throw away the bias and
                # every input channel except the last one.
                self.output[i] += signal.correlate2d(self.input[j], self.kernels[i, j], "valid")
        return self.output

    def backward(self, output_gradient, learning_rate):
        """Apply one gradient-descent step and return dE/dX.

        Args:
            output_gradient: dE/dY, indexed as ``output_gradient[channel]``
                (expected shape: ``output_shape``).
            learning_rate: Step size for the kernel and bias updates.

        Returns:
            Array of shape ``input_shape`` holding the gradient w.r.t. the input.
        """
        kernel_gradient = np.zeros(self.kernels_shape)
        input_gradient = np.zeros(self.input_shape)

        for i in range(self.depth):  # output channels / kernels
            for j in range(self.input_depth):  # input channels
                # dE/dK[i, j] = X[j] (valid cross-correlation) dE/dY[i]
                kernel_gradient[i, j] = signal.correlate2d(self.input[j], output_gradient[i], "valid")
                # dE/dX[j] = sum_i dE/dY[i] (full convolution) K[i, j]; every
                # output channel contributes, so the terms must be summed.
                input_gradient[j] += signal.convolve2d(output_gradient[i], self.kernels[i, j], "full")

        # The input gradient above was computed with the pre-update kernels.
        self.kernels -= learning_rate * kernel_gradient
        # dE/dB = dE/dY
        self.biases -= learning_rate * output_gradient

        return input_gradient

In [7]:
# Refer to the notes to see why reshape is used
class Reshape(Layer):
    """Layer that only changes the shape of the data flowing through it.

    It holds no trainable parameters, so ``backward`` ignores ``learning_rate``.
    """

    def __init__(self, input_shape, output_shape):
        """Record the shape used going backward and the shape used going forward."""
        self.input_shape, self.output_shape = input_shape, output_shape

    def forward(self, input):
        """Return ``input`` reshaped to ``output_shape``."""
        reshaped = np.reshape(input, self.output_shape)
        return reshaped

    def backward(self, output_gradient, learning_rate):
        """Return ``output_gradient`` reshaped back to ``input_shape``."""
        restored = np.reshape(output_gradient, self.input_shape)
        return restored

In [8]:
# Refer to the notes to see why binary cross entropy is used and how these formulas were derived
def binary_cross_entropy(y_true, y_pred):
    """Return the mean binary cross-entropy between targets and predictions.

    Computes ``-mean(y_true * log(y_pred) + (1 - y_true) * log(1 - y_pred))``.

    Args:
        y_true: Array of target values.
        y_pred: Array of predictions; values must lie strictly between 0 and 1,
            otherwise the logarithms produce ``inf``/``nan``.

    Returns:
        The loss averaged over all elements.
    """
    # The negative-class term uses log(1 - y_pred), matching the derivative in
    # binary_cross_entropy_prime.
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_prime(y_true, y_pred):
    """Return the gradient of the mean binary cross-entropy w.r.t. ``y_pred``.

    Element-wise: ``((1 - y_true) / (1 - y_pred) - y_true / y_pred) / N`` where
    ``N`` is the number of elements in ``y_true``.
    """
    element_count = np.size(y_true)
    negative_term = (1 - y_true) / (1 - y_pred)
    positive_term = y_true / y_pred
    return (negative_term - positive_term) / element_count

In [9]:
class Sigmoid(Activation):
    """Logistic-sigmoid activation layer."""

    def __init__(self):
        # sigma(x) = 1 / (1 + e^(-x))
        sigmoid = lambda x: 1 / (1 + np.exp(-x))

        def sigmoid_prime(x):
            # sigma'(x) = sigma(x) * (1 - sigma(x))
            s = sigmoid(x)
            return s * (1 - s)

        super().__init__(sigmoid, sigmoid_prime)