# A network with a single convolution layer

In [1]:
# Imports
import os
import numpy as np
from matplotlib import pyplot as plt 
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score
from copy import deepcopy

## The ConvNet

In [3]:
class Relu:
    """Rectified linear unit, applied element-wise."""

    @staticmethod
    def activation(z):
        """Return ``max(z, 0)`` element-wise as a new array.

        The input is not modified, so the caller can keep using the
        pre-activation values (e.g. for the backward pass).
        """
        return np.maximum(z, 0)

    @staticmethod
    def derivative(z):
        """Return the ReLU gradient: 1 where ``z > 0`` and 0 elsewhere.

        The result is a new array with the same dtype as *z*; the input
        is not modified.
        """
        z = np.asarray(z)
        return (z > 0).astype(z.dtype)
        
class Sigmoid:
    """Logistic sigmoid activation and its derivative."""

    @staticmethod
    def activation(z):
        """Return ``1 / (1 + exp(-z))`` evaluated element-wise."""
        denominator = 1 + np.exp(-z)
        return 1 / denominator

    @staticmethod
    def derivative(z):
        """Return ``s * (1 - s)`` where ``s`` is the sigmoid of *z*."""
        s = Sigmoid.activation(z)
        return s * (1 - s)
    
class MSE:
    """Mean-squared-error cost paired with an output activation."""

    def __init__(self, activation_fn=None):
        # Object providing ``activation(z)`` and ``derivative(z)``;
        # ``activation`` and ``delta`` below call into it.
        self.activation_fn = activation_fn

    def activation(self, z):
        """Apply the configured activation function to *z*."""
        fn = self.activation_fn
        return fn.activation(z)

    @staticmethod
    def loss(y_true, y_pred):
        """Return the mean of the squared differences."""
        residual = y_pred - y_true
        return np.mean(residual ** 2)

    @staticmethod
    def derivative(y_true, y_pred):
        """Return ``y_pred - y_true``."""
        return y_pred - y_true

    def delta(self, y_true, y_pred):
        """Return the error term: cost derivative times activation derivative.

        The activation derivative is evaluated at *y_pred*.
        """
        # The cost derivative is computed first, matching the original
        # evaluation order.
        error = self.derivative(y_true, y_pred)
        slope = self.activation_fn.derivative(y_pred)
        return error * slope

In [5]:
class Conv:
    """Single-channel 2-D convolution layer with one square filter.

    The layer computes a cross-correlation of a zero-padded 2-D input with
    a ``kernel_size x kernel_size`` filter, moving the window by ``stride``
    in both directions.
    """

    def __init__(self, kernel_size, pad=0, stride=1):
        """Create the layer.

        Args:
            kernel_size: Side length of the square filter.
            pad: Number of zero rows/columns added on every side of the input.
            stride: Step of the sliding window in both directions.
        """
        self.kernel_size = kernel_size
        # Filter weights start uniformly random in [0, 1).
        self.filter = np.random.rand(self.kernel_size, self.kernel_size)
        self.cache = None
        self.pad = pad
        self.stride = stride

    def _output_shape(self, h_x, w_x):
        """Return (h_o, w_o) for an unpadded input of shape (h_x, w_x)."""
        span = self.kernel_size - 2 * self.pad
        h_o = (h_x - span) // self.stride + 1
        w_o = (w_x - span) // self.stride + 1
        if h_o < 1 or w_o < 1:
            raise ValueError(
                "kernel_size %d does not fit a (%d, %d) input with pad=%d"
                % (self.kernel_size, h_x, w_x, self.pad)
            )
        return h_o, w_o

    def _window(self, arr, r, c, h_o, w_o):
        """Return the strided view of *arr* seen by filter tap (r, c).

        Element (i, j) of the view is ``arr[stride * i + r, stride * j + c]``.
        """
        s = self.stride
        return arr[r:r + s * (h_o - 1) + 1:s, c:c + s * (w_o - 1) + 1:s]

    def forward(self, x):
        """Forward pass.

        Args:
            x: 2-D array of shape (h, w).

        Returns:
            2-D float array of shape (h_o, w_o) with
            ``h_o = (h + 2 * pad - kernel_size) // stride + 1`` (same for w_o).

        Raises:
            ValueError: If *x* is not 2-D or the filter does not fit.
        """
        x = np.asarray(x)
        if x.ndim != 2:
            raise ValueError("Conv.forward expects a 2-D input")
        h_o, w_o = self._output_shape(*x.shape)

        # Zero-pad every side by self.pad.
        xp = np.pad(x, self.pad, mode='constant')

        # Accumulate one filter tap at a time; each tap contributes a
        # strided slice of the padded input scaled by its weight.
        out = np.zeros((h_o, w_o))
        for r in range(self.kernel_size):
            for c in range(self.kernel_size):
                out += self._window(xp, r, c, h_o, w_o) * self.filter[r, c]

        # Saving the (unpadded) input in 'cache' for backprop.
        self.cache = x
        return out

    def backward(self, dout, learning_rate=None):
        """Backward pass.

        Args:
            dout: Gradient of the loss w.r.t. the output of the last
                ``forward`` call; must have that output's shape.
            learning_rate: If given, the filter is updated in place with
                ``update(df, learning_rate)`` after the gradients have been
                computed. If ``None`` (default) the filter is left unchanged.

        Returns:
            Tuple ``(dx, df)``: gradient w.r.t. the cached input (same shape
            as the input) and gradient w.r.t. the filter.

        Raises:
            RuntimeError: If called before ``forward``.
            ValueError: If *dout* has the wrong shape.
        """
        if self.cache is None:
            raise RuntimeError("Conv.backward called before Conv.forward")

        # Retrieving information from the "cache".
        x = self.cache
        (h, w) = x.shape
        h_o, w_o = self._output_shape(h, w)

        dout = np.asarray(dout)
        if dout.shape != (h_o, w_o):
            raise ValueError(
                "dout has shape %s, expected %s" % (dout.shape, (h_o, w_o))
            )

        xp = np.pad(x, self.pad, mode='constant')
        df = np.zeros_like(self.filter)
        # Gradient w.r.t. the padded input; always float so integer inputs
        # do not truncate the gradient.
        dxp = np.zeros(xp.shape, dtype=float)

        for r in range(self.kernel_size):
            for c in range(self.kernel_size):
                # df[r, c] = sum_ij xp[s*i + r, s*j + c] * dout[i, j]
                df[r, c] = np.sum(self._window(xp, r, c, h_o, w_o) * dout)
                # Scatter the tap's contribution back onto the input cells it
                # read; the basic-slice view writes through to dxp.
                grad_view = self._window(dxp, r, c, h_o, w_o)
                grad_view += self.filter[r, c] * dout

        # Remove padding for dx (explicit end index so pad == 0 works).
        dx = dxp[self.pad:self.pad + h, self.pad:self.pad + w]

        if learning_rate is not None:
            self.update(df, learning_rate)
        return dx, df

    def update(self, df, learning_rate):
        """Apply one gradient-descent step to the filter in place."""
        self.filter -= learning_rate * df