In [1]:
import numpy as np
from abc import ABC, abstractmethod 

In [2]:
def sumit(A, B):
    """Return the sum of the element-wise product of ``A`` and ``B``."""
    elementwise_product = A * B
    return np.sum(elementwise_product)

def conv(A, K):
    """
    Slide the kernel ``K`` over ``A`` and return the "valid" cross-correlation.

    Each output entry ``Z[i, j]`` is the sum of the element-wise product of
    ``K`` with the window of ``A`` whose top-left corner is ``(i, j)``. The
    kernel is not flipped.

    Rows and columns are handled independently, so neither ``A`` nor ``K``
    has to be square.

    Parameters
    ----------
    A : 2-D array, shape (a_rows, a_cols)
    K : 2-D array, shape (k_rows, k_cols)

    Returns
    -------
    Z : float array, shape (a_rows - k_rows + 1, a_cols - k_cols + 1)

    Raises
    ------
    ValueError
        If the kernel is so much larger than the input that an output
        dimension would be negative.
    """
    a_rows, a_cols = A.shape[0], A.shape[1]
    k_rows, k_cols = K.shape[0], K.shape[1]
    out_rows = a_rows - k_rows + 1
    out_cols = a_cols - k_cols + 1
    if out_rows < 0 or out_cols < 0:
        raise ValueError(
            f"kernel of shape {(k_rows, k_cols)} does not fit in input "
            f"of shape {(a_rows, a_cols)}"
        )

    Z = np.zeros((out_rows, out_cols))
    for i in range(out_rows):
        for j in range(out_cols):
            window = A[i:i + k_rows, j:j + k_cols]
            Z[i, j] = np.sum(window * K)
    return Z

In [3]:
def relU(x):
    """Rectified linear unit for a scalar: ``x`` when ``x >= 0``, otherwise ``0``."""
    if x >= 0:
        return x
    return 0

def relU_prime(x):
    """Slope of ``relU`` at a scalar ``x``: ``1`` when ``x >= 0``, otherwise ``0``."""
    if x >= 0:
        return 1
    return 0

def sigmoid(z):
    """The sigmoid function, ``1 / (1 + exp(-z))``.

    Accepts a scalar or a NumPy array and is applied element-wise.
    """
    # For very negative z, exp(-z) overflows to inf. 1 / (1 + inf) == 0 is
    # still the correct limit, so only the spurious overflow warning is
    # silenced here; the returned values are unchanged.
    with np.errstate(over="ignore"):
        return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function, ``sigmoid(z) * (1 - sigmoid(z))``."""
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    s = sigmoid(z)
    return s * (1 - s)


In [4]:
class Layer(ABC):
    """Abstract base class defining the interface every layer implements."""

    @property
    def size(self):
        """Size of the layer; this base implementation yields ``None``."""
        return None

    @abstractmethod
    def feed_forward(self, A):
        """Compute and return this layer's output for the input ``A``."""

    @abstractmethod
    def backprop(self, dA):
        """
        Take the gradient dL/dA of this layer and return the gradient
        dL/dA of the layer below.
        """


class ConvLayer(Layer):
    """
    Convolution layer with one square filter and a scalar bias.

    The layer maps a square ``input_size x input_size`` input to a square
    output with side ``input_size - filter_size + 1``.
    """

    def __init__(self, input_size, filter_size, g=relU, dg=relU_prime):
        """
        Parameters
        ----------
        input_size : int
            Side length of the square input.
        filter_size : int
            Side length of the square filter ``K``.
        g : callable
            Scalar activation function; it is vectorized internally.
        dg : callable
            Scalar derivative of ``g``; it is vectorized internally.
        """
        self.K = np.random.rand(filter_size, filter_size)
        self.b = 1
        # np.vectorize infers the output dtype from the result for the first
        # element unless otypes is given. relU returns the int 0 for a
        # negative input, so without otypes a negative first entry would
        # truncate every other activation to an integer.
        self.g = np.vectorize(g, otypes=[float])
        self.dg = np.vectorize(dg, otypes=[float])
        self._output_size = input_size - filter_size + 1
        self._size = self._output_size ** 2
        # Not wired up anywhere in this class; initialised so that
        # incoming_layer() returns None instead of raising AttributeError.
        self._incoming_layer = None

    @property
    def size(self):
        """Number of entries in the layer output (output side squared)."""
        return self._size

    @size.setter
    def size(self, value):
        self._size = value

    def feed_forward(self, A):
        """
        Apply the filter and bias to ``A``, then the activation.

        Stores the pre-activation in ``self.Z`` and the activation in
        ``self.A`` (both are needed by ``backprop``) and returns ``self.A``.
        """
        self.Z = conv(A, self.K) + self.b
        self.A = self.g(self.Z)
        return self.A

    def grad_k(self, A, DZ):
        """
        Return ``conv(A, DZ)``: the gradient of the loss with respect to the
        filter when ``A`` is the layer input and ``DZ`` is dL/dZ.
        """
        return conv(A, DZ)

    def backprop(self, dA):
        """
        Return the gradient dL/dA for the layer below.

        ``dA`` is reshaped to ``(output_size, output_size)``, multiplied by
        the activation derivative at ``self.Z`` to get dL/dZ, which is then
        zero-padded and correlated with the flipped filter. ``feed_forward``
        must have been called first so that ``self.Z`` exists. The filter and
        bias are not updated here.
        """
        dimk = self.K.shape[0]
        dZ = dA.reshape(self._output_size, self._output_size) * self.dg(self.Z)
        # Padding by dimk - 1 on every side restores the input's side length.
        return conv(np.pad(dZ, dimk - 1), np.flip(self.K))

    def incoming_layer(self):
        """Return the stored incoming layer (``None`` unless one was assigned)."""
        return self._incoming_layer

class DenseLayer(Layer):
    """
    Fully connected layer computing ``g(W @ a + B)`` on a flattened input.
    """

    def __init__(self, input_size, size, g = sigmoid, dg = sigmoid_prime):
        """
        Parameters
        ----------
        input_size : int
            Number of entries in the flattened input.
        size : int
            Number of units in this layer.
        g : callable
            Activation function applied to the whole pre-activation array.
        dg : callable
            Derivative of ``g``, applied to the whole pre-activation array.
        """
        self.B = np.random.rand(size)
        self.W = np.random.randn(size, input_size)
        self._size = size
        self.dg = dg
        self.g = g
        # Not wired up anywhere in this class; initialised so that
        # incoming_layer() returns None instead of raising AttributeError.
        self._incoming_layer = None

    @property
    def size(self):
        """Number of units in this layer."""
        return self._size

    @size.setter
    def size(self, value):
        self._size = value

    def feed_forward(self, A):
        """
        Flatten ``A``, apply the affine map and the activation.

        Stores the pre-activation in ``self.Z`` and the activation in
        ``self.A`` and returns ``self.A``.
        """
        self.Z = self.W @ A.flatten() + self.B
        self.A = self.g(self.Z)
        return self.A

    def dLdA(self, dZ):
        """
        Map dL/dZ of this layer to dL/dA of the layer below.

        Since ``Z = W @ a + B``, the gradient with respect to ``a`` is
        ``W.T @ dZ``, a vector with ``input_size`` entries.
        """
        return self.W.T @ dZ

    def backprop(self, dA):
        """
        Return the gradient dL/dA for the layer below.

        ``feed_forward`` must have been called first so that ``self.Z``
        exists. The weights and biases are not updated here.
        """
        dZ = dA * self.dg(self.Z)
        return self.dLdA(dZ)

    def incoming_layer(self):
        """Return the stored incoming layer (``None`` unless one was assigned)."""
        return self._incoming_layer


class InputLayer(Layer):
    """Pass-through layer that hands the network input to the next layer."""

    def __init__(self, size):
        self._size = size

    @property
    def size(self):
        """Size this layer was constructed with."""
        return self._size

    @size.setter
    def size(self, value):
        self._size = value

    def feed_forward(self, A):
        """Remember the input as ``self.A`` and return it unchanged."""
        self.A = A
        return A

    def backprop(self, dA):
        """Nothing lies below the input layer, so no gradient is returned."""
        return None

In [5]:
# Seed the global NumPy RNG before any layer is built so that the random
# weights, the random upstream gradient, and therefore every printed value
# are the same on each Restart & Run All.
np.random.seed(0)

layer1 = InputLayer(size = 5 * 5)
layer2 = ConvLayer(input_size=5, filter_size=2)
layer3 = DenseLayer(input_size=layer2.size, size=4)
layer4 = DenseLayer(input_size=layer3.size, size=5)

model = [layer1, layer2, layer3, layer4]

# Forward pass: feed a 5x5 test input through every layer in order,
# printing each layer's output.
a = np.arange(5 * 5).reshape(5,5)
for layer in model:
    a = layer.feed_forward(a)
    print(a)

# Backward pass: start from a random gradient for the 5 outputs of the last
# layer and propagate it from the top layer down to the input layer.
dA = np.random.randn(5)
for layer in reversed(model):
    print("backprop", layer)
    dA = layer.backprop(dA)


[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]
[[ 4.04489608  6.22456405  8.40423201 10.58389998]
 [14.94323591 17.12290388 19.30257184 21.48223981]
 [25.84157574 28.02124371 30.20091167 32.38057964]
 [36.73991557 38.91958354 41.0992515  43.27891947]]
[1. 1. 1. 1.]
[0.73458053 0.56788047 0.11487934 0.66927916 0.44748707]
backprop <__main__.DenseLayer object at 0x1092c3f10>
backprop <__main__.DenseLayer object at 0x1092c3f70>
backprop <__main__.ConvLayer object at 0x1092c3160>
backprop <__main__.InputLayer object at 0x1092c30d0>
