In [None]:
import mnist
import numpy as np

In [None]:
class ConvLayer:
    """
    Convolution layer made of 3x3 filters, with forward and backward passes.
    """

    def __init__(self, num_filters):
        """
        num_filters: total number of filters
        filters: 3d array with dimensions (num_filters, 3, 3), drawn from a
                 standard normal and divided by 9
        """
        self.num_filters = num_filters
        self.filters = np.random.randn(num_filters, 3, 3) / 9

    def get_regions(self, image):
        """
        image: 2d matrix of the image
        return: list of [patch, row, col] entries, one per valid 3x3 window,
                in row-major order (no padding, stride 1)
        """
        rows, cols = image.shape
        return [
            [image[r:(r + 3), c:(c + 3)], r, c]
            for r in range(rows - 2)
            for c in range(cols - 2)
        ]

    def forward(self, input):
        """
        Forward pass for the convolution layer.
        input: 2d input matrix of shape (h, w)
        return: array of shape (h - 2, w - 2, num_filters)
        """
        # Cached so backprop can revisit the same patches.
        self.last_input = input
        rows, cols = input.shape
        output = np.zeros((rows - 2, cols - 2, self.num_filters))
        for patch, r, c in self.get_regions(input):
            # The patch broadcasts against every filter; summing over the
            # two spatial axes leaves one response per filter.
            output[r, c] = (patch * self.filters).sum(axis=(1, 2))
        return output

    def backprop(self, d_out, lr=0.001):
        """
        d_out: loss gradient w.r.t. this layer's output, (h - 2, w - 2, num_filters)
        lr: learning rate
        return: None (the filters are updated in place; no input gradient
                is produced)
        """
        filter_grad = np.zeros(self.filters.shape)
        for patch, r, c in self.get_regions(self.last_input):
            # Per-filter scalar gradient times the patch that produced it.
            filter_grad += d_out[r, c][:, np.newaxis, np.newaxis] * patch

        # Gradient-descent step on the filters.
        self.filters -= lr * filter_grad
        return None

In [None]:
class Sigmoid:
    """
    Sigmoid activation with forward and backward passes.
    """

    def __init__(self):
        # Output of the most recent forward pass (sigmoid already applied).
        self.last = None

    def sigmoid(self, x):
        """Element-wise logistic function 1 / (1 + e^-x)."""
        return 1 / (1 + np.exp(-x))

    def forward(self, x):
        """
        x: pre-activation values
        return: sigmoid(x); also cached for use in backprop
        """
        self.last = self.sigmoid(x)
        return self.last

    def backprop(self, x):
        """
        x: loss gradient w.r.t. this layer's output
        return: loss gradient w.r.t. this layer's input
        """
        # self.last already holds sigmoid(input), so the local derivative is
        # last * (1 - last). Applying the sigmoid again would give the wrong
        # derivative.
        activated = self.last
        return x * activated * (1 - activated)

In [None]:
class Relu:
    """
    ReLU activation with forward and backward passes.
    """

    def __init__(self):
        # Input of the most recent forward pass.
        self.lastf = None

    def forward(self, x):
        """
        x: pre-activation values
        return: max(0, x) element-wise; x is cached for backprop
        """
        self.lastf = x
        return np.maximum(0, x)

    def backprop(self, x):
        """
        x: loss gradient w.r.t. this layer's output
        return: loss gradient w.r.t. this layer's input
        """
        # The gradient passes through unchanged where the forward input was
        # positive and is blocked elsewhere. The mask must come from the
        # cached input, not from the sign of the incoming gradient.
        d_input = np.array(x, copy=True)
        d_input[self.lastf <= 0] = 0
        return d_input

In [None]:
class MaxPool:
    """
    2x2 max pooling with stride 2: halves the height and width of the input
    by keeping the maximum value of each window, per channel.
    """

    def __init__(self):
        pass

    def get_regions(self, image):
        '''
        Generates the non-overlapping 2x2 image regions to pool over.
        image: 3d array of shape (h, w, num_filters)
        return: list of [region, i, j] where (i, j) indexes the pooled output;
                a trailing odd row/column is not covered by any region
        '''
        h, w, _ = image.shape
        regions_list = []
        for i in range(h // 2):
            for j in range(w // 2):
                im_region = image[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]
                regions_list.append([im_region, i, j])
        return regions_list

    def forward(self, input):
        '''
        Performs a forward pass of the maxpool layer using the given input.
        Returns a 3d numpy array with dimensions (h // 2, w // 2, num_filters).
        input is a 3d numpy array with dimensions (h, w, num_filters)
        '''
        # Cached so backprop can locate the maxima again.
        self.last_input = input
        h, w, num_filters = input.shape
        output = np.zeros((h // 2, w // 2, num_filters))

        for im_region, i, j in self.get_regions(input):
            output[i, j] = np.amax(im_region, axis=(0, 1))

        return output

    def backprop(self, d_out):
        """
        d_out: loss gradient w.r.t. this layer's output, (h // 2, w // 2, num_filters)
        return: loss gradient w.r.t. this layer's input, same shape as the
                cached forward input
        """
        d_input = np.zeros(self.last_input.shape)
        for im_region, i, j in self.get_regions(self.last_input):
            amax = np.amax(im_region, axis=(0, 1))
            # Only the positions that held the per-channel maximum receive
            # gradient (ties all receive it).
            is_max = im_region == amax
            # Pooled cell (i, j) corresponds to the input window starting at
            # (2*i, 2*j); the window offset must be scaled by the stride.
            d_input[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)] = np.where(
                is_max, d_out[i, j], 0.0
            )

        return d_input

In [None]:
class Dense:
    """
    Fully connected layer with a sigmoid activation.
    """

    def __init__(self, n, input_len):
        """
        n: number of neurons
        input_len: length of the (flattened) input
        """
        self.neurons = n
        self.weights = np.random.randn(input_len, n) / input_len
        self.biases = np.zeros(n)
        # Caches filled in by forward() and consumed by backprop().
        self.last = None            # activations sigmoid(wx + b)
        self.total = None           # pre-activations wx + b
        self.lastinput = None       # flattened input
        self.lastinputshape = None  # original input shape

    def sigmoid(self, x):
        """Element-wise logistic function 1 / (1 + e^-x)."""
        return 1 / (1 + np.exp(-x))

    def forward(self, input):
        """
        input: array of any shape holding input_len values
        o/p: sigmoid(wx + b), a 1d array with one entry per neuron
        """
        self.lastinputshape = input.shape
        flat = input.flatten()
        self.lastinput = flat

        pre_activation = np.dot(self.weights.T, flat) + self.biases
        self.total = pre_activation

        activation = self.sigmoid(pre_activation)
        self.last = activation
        return activation

    def backprop(self, gradient, lr):
        """
        gradient: loss gradient coming from the following layer
        lr: learning rate
        o/p: loss gradient w.r.t. this layer's input, in the input's
             original shape
        """
        sig = self.sigmoid(self.total)
        # Chain rule through the sigmoid: dL/dz = sigma'(z) * dL/da.
        d_total = sig * (1 - sig) * gradient

        # dL/dW is the outer product of the input and dL/dz.
        d_weights = self.lastinput[:, np.newaxis] * d_total[np.newaxis, :]
        d_biases = d_total

        # Must use the weights from before the update below.
        d_input = np.matmul(self.weights, d_total)

        # w = w - lr * dL/dw ; b = b - lr * dL/db
        self.weights -= lr * d_weights
        self.biases -= lr * d_biases

        return d_input.reshape(self.lastinputshape)

In [None]:
class Softmax:
    """
    Dense layer with a softmax activation function.
    """

    def __init__(self, input_len, n):
        """
        n: number of neurons
        input_len: input size
        """
        self.weights = np.random.randn(input_len, n) / input_len
        self.biases = np.zeros(n)

    def _probabilities(self, totals):
        # Subtracting the maximum leaves the softmax unchanged mathematically
        # but keeps np.exp from overflowing on large totals.
        shifted = np.exp(totals - np.amax(totals))
        return shifted / np.sum(shifted)

    def forward(self, input):
        '''
        Performs a forward pass of the softmax layer using the given input.
        Returns a 1d numpy array containing the respective probability values.
        - input can be any array with any dimensions holding input_len values;
          it is flattened before the weights are applied.
        '''
        self.last_input_shape = input.shape
        flat = input.flatten()
        self.last_input = flat

        totals = np.dot(flat, self.weights) + self.biases
        self.last_totals = totals

        return self._probabilities(totals)

    def backprop(self, d_out, lr):
        """
        d_out: loss gradient w.r.t. this layer's output probabilities
        lr: learning rate
        o/p: loss gradient w.r.t. this layer's input, in the input's
             original shape (all zeros when d_out is all zeros)
        """
        d_out = np.asarray(d_out, dtype=float)
        probs = self._probabilities(self.last_totals)

        # Softmax Jacobian-vector product: with dp_i/dz_k = p_i * (delta_ik - p_k),
        # dL/dz_k = p_k * (d_out_k - sum_i d_out_i * p_i). For a gradient with
        # a single non-zero entry this matches the per-class formula, and it
        # also handles several non-zero entries or none at all.
        d_totals = probs * (d_out - np.dot(d_out, probs))

        d_weights = np.matmul(self.last_input[np.newaxis].T, d_totals[np.newaxis])
        d_biases = d_totals
        # Must use the weights from before the update below.
        d_inputs = np.matmul(self.weights, d_totals)

        # Update weights / biases
        self.weights -= lr * d_weights
        self.biases -= lr * d_biases

        # Returned in the shape the previous layer produced.
        return d_inputs.reshape(self.last_input_shape)

In [None]:
# Only the first 1000 examples of each split are kept in the interest of time.
# Widen or drop the slices to use more of the data.
train_images, train_labels = mnist.train_images()[:1000], mnist.train_labels()[:1000]
test_images, test_labels = mnist.test_images()[:1000], mnist.test_labels()[:1000]

In [None]:
# Layer instances shared by forward() and backward().
# Shapes are per single image; construction order is kept so the random
# weight initialisation draws happen in the same sequence.
conv = ConvLayer(num_filters=8)               # 28x28 -> 26x26x8
pool = MaxPool()                              # 26x26x8 -> 13x13x8
dense = Dense(n=20, input_len=13 * 13 * 8)    # 13x13x8 (flattened) -> 20
softmax = Softmax(input_len=20, n=10)         # 20 -> 10
act = Sigmoid()                               # element-wise, shape unchanged

In [None]:
def forward(image, label):
    '''
    Runs one image through conv -> sigmoid -> pool -> dense -> softmax and
    scores the prediction.
    - image is a 2d numpy array with pixel values scaled here by 1/255
    - label is a digit, used as an index into the network output
    Returns (out, loss, acc): the network output, the cross-entropy loss
    -log(out[label]), and 1 if the arg-max of out equals label else 0.
    '''
    out = image / 255
    for layer in (conv, act, pool, dense, softmax):
        out = layer.forward(out)

    # Cross-entropy loss for the true class, and a 0/1 correctness flag.
    loss = -np.log(out[label])
    acc = int(np.argmax(out) == label)

    return out, loss, acc

In [None]:
def backward(gradient, lr):
    """
    Backward controller for the entire model: pushes the gradient through
    the layers in the reverse of the forward order.
    gradient: loss gradient w.r.t. the softmax output
    lr: learning rate, forwarded to the layers that take one
    return: None
    """
    d_dense_out = softmax.backprop(gradient, lr)
    d_pool_out = dense.backprop(d_dense_out, lr)
    d_act_out = pool.backprop(d_pool_out)
    d_conv_out = act.backprop(d_act_out)
    # First layer of the network: its return value is not used.
    conv.backprop(d_conv_out, lr)

In [None]:

def train(im, label, lr=0.01):
    """
    Controller for one training step: forward pass, loss gradient, backward pass.
    im: image matrix
    label: label/class of the image, used as an index into the network output
    lr: learning rate
    return: (loss, acc) as computed by forward()
    """
    out, loss, acc = forward(im, label)

    # The loss is -log(out[label]), so its gradient w.r.t. the output is
    # non-zero only at the true class. The gradient is sized from the output
    # rather than a fixed class count so it follows the network definition.
    gradient = np.zeros(out.shape)
    gradient[label] = -1 / out[label]

    backward(gradient, lr)
    return loss, acc

In [None]:
# Train for 100 epochs, one sample at a time, and report the mean loss and
# accuracy of each epoch.
for epoch in range(100):
    print('--- Epoch %d ---' % (epoch + 1))

    loss = 0
    num_correct = 0
    for im, label in zip(train_images, train_labels):
        sample_loss, acc = train(im, label)
        loss += sample_loss
        num_correct += acc

    # Average over the actual number of training samples so the report stays
    # correct if the dataset slice is changed.
    num_train = len(train_images)
    print(loss / num_train, num_correct / num_train)

In [None]:
# Evaluate the trained CNN on the test split (forward passes only).
print('\n--- Testing the CNN ---')

# Keep just (loss, acc) from each forward() result.
results = [forward(im, label)[1:] for im, label in zip(test_images, test_labels)]
loss = sum(sample_loss for sample_loss, _ in results)
num_correct = sum(sample_acc for _, sample_acc in results)

num_tests = len(test_images)
print('Test Loss:', loss / num_tests)
print('Test Accuracy:', num_correct / num_tests)

In [None]:
import gc 
# Force a full garbage-collection pass; as the last expression of the cell,
# the call's return value is what the notebook displays.
gc.collect()

In [None]:
##### TESTING ############
# img = None
# for i,j in enumerate(train_images):
#     img = j
#     if i==1:
#         break
# test = forward(img,0)
# # ss = np.array([1.0]*1352)
# d = Dense(30,13*13*8)
# dd = d.forward(test)
# dd.shape
# ss = np.array([1.0]*30)
# out = d.backprop(ss,0.1)
# print(out)
# print(out.shape)