# Convolutional Neural Network
Spencer Hann
EE 584 - Final Project

In [1]:
import numpy as np

from matplotlib import pyplot as plt
from cnn.data import preprocess_data

In [2]:
# Load a subset of MNIST: 2000 training rows and 1000 testing rows.
(training_examples, training_targets) = preprocess_data(
    "data/mnist_train.csv", max_rows=2000
)
(testing_examples, testing_targets) = preprocess_data(
    "data/mnist_test.csv", max_rows=1000
)

# Bundle (examples, targets) pairs so they can be star-unpacked into train/test calls.
train_set = (training_examples, training_targets)
test_set = (testing_examples, testing_targets)
data = train_set + test_set

loading from file... done.
loading from file... done.


## Proof of Concept: Simple Neural Net Layer

In [3]:
from cnn.cnn_basic import CNN, Layer, DenseSoftmaxLayer

In [4]:
class DenseLayer:
    """Fully-connected layer computing ``flatten(x) . w + b`` (forward pass only)."""

    def __init__(self, insize, outsize=10):
        """Draw random weights and biases.

        insize:  number of values in a flattened input
        outsize: number of output units
        """
        self.insize, self.outsize = insize, outsize

        # Standard-normal draws scaled down by the layer sizes;
        # the weights are drawn before the biases.
        self.w = np.random.randn(insize, outsize) / insize
        self.b = np.random.randn(outsize) / outsize

    def forward(self, image):
        """Flatten `image` and return its affine map, an array of shape (outsize,)."""
        flat = image.flatten()
        return flat.dot(self.w) + self.b


In [5]:
# Two stacked dense layers: 28*28 inputs -> 64 hidden units -> 10 outputs.
layer1, layer2 = DenseLayer(28*28, 64), DenseLayer(64, 10)

In [6]:
def forward(image, label):
    """Feed `image` through `layer1` then `layer2` and score the prediction.

    Returns a pair ``(None, is_correct)``: no loss is computed in this version,
    and ``is_correct`` tells whether the arg-max output index equals `label`.
    """
    scores = layer2.forward(layer1.forward(image))
    return None, np.argmax(scores) == label

In [7]:
def test(images, labels):
    """Return the number of correct `forward` predictions divided by ``len(images)``."""
    # Start from 0.0 so the tally is a float, as the per-example flags are added to it.
    hits = sum((forward(img, lbl)[1] for img, lbl in zip(images, labels)), 0.0)
    return hits / len(images)

In [8]:
# With randomly initialized layers the score should be about 1 / n_classes.
accuracy = test(*test_set) * 100
print(f"Accuracy: {round(accuracy)}%")

Accuracy: 9.0%


## Adding Back Propagation

In [9]:
class DenseLayer:
    """Fully-connected layer with a forward pass and a gradient-descent backward pass."""

    def __init__(self, insize, outsize=10):
        """Draw random weights and biases.

        insize:  number of values in a flattened input
        outsize: number of output units
        """
        self.insize = insize
        self.outsize = outsize

        self.w = np.random.randn(insize, outsize) / insize
        self.b = np.random.randn(outsize) / outsize

    def forward(self, image):
        """Flatten `image`, apply ``x . w + b`` and return the result, shape (outsize,).

        The input and the output are cached on the instance; `backprop` reads
        the cached input.
        """
        self.last_image = image        # kept un-flattened so backprop can restore its shape
        image = image.flatten()

        # fully-connected/matmul phase
        fc = np.dot(image, self.w) + self.b
        self.last_fc = fc

        return fc

    def backprop(self, loss_grad, lr=0.002):
        """Take one gradient-descent step and return the loss gradient w.r.t. the input.

        loss_grad: gradient of the loss with respect to this layer's output,
                   a 1-D array of length ``outsize``
        lr:        learning rate (step size)

        Must be called after `forward`, because it uses the input cached there.
        The returned gradient has the same shape as that cached input.
        """
        flat_input = self.last_image.flatten()

        # Chain rule for out = x . w + b:
        #   d out / d x = w,   d out / d b = 1,   d out / d w = x
        # The input gradient is taken before the weights are modified below.
        lgrad_input = self.w @ loss_grad
        lgrad_biases = loss_grad
        lgrad_weights = np.outer(flat_input, loss_grad)

        # Step *against* the gradient so the loss decreases.
        self.w -= lr * lgrad_weights
        self.b -= lr * lgrad_biases
        return lgrad_input.reshape(self.last_image.shape)


In [10]:
# Network as an ordered tuple of layers: 28*28 inputs -> 64 hidden units -> 10 outputs.
layers = (DenseLayer(28*28, 64), DenseLayer(64, 10))
layer1, layer2 = layers

In [11]:
def forward(image, label):
    """Run `image` through every layer in `layers`, in order, and score the result.

    Returns ``(out, loss, is_correct)`` where ``out`` is the last layer's output,
    ``loss`` is ``-log(out[label])`` and ``is_correct`` tells whether the arg-max
    index of ``out`` equals `label`.

    NOTE(review): the loss is only finite when ``out[label]`` is positive; nothing
    in this function normalizes ``out`` before the log is taken.
    """
    signal = image
    for stage in layers:
        signal = stage.forward(signal)

    target_score = signal[label]
    loss = -np.log(target_score)
    is_correct = np.argmax(signal) == label

    return signal, loss, is_correct

In [12]:
def learn(image, label, lr=0.02):
    """Run one training step on a single example.

    image: input passed to `forward`
    label: index of the correct output class
    lr:    learning rate handed to every layer's `backprop`

    Returns ``(loss, correct)`` as computed by `forward` before any update.
    Examples that are already classified correctly do not trigger an update.
    """
    out, loss, correct = forward(image, label)

    if correct:
        return loss, correct

    # Gradient of loss = -log(out[label]) with respect to `out`: zero everywhere
    # except at the target index. Sized from `out` rather than a fixed class count.
    grad = np.zeros(out.shape)
    grad[label] = - 1 / out[label]

    # Propagate from the last layer back to the first.
    for layer in layers[::-1]:
        grad = layer.backprop(grad, lr=lr)

    return loss, correct

In [13]:
def train(n_epochs, images, labels):
    """Call `learn` on every (image, label) pair for `n_epochs` passes.

    After each pass, prints the epoch index and the percentage of examples that
    were classified correctly during that pass (measured before each update).
    Returns nothing.
    """
    n = len(images)

    for epoch in range(n_epochs):
        ncorrect = 0
        for image, label in zip(images, labels):
            _, c = learn(image, label)
            ncorrect += c

        print(f"Epoch:{epoch}, Accuracy: {100 * ncorrect / n}")

## Demonstration of Forward Propagation

This is a randomly initialized network that we will test the feed forward functionality on.  With 10 evenly represented output classes in our testing data, we expect to see roughly 10% accuracy.

In [14]:
from cnn.cnn_basic import (
    CNN, 
    ConvolutionalLayer, 
    MaxPoolingLayer, 
    DenseSoftmaxLayer,
)

In [15]:
# Untrained network for the feed-forward check: conv -> max-pool -> conv -> dense softmax.
# NOTE(review): the constructor argument meanings come from cnn.cnn_basic and are not
# visible in this notebook; presumably DenseSoftmaxLayer(14*14, 10) is sized to the
# pooled single-channel feature map with 10 output classes — confirm.
cnn = CNN((
    ConvolutionalLayer(1,3,3, first_layer=True),
    MaxPoolingLayer(2),
    ConvolutionalLayer(3,1,3,),
    DenseSoftmaxLayer(14*14, 10),
))

In [16]:
# Evaluate the untrained network on the test set; the trailing ';' hides the
# call's return value from the cell output.
cnn.test(*test_set);

100%|██████████| 1000/1000 [00:08<00:00, 112.58it/s]

Test: 2.31 loss, 11.00% accurate





Approximately random performance; this is to be expected.  It shows that feed forward is working properly.

## Demonstration of Back Propagation

In [17]:
# Fresh network for the training demo: one convolutional layer feeding a dense
# softmax layer, with learning rate 0.01. This rebinds `cnn` from the earlier cell.
# NOTE(review): presumably 28*28*3 is the flattened size of the convolution's
# output (three 28x28 maps) — confirm against cnn.cnn_basic.
cnn = CNN(
    (
        ConvolutionalLayer(1,3,3, first_layer=True),
        DenseSoftmaxLayer(28*28*3, 10),
    ),
    lr = 0.01
)

In [18]:
# Train for 6 epochs on the training set; %time reports how long the call takes.
%time cnn.train_epochs(6, *train_set);

100%|██████████| 2000/2000 [01:21<00:00, 24.45it/s]
  0%|          | 2/2000 [00:00<01:52, 17.83it/s]

Epoch 0/6: 1.82 loss, 51.20% accurate


100%|██████████| 2000/2000 [01:23<00:00, 23.85it/s]
  0%|          | 2/2000 [00:00<01:40, 19.95it/s]

Epoch 1/6: 1.31 loss, 60.25% accurate


100%|██████████| 2000/2000 [01:25<00:00, 23.27it/s]
  0%|          | 3/2000 [00:00<01:33, 21.39it/s]

Epoch 2/6: 1.31 loss, 60.75% accurate


100%|██████████| 2000/2000 [01:30<00:00, 22.08it/s]
  0%|          | 2/2000 [00:00<02:00, 16.58it/s]

Epoch 3/6: 1.37 loss, 59.95% accurate


100%|██████████| 2000/2000 [01:46<00:00, 18.74it/s]
  0%|          | 3/2000 [00:00<01:32, 21.54it/s]

Epoch 4/6: 1.43 loss, 59.25% accurate


100%|██████████| 2000/2000 [01:25<00:00, 23.37it/s]

Epoch 5/6: 1.48 loss, 58.95% accurate
CPU times: user 19min 15s, sys: 12min 30s, total: 31min 46s
Wall time: 8min 54s





In [19]:
# Evaluate the trained network on the held-out test set.
cnn.test(*test_set);

100%|██████████| 1000/1000 [00:13<00:00, 73.38it/s]

Test: 2.07 loss, 44.80% accurate





Though this is significantly better than random, it is still not as high as I'd like it to be.  I believe the reason for this is that the network in its current state is relatively low-capacity and does not contain any non-linearities.