# **CNN From Scratch**
In the following notebook, i have created Convolution Neural Networks simply using numpy. CNNs are considered as one of the best architectures for image classification. Let's dive deep into their implementation from scratch.

# 1) Conv Layer:
The following class implements conv layer of 3x3 filters

In [0]:
import numpy as np

class Conv3x3:
  def __init__(self, num_filters):
    self.num_filters = num_filters
    self.filters = np.random.randn(num_filters, 3, 3) / 9

  def iterate_regions(self, image):
    h,w = image.shape

    for i in range (h-2):
      for j in range (w-2):
        im_region = image[i:(i+3), j:(j+3)]
        yield im_region,i,j
    
  def forward (self, input):
    
    self.last_input = input
    h, w = input.shape
    output = np.zeros((h-2,w-2,self.num_filters))

    for im_region, i, j in self.iterate_regions(input):
      output[i, j] = np.sum(im_region * self.filters, axis=(1, 2))
    
    return output
  
  def backprop(self, d_L_d_out, learn_rate):

    d_L_d_filters = np.zeros(self.filters.shape)

    for im_region, i, j in self.iterate_regions(self.last_input):
      for f in range(self.num_filters):
        d_L_d_filters[f] += d_L_d_out[i,j,f]*im_region
    
    self.filters -=learn_rate * d_L_d_filters

    return None

# 2) Max Pooling Layer:
This creates a maxpool layer with a pooling size of 2

In [0]:
class MaxPool2:

  def iterate_regions(self, image):

    h, w, _ = image.shape 
    new_h = h // 2
    new_w = w // 2

    for i in range(new_h):
      for j in range(new_w):
        im_region = image[(i*2):(i*2+2), (j*2): (j*2+2)]
        yield im_region, i ,j

  def forward(self,input):

    self.last_input = input
    h, w, num_filters = input.shape
    output = np.zeros((h//2, w//2, num_filters))

    for im_region, i, j in self.iterate_regions(input):
      output[i, j] = np.amax(im_region, axis = (0,1))
      
    return output
  
  def backprop(self, d_L_d_out):

    d_L_d_input = np.zeros(self.last_input.shape)

    for im_region, i, j in self.iterate_regions(self.last_input):
      h, w, f = im_region.shape
      amax = np.amax(im_region, axis=(0,1))

      for i2 in range(h):
        for j2 in range(w):
          for f2 in range(f):

            if im_region[i2, j2, f2] == amax[f2]:
              d_L_d_input[i * 2 + i2, j*2 + j2, f2] = d_L_d_out[i,j,f2]
    return d_L_d_input

# 3) Softmax Layer:
This creates a softmax activation layer, that helps our CNN in actually making predictions

In [0]:
class Softmax:

  def __init__(self, input_len, nodes):

    self.weights = np.random.randn(input_len, nodes) / input_len
    self.biases = np.zeros(nodes)

  def forward(self, input):

    self.last_input_shape = input.shape

    input = input.flatten()
    self.last_input = input

    input_len, nodes = self.weights.shape

    totals = np.dot(input, self.weights) + self.biases
    self.last_totals = totals

    exp = np.exp(totals)
    return exp / np.sum(exp, axis = 0)
  
  def backprop(self, d_L_d_out, learn_rate):

    for i, gradient in enumerate(d_L_d_out):
      if gradient == 0:
        continue
      
      t_exp = np.exp(self.last_totals)

      S = np.sum(t_exp)

      d_out_d_t = -t_exp[i] * t_exp / (S ** 2)
      d_out_d_t[i] = t_exp[i] * (S - t_exp[i]) / (S ** 2)

      d_t_d_w = self.last_input
      d_t_d_b = 1
      d_t_d_inputs = self.weights

      d_L_d_t = gradient * d_out_d_t

      d_L_d_w = d_t_d_w[np.newaxis].T @ d_L_d_t[np.newaxis]
      d_L_d_b = d_L_d_t * d_t_d_b
      d_L_d_inputs = d_t_d_inputs @ d_L_d_t

      self.weights -= learn_rate * d_L_d_w
      self.biases -= learn_rate * d_L_d_b

      return d_L_d_inputs.reshape(self.last_input_shape)

# 4) Training over MNIST:
The following code initializes the layers, takes 1000 training images from MNIST dataset and trains the model

In [4]:

from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()


conv = Conv3x3(8)
maxpool = MaxPool2()
softmax = Softmax(13*13*8,10)

def forward(image, label):

  out = conv.forward((image/255)-0.5)
  # print(out.shape)
  out = maxpool.forward(out)
  # print(out.shape)
  out = softmax.forward(out)
  # print(out.shape)

  loss = -np.log(out[label])
  acc = 1 if np.argmax(out) == label else 0 

  return out,loss, acc

def train(im, label, lr=.01):

  out,loss,acc = forward(im, label)

  gradient = np.zeros(10)
  gradient[label] = -1/out[label]

  gradient = softmax.backprop(gradient, lr)
  gradient = maxpool.backprop(gradient)
  gradient = conv.backprop(gradient, lr)

  return loss, acc

print("Training over frst 1000 images of MNIST")
train_images = x_train[:1000]
train_labels = y_train[:1000]
num_epochs = 5
for epoch in range(num_epochs):
  print ("Epoch # %d: " % (epoch+1), end=" ")
  loss = 0
  num_correct = 0


  for i, (im, label) in enumerate(zip(train_images, train_labels)):
      if i%1000 == 99:
        print (
        'Average Loss: %.3f || Accuracy: %d%%' %
        (loss / 1000, num_correct)) 
        loss = 0
        num_correct = 0
      l, acc = train(im, label)
      loss += l
      num_correct += acc

Using TensorFlow backend.


Training over frst 1000 images of MNIST
Epoch # 1:  Average Loss: 0.217 || Accuracy: 30%
Epoch # 2:  Average Loss: 0.051 || Accuracy: 85%
Epoch # 3:  Average Loss: 0.035 || Accuracy: 87%
Epoch # 4:  Average Loss: 0.024 || Accuracy: 91%
Epoch # 5:  Average Loss: 0.016 || Accuracy: 94%


# 5) Test on MNIST images:
The train model is tested over 100 test images from MNIST dataset


In [5]:

print ("Testing the first 100 images of MNIST")
test_images = x_test[:100]
test_labels = y_test[:100]
loss = 0
num_correct = 0

for im, label in zip(test_images, test_labels):
  _, l, acc = forward(im, label)
  loss += l
  num_correct += acc
  

num_tests = len(test_images)
print ("Test loss: %.3f || Test Accuracy: %.2f" % (loss/num_correct, num_correct/num_tests))

Testing the first 100 images of MNIST
Test loss: 0.393 || Test Accuracy: 0.89


# 6) Creating a similar keras model:
Trained a similar CNN model using keras library, over 5 epochs as above. And the results obtained are comparable to the CNN model above

In [6]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.utils import to_categorical
from keras.optimizers import SGD

(x_train, y_train), (x_test, y_test) = mnist.load_data()

train_images = np.expand_dims(x_train[:1000], axis = 3)
train_images = (train_images / 255) - 0.5
test_images = np.expand_dims(x_test[:100], axis = 3)
test_images = (test_images / 255) - 0.5

model = Sequential()
model.add(Conv2D(8, 3, input_shape=(28,28,1), use_bias=False))
model.add(MaxPooling2D(pool_size=2))
model.add(Flatten())
model.add(Dense(10, activation="softmax"))

model.compile(SGD(lr=0.01), loss= "sparse_categorical_crossentropy", metrics=["accuracy"])

model.fit(train_images, train_labels, batch_size=1, epochs=5)
score = model.evaluate(test_images, test_labels)
print ("Test loss: %.3f || Test Accuracy: %.2f" % (score[0], score[1]))







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/5





Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test loss: 0.289 || Test Accuracy: 0.89
