In [None]:
import numpy as np
import tensorflow as tf
from keras.datasets import mnist

# layers
class Conv:
  """2-D convolution layer applied to a single-channel (2-D) image.

  Holds ``num_filters`` square ``k x k`` filters. Each filter is slid over
  the zero-padded input with stride ``s`` and multiplied element-wise with
  the window underneath it (no kernel flip).
  """

  def __init__(self,num_filters, k, p, s):
    """Create the layer with randomly initialised filters.

    Args:
      num_filters: number of filters, i.e. depth of the output volume.
      k: side length of each square filter.
      p: number of zero rows/columns added on every side of the input.
      s: stride between consecutive window positions.
    """
    self.num_filters = num_filters
    # Standard-normal draws scaled down by k**2.
    self.filters = np.random.randn(num_filters, k, k)/(k**2)
    self.k = k
    self.p = p
    self.s = s

  def get_size(self, image):
    """Return the (height, width) of the output produced for ``image``."""
    h, w= image.shape
    h_size = int((h-self.k+2*self.p)/self.s + 1)
    w_size = int((w-self.k+2*self.p)/self.s + 1)
    return h_size, w_size

  def _pad(self, image):
    """Return ``image`` with ``self.p`` zeros added on every side."""
    if self.p > 0:
      return np.pad(image, self.p, mode='constant')
    return image

  def _window(self, padded, i, j):
    """Return the ``k x k`` patch of ``padded`` feeding output cell (i, j)."""
    top = i * self.s
    left = j * self.s
    return padded[top:(top+self.k), left:(left+self.k)]

  def forward_prop(self, inp):
    """Convolve ``inp`` with every filter.

    Args:
      inp: 2-D array (height, width).

    Returns:
      Array of shape (out_h, out_w, num_filters), where (out_h, out_w) is
      ``self.get_size(inp)``.
    """
    # Cached unpadded so back_prop can rebuild the same windows.
    self.back_input = inp

    h, w = self.get_size(inp)
    padded = self._pad(inp)
    output = np.zeros((h, w, self.num_filters))

    for i in range(h):
      for j in range(w):
        pix_mat = self._window(padded, i, j)
        # Broadcasts the (k, k) patch against the (num_filters, k, k) filters.
        output[i, j] = np.sum(pix_mat * self.filters, axis=(1,2))
    return output

  def back_prop(self, out, learn_rate):
    """Update the filters from the gradient of the loss w.r.t. the output.

    Args:
      out: array of shape (out_h, out_w, num_filters) holding dL/d(output)
        for the most recent ``forward_prop`` call.
      learn_rate: step size of the gradient-descent update.

    Returns:
      None. No gradient with respect to the input is computed.
    """
    filter_loss = np.zeros(self.filters.shape)
    h,w = self.get_size(self.back_input)
    padded = self._pad(self.back_input)
    for i in range(h):
      for j in range(w):
        pix_mat = self._window(padded, i, j)
        # Accumulate over every window position: each output cell
        # contributes its own gradient times the patch it was computed from.
        filter_loss += out[i, j][:, np.newaxis, np.newaxis] * pix_mat

    self.filters -= learn_rate * filter_loss

    return None

class MaxPool:
  """2x2 max-pooling layer with stride 2 over an (h, w, filters) volume."""

  def forwardprop(self, input):
    """Down-sample ``input`` by taking the max of each 2x2 spatial block.

    Args:
      input: array of shape (h, w, num_filters). A trailing odd row or
        column is dropped because the output size is ``h//2 x w//2``.

    Returns:
      Array of shape (h//2, w//2, num_filters).
    """
    # Cached so backprop can locate the maxima again.
    self.back_input = input
    h, w, num_filters = input.shape

    output = np.zeros((h//2, w//2, num_filters))

    for i in range(h//2):
      for j in range(w//2):
        output[i,j] = np.amax(input[(i*2):(i*2+2), (j*2):(j*2+2)],axis=(0,1))

    return output

  def backprop(self, filter_loss):
    """Route the pooled-output gradient back to the pre-pooling positions.

    Each 2x2 block receives its output cell's gradient at the position(s)
    that held the block maximum, per filter; every other position gets 0.
    If several positions tie for the maximum, each of them receives the
    gradient.

    Args:
      filter_loss: array of shape (h//2, w//2, num_filters) holding
        dL/d(output) for the most recent ``forwardprop`` call.

    Returns:
      Array with the shape of the cached input holding dL/d(input).
    """
    # Separate buffer: the incoming gradient must stay readable.
    input_loss = np.zeros(self.back_input.shape)
    rows, cols, _ = self.back_input.shape

    for i in range(rows//2):
      for j in range(cols//2):
        region = self.back_input[(i*2):(i*2+2), (j*2):(j*2+2)]
        # Per-filter mask of where the block maximum sits.
        is_max = region == np.amax(region, axis=(0,1))
        input_loss[(i*2):(i*2+2), (j*2):(j*2+2)] = is_max * filter_loss[i, j]
    return input_loss

class Softmax:
  """Fully connected layer followed by a softmax activation."""

  def __init__(self, input_len, nodes):
    """Create the layer.

    Args:
      input_len: number of values in the flattened input.
      nodes: number of output classes.
    """
    # Standard-normal draws scaled down by the input length.
    self.weights = np.random.randn(input_len, nodes)/input_len
    self.biases = np.zeros(nodes)

  def forwardprop(self, input):
    """Flatten ``input``, apply the dense layer and return softmax outputs.

    Args:
      input: array of any shape with ``input_len`` elements in total.

    Returns:
      1-D array of length ``nodes`` whose entries sum to 1.
    """
    self.back_input_shape = input.shape

    input = input.flatten()
    self.back_input = input

    totals = np.dot(input, self.weights) + self.biases
    self.back_totals = totals

    # Subtracting the max leaves the softmax unchanged but keeps np.exp
    # from overflowing on large logits.
    exp = np.exp(totals - np.max(totals))
    return (exp/np.sum(exp, axis=0))

  def backprop(self, filter_loss, learn_rate):
    """Update weights/biases and return the gradient w.r.t. the input.

    Args:
      filter_loss: 1-D array of length ``nodes`` holding dL/d(output) for
        the most recent ``forwardprop`` call. Any number of entries may be
        non-zero.
      learn_rate: step size of the gradient-descent update.

    Returns:
      Array with the shape of the original (unflattened) input holding
      dL/d(input). An all-zero ``filter_loss`` yields an all-zero result
      and leaves the parameters unchanged.
    """
    filter_loss = np.asarray(filter_loss, dtype=float)

    # Recompute the softmax outputs in the same overflow-safe way.
    t_exp = np.exp(self.back_totals - np.max(self.back_totals))
    probs = t_exp / np.sum(t_exp)

    # Softmax Jacobian applied to the incoming gradient:
    #   dL/dt_k = p_k * (g_k - sum_i g_i * p_i)
    dgradients = probs * (filter_loss - np.dot(filter_loss, probs))

    dweights = self.back_input[np.newaxis].T @ dgradients[np.newaxis]
    # Computed before the update so it uses the weights of the forward pass.
    dinputs = self.weights @ dgradients

    self.weights -= learn_rate * dweights
    self.biases -= learn_rate * dgradients
    return dinputs.reshape(self.back_input_shape)




# Network layers: 20 filters of size 3x3 (no padding, stride 1), a 2x2
# max-pool, and a 10-way softmax classifier.
c = Conv(num_filters=20, k=3, p=0, s=1)
pool = MaxPool()
# 13 * 13 * 20 is the flattened pooled volume for 28x28 inputs:
# 28 -> 26 after the 3x3 convolution, 26 -> 13 after pooling, 20 filters.
softmax = Softmax(input_len=13 * 13 * 20, nodes=10)

# MNIST arrives as (images, labels) pairs for the train and test splits.
(trainX, trainy), (testX, testy) = mnist.load_data()
print('Train: X=%s, y=%s' % (trainX.shape, trainy.shape))
print('Test: X=%s, y=%s' % (testX.shape, testy.shape))

def forward(image, label):
  """Run one image through conv -> pool -> softmax and score the result.

  Args:
    image: 2-D pixel array; divided by 255 and shifted by -0.5 before use
      (presumably 0-255 grayscale values -- confirm against the dataset).
    label: index of the correct class.

  Returns:
    Tuple ``(out, loss, acc)``: the softmax output vector, the
    cross-entropy loss ``-log(out[label])``, and 1 if the arg-max of
    ``out`` equals ``label``, otherwise 0.
  """
  scaled = (image/255) - 0.5
  probs = softmax.forwardprop(pool.forwardprop(c.forward_prop(scaled)))

  cross_entropy = -np.log(probs[label])
  hit = int(np.argmax(probs) == label)

  return probs, cross_entropy, hit

def train(image, label, learning=0.05):
  """Run one forward/backward pass on a single example.

  Args:
    image: 2-D pixel array passed to ``forward``.
    label: index of the correct class.
    learning: learning rate handed to the softmax and conv layers.

  Returns:
    Tuple ``(loss, acc)`` as reported by ``forward`` before the update.
  """
  probs, loss, acc = forward(image, label)

  # Gradient of -log(probs[label]) w.r.t. the softmax output: non-zero only
  # at the true class.
  seed = np.zeros(10)
  seed[label] = -1/probs[label]

  # Walk the layers in reverse; the conv layer is first in the network, so
  # its return value is not needed.
  pooled_grad = softmax.backprop(seed, learning)
  conv_grad = pool.backprop(pooled_grad)
  c.back_prop(conv_grad, learning)

  return loss, acc


# The dataset loaded by this script is MNIST, so the banner says so.
print("MNIST CNN Initialized")

# Abort unless TensorFlow reports a GPU as device 0.
# NOTE(review): the layers in this file are plain NumPy and nothing visible
# here places work on the GPU, so this check only gates the run -- consider
# dropping it if CPU-only execution should be allowed.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

# Train for three epochs, one example at a time, reporting running loss and
# accuracy every 500 steps and the overall accuracy after each epoch.
for epoch in range(3):
  print('----EPOCH %d ---'%(epoch+1))

  # Reshuffle images and labels with the same permutation each epoch.
  permutation = np.random.permutation(len(trainX))
  trainX = trainX[permutation]
  trainy = trainy[permutation]

  loss = 0
  num_correct = 0
  total_num_correct = 0
  for i, (image, label) in enumerate(zip(trainX, trainy)):
    l, acc = train(image, label)
    loss += l
    num_correct += acc
    total_num_correct += acc

    # Report after the step has been trained so every window, including
    # the first, averages exactly 500 examples.
    if i % 500 == 499:
      print('[Step %d] Past 500 steps: Average Loss %.3f | Accuracy: %d%%' %(i + 1, loss / 500, (num_correct/500)*100))

      loss = 0
      num_correct = 0
  # Divide by the actual number of examples rather than a hard-coded 60000.
  print("Accuracy after epoch: %d%%" %((total_num_correct/len(trainX))*100))


Train: X=(60000, 28, 28), y=(60000,)
Test: X=(10000, 28, 28), y=(10000,)
CIFAR CNN Initialized
Found GPU at: /device:GPU:0
----EPOCH 1 ---
[Step 500] Past 500 steps: Average Loss 1.333 | Accuracy: 54%
[Step 1000] Past 500 steps: Average Loss 0.690 | Accuracy: 77%
[Step 1500] Past 500 steps: Average Loss 0.626 | Accuracy: 78%
[Step 2000] Past 500 steps: Average Loss 0.623 | Accuracy: 80%
[Step 2500] Past 500 steps: Average Loss 0.643 | Accuracy: 82%
[Step 3000] Past 500 steps: Average Loss 0.702 | Accuracy: 80%
[Step 3500] Past 500 steps: Average Loss 0.454 | Accuracy: 83%
[Step 4000] Past 500 steps: Average Loss 0.465 | Accuracy: 86%
[Step 4500] Past 500 steps: Average Loss 0.535 | Accuracy: 84%
[Step 5000] Past 500 steps: Average Loss 0.458 | Accuracy: 85%
[Step 5500] Past 500 steps: Average Loss 0.426 | Accuracy: 87%
[Step 6000] Past 500 steps: Average Loss 0.515 | Accuracy: 85%
[Step 6500] Past 500 steps: Average Loss 0.518 | Accuracy: 83%
[Step 7000] Past 500 steps: Average Loss 0.