In [1]:
import numpy as np
import random

def sigmoid(z):
  """Logistic function 1 / (1 + exp(-z)), applied elementwise.

  Evaluated in an overflow-safe form: ``exp`` is only ever taken of a
  non-positive number, so large-magnitude negative inputs do not trigger
  NumPy's "overflow encountered in exp" RuntimeWarning.

  Args:
    z: real scalar or NumPy array.

  Returns:
    The logistic of ``z``, in [0, 1], with the same shape as ``z``.
  """
  e = np.exp(-np.abs(z))  # argument is <= 0, so e lies in [0, 1]
  # z >= 0:  1 / (1 + exp(-z))
  # z <  0:  exp(z) / (1 + exp(z))   (the same function, rearranged)
  numerator = np.where(z >= 0, np.ones_like(e), e)
  return numerator / (1 + e)

def sigmoid_prime(z):
  """Derivative of the logistic function, evaluated elementwise at ``z``.

  Uses the identity sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)).
  """
  s = sigmoid(z)
  return s * (1 - s)

class NoTorch:
  """Fully connected sigmoid network trained with mini-batch SGD.

  Every non-input layer applies ``sigmoid(w @ a + b)``. Training uses the
  output error ``a - y`` (the gradient of a cross-entropy cost through a
  sigmoid output layer) and L2 weight decay on the weights only.
  """

  def __init__(self, layers):
    """Create a network with randomly initialised parameters.

    Args:
      layers: sequence of layer sizes, input layer first.
    """
    self.layers = layers
    self.n_layers = len(layers)
    # One (nodes, 1) bias column per non-input layer, standard normal.
    self.biases = [np.random.randn(nodes, 1) for nodes in layers[1:]]
    # One (nodes_cur, nodes_last) matrix per pair of adjacent layers: standard
    # normal scaled by 1 / sqrt(nodes_last), i.e. by the layer's fan-in.
    self.weights = [np.random.randn(nodes_cur, nodes_last) / np.sqrt(nodes_last) for nodes_cur, nodes_last in zip(layers[1:], layers[:-1])]

  def SGD(self, training_data, epochs, batch_sz, eta, lmbda, test_data):
    """Train with mini-batch stochastic gradient descent.

    After every epoch the accuracy on ``test_data`` is printed.

    Args:
      training_data: iterable of ``(x, y)`` pairs as consumed by ``backprop``.
      epochs: number of passes over the training data.
      batch_sz: number of samples per mini-batch.
      eta: learning rate.
      lmbda: L2 regularisation strength.
      test_data: iterable of ``(x, label)`` pairs as consumed by ``accuracy``.
    """
    # Materialise both inputs: they may be one-shot iterators, and shuffling a
    # private copy leaves the caller's sequence untouched.
    training_data = list(training_data)
    test_data = list(test_data)
    n = len(training_data)
    for j in range(epochs):
      random.shuffle(training_data)
      batches = [training_data[k : k + batch_sz] for k in range(0, n, batch_sz)]
      for batch in batches:
        self.update_parameters(batch, eta, lmbda, n)
      print("Accuracy after epoch {}: {}%".format(j + 1, self.accuracy(test_data)))

  def update_parameters(self, batch, eta, lmbda, n):
    """Apply one gradient step computed from a single mini-batch.

    Args:
      batch: non-empty list of ``(x, y)`` training pairs.
      eta: learning rate.
      lmbda: L2 regularisation strength.
      n: size of the full training set; scales the weight-decay term.
    """
    nabla_b = [np.zeros(np.shape(b)) for b in self.biases]
    nabla_w = [np.zeros(np.shape(w)) for w in self.weights]
    m = len(batch)
    # Sum the per-sample gradients over the batch.
    for x, y in batch:
      delta_nabla_b, delta_nabla_w = self.backprop(x, y)
      nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
      nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
    # Weights shrink by the L2 decay factor before the averaged gradient is
    # subtracted; biases are not regularised.
    self.weights = [(1 - eta * lmbda / n) * w - (eta / m) * nw for w, nw in zip(self.weights, nabla_w)]
    self.biases = [b - (eta / m) * nb for b, nb in zip(self.biases, nabla_b)]

  def feedforward(self, a):
    """Return the network output for the input activation ``a``."""
    for b, w in zip(self.biases, self.weights):
      a = sigmoid(np.matmul(w, a) + b)
    return a

  def accuracy(self, data):
    """Return the percentage of samples in ``data`` classified correctly.

    The prediction is the index of the largest output activation. A label
    may be an integer class index or a vector with one entry per class
    (e.g. one-hot); vector labels are reduced with ``argmax`` so the same
    ``(x, y)`` pairs used for training can be scored as well.

    Args:
      data: sequence of ``(x, label)`` pairs.

    Returns:
      Accuracy in percent; ``0.0`` when ``data`` is empty.
    """
    n = len(data)
    if n == 0:
      # Nothing to score; avoid dividing by zero.
      return 0.0
    correct = 0
    for x, y in data:
      prediction = np.argmax(self.feedforward(x))
      label = np.argmax(y) if np.size(y) > 1 else y
      correct += int(prediction == label)
    return correct / n * 100

  def backprop(self, x, y):
    """Compute the gradient of the cost for a single training sample.

    Args:
      x: input activation (column vector).
      y: target output (column vector).

    Returns:
      ``(nabla_b, nabla_w)``: two lists of arrays, laid out layer by layer
      like ``self.biases`` and ``self.weights``.
    """
    nabla_b = [None] * len(self.biases)
    nabla_w = [None] * len(self.weights)
    # Forward pass, keeping the activation of every layer (a[0] is the input).
    a = [x]
    for b, w in zip(self.biases, self.weights):
      a.append(sigmoid(np.matmul(w, a[-1]) + b))
    # Output-layer error.
    delta = a[-1] - y
    nabla_b[-1] = delta
    nabla_w[-1] = np.matmul(delta, a[-2].transpose())
    # Propagate the error backwards; l counts layers from the output end.
    for l in range(2, self.n_layers):
      # sigmoid'(z) == a * (1 - a), so the stored activation is reused
      # instead of evaluating the sigmoid a second time.
      delta = a[-l] * (1 - a[-l]) * np.matmul(self.weights[-l+1].transpose(), delta)
      nabla_b[-l] = delta
      nabla_w[-l] = np.matmul(delta, a[-l-1].transpose())
    return nabla_b, nabla_w

In [None]:
import mnist_loader
# Seed both generators so a fresh "Restart & Run All" reproduces the run:
# NoTorch draws its initial parameters from np.random, and SGD shuffles the
# training set with the stdlib random module.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# NOTE(review): VALIDATION is loaded but never used below; per-epoch accuracy
# is reported on TEST.
TRAIN, VALIDATION, TEST = mnist_loader.load_data_wrapper()

# Hyperparameters
EPOCHS = 30    # passes over TRAIN
ETA = 0.5      # learning rate
LMBDA = 5      # L2 regularisation strength
BATCH_SZ = 10  # samples per mini-batch

# Layer sizes: 784 inputs, two hidden layers of 100 units, 10 outputs.
net = NoTorch([784, 100, 100, 10])
net.SGD(TRAIN, EPOCHS, BATCH_SZ, ETA, LMBDA, TEST)