In [None]:
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
import numpy as np

In [None]:
class Tensor(object):
    """Minimal NumPy-backed value that can hold a gradient and a producing op.

    Attributes:
        data: The wrapped ``np.ndarray``; non-array input is converted with
            ``np.array``.
        requires_grad: Flag that ops consult before accumulating a gradient
            into this tensor.
    """

    def __init__(self, data, requires_grad=False):
        self.data = data if isinstance(data, np.ndarray) else np.array(data)
        self.requires_grad = requires_grad
        self._grad = None      # accumulated gradient (a Tensor) or None
        self._grad_fn = None   # op object exposing ``backward(grad)`` or None

    # The three accessors below are properties because every use in this file
    # treats them as attributes (``self.grad_fn.backward``, ``x.grad_fn`` in a
    # truth test).  As plain methods those expressions operated on the bound
    # method object instead of the stored value.
    @property
    def shape(self):
        """Shape tuple of the underlying array."""
        return self.data.shape

    @property
    def grad_fn(self):
        """Op that produced this tensor, or ``None`` when none was recorded."""
        return self._grad_fn

    @property
    def grad(self):
        """Gradient accumulated through ``add_grad``, or ``None``."""
        return self._grad

    def backward(self, grad=None):
        """Send a gradient to the op stored in ``grad_fn``.

        The gradient that is propagated is, in order of precedence: the
        gradient already accumulated on this tensor, the ``grad`` argument,
        or a scalar ``1.0`` seed.

        Args:
            grad: Optional upstream gradient.

        Returns:
            ``True`` once ``grad_fn.backward`` has been called.

        Raises:
            RuntimeError: If no ``grad_fn`` is recorded on this tensor.
        """
        if self._grad is not None:
            grad = self._grad
        elif grad is None:
            grad = self.__class__(1., requires_grad=False)

        if self._grad_fn is None:
            raise RuntimeError(
                "backward() called on a tensor that has no grad_fn")

        self._grad_fn.backward(grad)
        return True

    def __repr__(self):
        return repr(self.data)

    def add_grad(self, grad):
        """Accumulate ``grad`` into this tensor's stored gradient.

        Args:
            grad: A ``Tensor`` or anything ``Tensor`` can wrap.
        """
        incoming = grad if isinstance(grad, Tensor) else Tensor(grad)
        if self._grad is None:
            self._grad = incoming
        else:
            # Build a fresh Tensor rather than using ``+=``: Tensor defines no
            # in-place add, and this also leaves the caller's object untouched.
            self._grad = Tensor(self._grad.data + incoming.data)


class AddOp(object):
    """Element-wise addition ``x + y`` with a hand-written backward pass."""

    @staticmethod
    def _reduce_to_shape(grad_data, shape):
        """Return a fresh array holding ``grad_data`` fitted to ``shape``.

        Axes that NumPy broadcasting added or stretched in the forward pass
        are summed out; a gradient smaller than ``shape`` (for example a
        scalar seed) is expanded to it.
        """
        reduced = np.array(grad_data)  # copy so operands never share a buffer
        extra = reduced.ndim - len(shape)
        if extra > 0:
            # Leading axes that the operand did not have.
            reduced = reduced.sum(axis=tuple(range(extra)))
        if reduced.ndim == len(shape):
            stretched = tuple(
                i for i, dim in enumerate(shape)
                if dim == 1 and reduced.shape[i] != 1
            )
            if stretched:
                reduced = reduced.sum(axis=stretched, keepdims=True)
        if reduced.shape != tuple(shape):
            reduced = np.broadcast_to(reduced, shape).copy()
        return np.asarray(reduced)

    def forward(self, x: Tensor, y: Tensor):
        """Store both operands and return ``Tensor(x.data + y.data)``.

        The result requires grad when either operand does.
        """
        self.x = x
        self.y = y
        requires_grad = x.requires_grad or y.requires_grad
        return Tensor(x.data + y.data, requires_grad=requires_grad)

    def backward(self, grad):
        """Accumulate ``grad`` into each operand that requires a gradient.

        Args:
            grad: Upstream gradient; its ``.data`` array is read.
        """
        for operand in (self.x, self.y):
            if not operand.requires_grad:
                continue
            # d(x + y)/dx = 1, so the upstream gradient passes through,
            # reduced to the operand's own shape.
            operand.add_grad(
                Tensor(self._reduce_to_shape(grad.data, operand.data.shape)))
            # Read the stored op directly: it is None unless an op was
            # recorded, so operands without one are not recursed into.
            if operand._grad_fn is not None:
                operand.backward()

class Multiplication(object):
    """Element-wise product ``x * y`` with a hand-written backward pass."""

    @staticmethod
    def _reduce_to_shape(grad_data, shape):
        """Return a fresh array holding ``grad_data`` fitted to ``shape``.

        Axes that NumPy broadcasting added or stretched in the forward pass
        are summed out; a gradient smaller than ``shape`` is expanded to it.
        """
        reduced = np.array(grad_data)
        extra = reduced.ndim - len(shape)
        if extra > 0:
            # Leading axes that the operand did not have.
            reduced = reduced.sum(axis=tuple(range(extra)))
        if reduced.ndim == len(shape):
            stretched = tuple(
                i for i, dim in enumerate(shape)
                if dim == 1 and reduced.shape[i] != 1
            )
            if stretched:
                reduced = reduced.sum(axis=stretched, keepdims=True)
        if reduced.shape != tuple(shape):
            reduced = np.broadcast_to(reduced, shape).copy()
        return np.asarray(reduced)

    def forward(self, x: Tensor, y: Tensor):
        """Store both operands and return ``Tensor(x.data * y.data)``.

        The result requires grad when either operand does.
        """
        self.x = x
        self.y = y
        requires_grad = x.requires_grad or y.requires_grad
        return Tensor(x.data * y.data, requires_grad=requires_grad)

    def backward(self, grad):
        """Accumulate ``grad * other_operand`` into each operand needing it.

        Args:
            grad: Upstream gradient; its ``.data`` array is read.
        """
        # Each entry pairs an operand with the factor it was multiplied by.
        for operand, other in ((self.x, self.y), (self.y, self.x)):
            if not operand.requires_grad:
                continue
            local = grad.data * other.data
            operand.add_grad(
                Tensor(self._reduce_to_shape(local, operand.data.shape), False))
            # Read the stored op directly: it is None unless an op was
            # recorded, so operands without one are not recursed into.
            if operand._grad_fn is not None:
                operand.backward()

class Negation(object):
    """Element-wise negation ``-x`` with a hand-written backward pass."""

    def forward(self, x: Tensor):
        """Store the operand and return ``Tensor(x.data * -1)``."""
        self.x = x
        requires_grad = x.requires_grad
        return Tensor(x.data * (-1), requires_grad=requires_grad)

    def backward(self, grad):
        """Accumulate the negated upstream gradient into the operand.

        Args:
            grad: Upstream gradient; its ``.data`` array is read.
        """
        if not self.x.requires_grad:
            return
        self.x.add_grad(Tensor(grad.data * -1, False))
        # Read the stored op directly: it is None unless an op was recorded,
        # so an operand without one is not recursed into.
        if self.x._grad_fn is not None:
            self.x.backward()

In [None]:
class Layer:
    """Base class that makes an instance callable by delegating to ``forward``."""

    def __call__(self, *args):
        """Pass all positional arguments to ``self.forward`` and return its result."""
        result = self.forward(*args)
        return result

class Relu(Layer):
    """Rectified linear unit: element-wise ``max(0, x)``."""

    def forward(self, x):
        """Remember ``x`` for the backward pass and return ``max(0, x)``."""
        self.x = x
        floor = np.zeros_like(x)
        return np.maximum(floor, x)

    def backward(self, grad):
        """Return ``grad`` where the remembered input was positive, zero elsewhere."""
        positive = self.x > 0
        return positive * grad

class MSE(Layer):
    """Squared-error loss scaled by ``1 / (2 * N)``, where ``N = x.shape[0]``."""

    def forward(self, x, y):
        """Return the element-wise value ``(x - y)**2 / (2 * N)``.

        The result is not reduced; it has the broadcast shape of ``x - y``.
        Both arguments are stored for ``backward``.
        """
        self.x, self.y = x, y
        squared_error = (x - y) ** 2
        scale = self.x.shape[0] * 2
        return squared_error / scale

    def backward(self, grad=None):
        """Return ``(x - y) / N`` for the stored pair; ``grad`` is ignored."""
        residual = self.x - self.y
        return residual / self.x.shape[0]

In [None]:
class Linear(Layer):
    """Affine layer ``x . weight + bias`` that updates itself during ``backward``."""

    def __init__(self, input, output, lr=0.0002):
        """Create an ``(input, output)`` weight and an ``(output,)`` bias.

        Both are drawn with ``np.random.random`` and rescaled as ``2*u - 1``.
        ``lr`` is the step size used in ``backward``.
        """
        super().__init__()
        self.weight = np.random.random((input, output)) * 2 - 1
        self.bias = np.random.random((output)) * 2 - 1
        self.lr = lr

    def forward(self, x):
        """Remember ``x`` and return ``np.dot(x, weight) + bias``."""
        self.x = x
        projected = np.dot(x, self.weight)
        return projected + self.bias

    def backward(self, grad):
        """Apply one gradient step in place and return the input gradient.

        The returned gradient is computed with the weights as they were
        before this call's update.
        """
        batch = self.x.shape[0]
        bias_grad = grad.mean(axis=0) * batch
        weight_grad = np.dot(self.x.T, grad)
        grad_input = np.dot(grad, self.weight.T)

        # In-place so any outside reference to the arrays sees the update.
        self.weight -= weight_grad * self.lr
        self.bias -= bias_grad * self.lr

        return grad_input

In [None]:


class Model(Layer):
    """Three ``Linear`` layers (784 -> 400 -> 100 -> 10) with ``Relu`` in between."""

    def __init__(self, lr=0.00001):
        """Build the layer stack; every ``Linear`` receives the same ``lr``."""
        self.lr = lr
        # Layers are constructed in forward order.
        first = Linear(784, 400, lr=self.lr)
        second = Linear(400, 100, lr=self.lr)
        last = Linear(100, 10, lr=self.lr)
        self.layers = [first, Relu(), second, Relu(), last]

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        out = x
        for layer in self.layers:
            out = layer(out)
        return out

    def backward(self, grad):
        """Feed ``grad`` through every layer's ``backward`` in reverse order."""
        upstream = grad
        for layer in reversed(self.layers):
            upstream = layer.backward(upstream)
        return upstream

In [None]:
# The only preprocessing step is the conversion done by ToTensor.
simple = transforms.Compose([transforms.ToTensor()])

# download=True fetches the dataset into ./mnist when it is not already there.
ds = MNIST(root='./mnist', transform=simple, download=True)

# drop_last=True discards a trailing partial batch, so every batch holds
# exactly batch_size samples.
ld = DataLoader(
    dataset=ds,
    batch_size=2,
    drop_last=True,
    pin_memory=True,
)

In [None]:
# Train the network with the MSE loss on one-hot targets, logging the mean
# batch loss every `log_every` batches and the accuracy after every epoch.
mm = Model()
loss = MSE()
log_every = 1000
_loss_avg = 0
batches_since_log = 0
for epochs in range(10):
    cnt = 0
    total = 0
    for i, (img, label) in enumerate(ld):
        # Flatten each image to a row; take the batch size from the data
        # instead of hard-coding it.
        batch_size = img.shape[0]
        x = img.view(batch_size, -1).numpy()
        labels = label.numpy()

        res = mm(x)
        # The model emits one score per class, while the labels are class
        # indices; expand them to one-hot rows so the shapes match.
        target = np.eye(res.shape[1])[labels]
        _loss = loss(res, target)

        total += batch_size
        # Per-sample prediction is the arg-max along the class axis.
        cnt += int((np.argmax(res, axis=1) == labels).sum())
        _loss_avg += _loss.mean()
        batches_since_log += 1

        # Exactly one backward pass (and therefore one update) per batch.
        grad = loss.backward(1)
        mm.backward(grad)

        if i % log_every == 0:
            # Average over the batches actually accumulated since the last log.
            print('Train - Epoch %d, Batch: %d, Loss: %f' % (epochs, i, _loss_avg / batches_since_log))
            _loss_avg = 0
            batches_since_log = 0
    # Accuracy over the samples seen this epoch (guarded for an empty loader).
    print(' Accuracy: %f' % (float(cnt) / max(total, 1)))

Train - Epoch 0, Batch: 0, Loss: 0.323625
Train - Epoch 0, Batch: 1000, Loss: 124.349192
Train - Epoch 0, Batch: 2000, Loss: 66.812781
Train - Epoch 0, Batch: 3000, Loss: 50.472935
Train - Epoch 0, Batch: 4000, Loss: 43.582088
Train - Epoch 0, Batch: 5000, Loss: 36.746565
Train - Epoch 0, Batch: 6000, Loss: 30.987646
Train - Epoch 0, Batch: 7000, Loss: 32.905916
Train - Epoch 0, Batch: 8000, Loss: 28.759991
Train - Epoch 0, Batch: 9000, Loss: 27.538699
Train - Epoch 0, Batch: 10000, Loss: 22.896226
Train - Epoch 0, Batch: 11000, Loss: 20.877211
Train - Epoch 0, Batch: 12000, Loss: 21.706594
Train - Epoch 0, Batch: 13000, Loss: 20.512052
Train - Epoch 0, Batch: 14000, Loss: 20.801178
Train - Epoch 0, Batch: 15000, Loss: 19.861267
Train - Epoch 0, Batch: 16000, Loss: 20.947768
Train - Epoch 0, Batch: 17000, Loss: 17.711630
Train - Epoch 0, Batch: 18000, Loss: 17.718749
Train - Epoch 0, Batch: 19000, Loss: 16.721489
Train - Epoch 0, Batch: 20000, Loss: 16.093615
Train - Epoch 0, Batch: 21