In [1]:
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
import numpy as np

In [27]:
class Numpy(object):

  def __init__(self, data, requires_grad=False):
      self.data = data
      if not isinstance(data, np.ndarray):
          self.data = np.array(data)
      self.requires_grad = requires_grad
      self._grad = None
      self._grad_fn = None


  def grad_fn(self):
      return self._grad_fn

  def grad(self):
      return self._grad

  def backward(self, grad=None):

      if grad is None and self._grad is None:
          grad = self.__class__(1., requires_grad=False)

      elif self.grad is not None:
          grad = self._grad

      self.grad_fn.backward(grad)
      return True

  def __repr__(self):
      return str(self.data.__repr__())

  def add_grad(self, grad):
      if self._grad is None:
          self._grad = grad
      else:
          self._grad += grad




In [28]:
class Layer:

  def __call__(self, *args):
      return self.forward(*args)

class Relu(Layer):

  def forward(self,x):
      self.x = x
      return np.maximum(np.zeros_like(x), x)
    
  def backward(self, grad):
      grad_input = (self.x > 0) * grad
      return grad_input

class MSE(Layer):

  def forward(self, x, y):
      self.x = x
      self.y = np.expand_dims(y, axis=-1)
      #print(self.x.shape[0]*2)
      return ((x - self.y)**2) / (self.x.shape[0]*2)

  def backward(self, grad=None):
      return (self.x - self.y) / self.x.shape[0]

In [29]:
class Linear(Layer):

  def __init__(self, input, output, lr=0.0002):
    super().__init__()
    self.weight = 2*np.random.random((input, output)) - 1
    self.bias = 2*np.random.random((output)) - 1
    self.lr = lr

  def forward(self, x):
    self.x = x
    return np.dot(x,self.weight) + self.bias

  def backward(self, grad):
    bias_grad = grad.mean(axis=0)*self.x.shape[0]
    weight_grad = np.dot(self.x.T, grad)
    grad_input = np.dot(grad, self.weight.T)
    
    self.weight -= weight_grad * self.lr
    self.bias -= bias_grad * self.lr

    return grad_input

In [30]:

lr=0.00001
class Model(Layer):
  def __init__(self, lr=0.00001):
      self.lr = lr
      self.layers = [
          Linear(784,400, lr=self.lr),
          Relu(),
          Linear(400,100, lr=self.lr),
          Relu(),
          Linear(100,10, lr=self.lr)        
      ]

  def forward(self,x):
      for l in self.layers:
          x = l(x)
      return x

  def backward(self, grad):
      for l in self.layers[::-1]:
          grad = l.backward(grad)

      return grad

In [6]:
simple = transforms.Compose([
    transforms.ToTensor(),
])
ds = MNIST('./mnist', download=True, transform=simple)
ld = DataLoader(ds, batch_size=2, pin_memory=True, drop_last=True) 

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./mnist/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting ./mnist/MNIST/raw/train-images-idx3-ubyte.gz to ./mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./mnist/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting ./mnist/MNIST/raw/train-labels-idx1-ubyte.gz to ./mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting ./mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to ./mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting ./mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./mnist/MNIST/raw

Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [31]:
mm = Model()
loss = MSE()
_loss_avg = 0 
for epochs in range(5):
    cnt = 0
    total = 0
    for i, (img, label) in enumerate(ld):
        x = img.view(2,-1).numpy()
        res = mm(x)
        _loss = loss(res, label.numpy())
        total += label.data.size(0)
        cnt += int(np.argmax(res) == np.argmax(label.numpy()))
        _loss_avg += _loss.mean() 
        grad = loss.backward(1)
        mm.backward(grad)
        if i % 1000 == 0:
          print('Train - Epoch %d, Batch: %d, Loss: %f' % (epochs, i, _loss_avg/1000))
          _loss_avg = 0
        loss.backward()
        mm.backward(grad)
    print(' Accuracy: %f' % (float(cnt) / len(ds)))

Train - Epoch 0, Batch: 0, Loss: 9.140558
Train - Epoch 0, Batch: 1000, Loss: 79.931840
Train - Epoch 0, Batch: 2000, Loss: 3.839712
Train - Epoch 0, Batch: 3000, Loss: 2.839282
Train - Epoch 0, Batch: 4000, Loss: 2.540469
Train - Epoch 0, Batch: 5000, Loss: 2.425599
Train - Epoch 0, Batch: 6000, Loss: 1.827842
Train - Epoch 0, Batch: 7000, Loss: 1.982463
Train - Epoch 0, Batch: 8000, Loss: 2.171599
Train - Epoch 0, Batch: 9000, Loss: 1.778993
Train - Epoch 0, Batch: 10000, Loss: 1.671248
Train - Epoch 0, Batch: 11000, Loss: 1.622605
Train - Epoch 0, Batch: 12000, Loss: 1.682661
Train - Epoch 0, Batch: 13000, Loss: 1.503634
Train - Epoch 0, Batch: 14000, Loss: 1.569863
Train - Epoch 0, Batch: 15000, Loss: 1.479401
Train - Epoch 0, Batch: 16000, Loss: 1.587463
Train - Epoch 0, Batch: 17000, Loss: 1.429244
Train - Epoch 0, Batch: 18000, Loss: 1.346901
Train - Epoch 0, Batch: 19000, Loss: 1.471290
Train - Epoch 0, Batch: 20000, Loss: 1.370047
Train - Epoch 0, Batch: 21000, Loss: 1.365826
