In [1]:
import numpy as np
import libdl as l
import torch, torchvision
from torch.utils.data import DataLoader
np.random.seed(0)

In [2]:
# A small helper function that instantiates libdl tensors from a numpy array
def from_numpy(a, requiresGrad=False):
    """Wrap array-like data in the libdl tensor class matching its rank.

    Args:
        a: Array-like with 1 to 4 dimensions. It is converted with
            ``np.array`` and then to a Fortran-ordered float32 array.
        requiresGrad: Passed through as the second constructor argument of
            the tensor class.

    Returns:
        An ``l.Tensor1``, ``l.Tensor2``, ``l.Tensor3`` or ``l.Tensor4``
        instance, chosen by the number of dimensions of ``a``.

    Raises:
        ValueError: If ``a`` has 0 or more than 4 dimensions. Previously the
            function silently returned ``None`` in that case.
    """
    a = np.array(a)
    rank = a.ndim
    # Validate before touching libdl so an unsupported input fails here with a
    # clear message instead of surfacing later as a stray None.
    if not 1 <= rank <= 4:
        raise ValueError(
            f"from_numpy supports arrays with 1 to 4 dimensions, got {rank} "
            f"(shape {a.shape})"
        )
    tensor_types = {1: l.Tensor1, 2: l.Tensor2, 3: l.Tensor3, 4: l.Tensor4}
    data = np.asfortranarray(a).astype(np.float32)
    return tensor_types[rank](data, requiresGrad)

In [3]:
# Hyperparameters
lr = .1  # initial step size handed to applyGradient in the training loop
lr_decay = .99  # factor the step size is multiplied by after every batch
filter_size = 5  # side length of the square convolution filters
hidden_units = [16, 32, 32]  # output channels of conv layer 1 and 2, then width of the dense hidden layer
log_every = 500  # NOTE(review): not referenced in the visible cells - confirm whether it is still needed
epochs = 10  # NOTE(review): not referenced in the visible cells; the training loop stops after a fixed number of batches
batch_size = 64  # samples per batch for both data loaders

In [4]:
# Both splits go through the same preprocessing: convert to a tensor, then
# normalise with the given mean / std (presumably the MNIST statistics).
mnist_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
])

train_set = torchvision.datasets.MNIST(
    'dataset/', train=True, download=True, transform=mnist_transform)
test_set = torchvision.datasets.MNIST(
    'dataset/', train=False, download=True, transform=mnist_transform)

# Neither loader shuffles, so batches arrive in dataset order on every run.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [5]:
# weights and biases for the convolutional neural network
def _scaled_normal(fan_in, shape):
    """Trainable tensor sampled from a zero-mean normal with std 1/sqrt(fan_in)."""
    return from_numpy(np.random.normal(0, 1 / np.sqrt(fan_in), shape), True)


def _zero_bias(size):
    """Trainable all-zero bias vector of the given length."""
    return from_numpy(np.zeros(size), True)


kernel_area = filter_size * filter_size
# Number of inputs to the first dense layer; matches the reshape in forward().
flat_features = 25 * hidden_units[1]

# Convolution filters are laid out as (height, width, in_channels, out_channels).
# The random draws happen in the order F1, F2, W3, W4.
F1 = _scaled_normal(kernel_area, (filter_size, filter_size, 1, hidden_units[0]))
B1 = _zero_bias(hidden_units[0])
F2 = _scaled_normal(hidden_units[0] * kernel_area,
                    (filter_size, filter_size, hidden_units[0], hidden_units[1]))
B2 = _zero_bias(hidden_units[1])

# Dense layers store their weights as (outputs, inputs).
W3 = _scaled_normal(flat_features, (hidden_units[2], flat_features))
B3 = _zero_bias(hidden_units[2])
W4 = _scaled_normal(hidden_units[2], (10, hidden_units[2]))
B4 = _zero_bias(10)

# Everything the training loop updates.
parameters = [F1, B1, F2, B2, W3, B3, W4, B4]

In [6]:
# forward pass
def forward(x):
    """Run the network on a batch and return the output of the last layer.

    Two conv -> 2x max-pool -> leaky-ReLU stages are followed by a dense
    leaky-ReLU layer and a final linear layer with 10 outputs.

    Args:
        x: libdl tensor holding the input batch. The callers in this notebook
            pass a 4-D tensor built from an array of shape
            (width, height, channel, batch).

    Returns:
        The libdl tensor produced by ``W4 @ h3 + B4``; callers treat axis 0
        as the class dimension.
    """
    pad = int(filter_size / 2)

    stage1 = l.maxpool2d(l.conv2d(x, F1, B1, pad), 2)
    act1 = l.leakyRelu(stage1)

    stage2 = l.maxpool2d(l.conv2d(act1, F2, B2, pad), 2)
    act2 = l.leakyRelu(stage2)

    # Flatten to (features, batch); -1 leaves the batch dimension to reshape.
    flat = l.reshape(act2, (25 * hidden_units[1], -1))
    act3 = l.leakyRelu(l.matmul(W3, flat) + B3)

    return l.matmul(W4, act3) + B4

In [7]:
# training
# Step size for the current batch. It is decayed on a separate variable so the
# configured `lr` hyperparameter is never overwritten: `lr` keeps the value
# shown in the config cell and re-running this cell starts from the same rate.
step_lr = lr

print("iteration | loss")
for i, (x, y) in enumerate(train_loader):
    # Keep the central 20x20 pixels, then reverse the axis order so the batch
    # dimension comes last before handing the data to libdl.
    x = x.numpy()[:, :, 4:24, 4:24].transpose()
    x = from_numpy(x)
    y = y.numpy()
    # One-hot encode the labels; the transpose puts the 10 classes on axis 0.
    y = from_numpy(np.eye(10)[y].transpose())

    yp = forward(x)
    loss = l.crossEntropyWithLogits(yp, y)

    loss.backward()
    for p in parameters:
        p.applyGradient(step_lr)
        # Halve the gradient instead of clearing it.
        # NOTE(review): this only acts as momentum if grad() returns a
        # reference to the stored gradient and backward() accumulates into
        # it - confirm against libdl.
        g = p.grad()
        g *= .5  # momentum
    step_lr *= lr_decay

    print(f"{i:9d} | {loss.numpy():.4f}")
    # Short demo run: stop after batch index 30 (31 batches in total).
    if i >= 30:
        break

iteration | loss
        0 | 2.6143
        1 | 2.4228
        2 | 2.4254
        3 | 2.2341
        4 | 2.2128
        5 | 2.2251
        6 | 2.1814
        7 | 2.0718
        8 | 2.0437
        9 | 2.1114
       10 | 2.0444
       11 | 2.4280
       12 | 2.0949
       13 | 2.1333
       14 | 2.0423
       15 | 1.8629
       16 | 1.9163
       17 | 1.9972
       18 | 1.7830
       19 | 1.8194
       20 | 1.7478
       21 | 1.6707
       22 | 1.8571
       23 | 1.8995
       24 | 1.8240
       25 | 1.5974
       26 | 1.3154
       27 | 1.4736
       28 | 1.9052
       29 | 2.2353
       30 | 2.0739


In [8]:
# Per-batch accuracy on the first 11 test batches (batch indices 0..10).
accs = []
for i, (x, y) in enumerate(test_loader):
    # Same preprocessing as in training: central 20x20 crop, axes reversed.
    x = from_numpy(x.numpy()[:, :, 4:24, 4:24].transpose())
    y = y.numpy()

    # The predicted class is the index of the largest output along axis 0.
    yp = forward(x).numpy().argmax(axis=0)
    accs.append(np.mean(yp == y))
    if i >= 10:
        break

# Unweighted mean of the per-batch accuracies.
print(f"Test Accuracy: {np.mean(accs)*100:.2f}%")

Test Accuracy: 45.88%
