In [1]:
import numpy as np
from os.path import normpath as fn

import autograd 
import layers

# Seed NumPy's global RNG so the per-epoch np.random.permutation shuffle in the
# training loop is repeatable across runs.
# NOTE(review): weight initialisation inside `layers` presumably also draws from
# this RNG — confirm in layers.py.
np.random.seed(0)

In [2]:
# Load data
data = np.load(fn('data/mnist_26k.npz'))

# Images: cast to float32, divide by 255 and shift by -0.5, then reshape to
# (N, 28, 28, 1) so each sample carries a trailing single-channel axis.
train_im = (np.float32(data['im_train']) / 255. - 0.5).reshape([-1, 28, 28, 1])
val_im = (np.float32(data['im_val']) / 255. - 0.5).reshape([-1, 28, 28, 1])

# Labels are taken from the archive unchanged.
train_lb = data['lbl_train']
val_lb = data['lbl_val']

In [3]:
###################################
# build static computational graph
###################################
# Output channel counts of the first and second convolution layers.
C1 = 8
C2 = 16

# Placeholders for the input batch and its labels; they are filled with
# inp.set(...) / lab.set(...) before every autograd.Forward() call.
inp = autograd.Value()
lab = autograd.Value()

# Two strided convolutions with ReLU, then a linear classifier with 10 outputs.
# The Linear input size C2*36 matches the printed in_features=576.
# NOTE(review): 36 is presumably the 6x6 spatial grid left after the two
# stride-2 convolutions on 28x28 inputs (28 -> 13 -> 6, assuming no padding)
# — confirm against layers.Conv2d.
model = layers.Sequential([layers.Conv2d(1, C1, kernel_size=4, stride=2, bias=True),
                           layers.RELU(),
                           layers.Conv2d(C1, C2, kernel_size=2, stride=2, bias=True),
                           layers.RELU(),
                           layers.Flatten(),
                           layers.Linear(C2*36, 10)
                          ])
print(model)

# loss function: softmax + crossentropy
# (layer objects only; they are attached to the graph below)
smaxloss = layers.SmaxCELoss()
accuracy = layers.Accuracy()

# build computational graph
# y is the output of the last (Linear) layer applied to the input placeholder.
y = model(inp)

# Cross Entropy of Soft-max
# `loss` is the node later passed to autograd.Backward().
loss = smaxloss(y,lab)

# Accuracy
# Read through acc.top after a forward pass; used for reporting only.
acc = accuracy(y,lab)

Sequential(
  (0) Conv2d(in_channels=1, out_channels=8, kernel_size=4, stride=2, bias=True)
  (1) RELU()
  (2) Conv2d(in_channels=8, out_channels=16, kernel_size=2, stride=2, bias=True)
  (3) RELU()
  (4) Flatten()
  (5) Linear(in_features=576, out_features=10)
)


In [4]:
# Training loop
BSZ=64          # mini-batch size
lr=0.01         # learning rate passed to autograd.momentum
MOMENTUM=0.9    # momentum coefficient passed to autograd.momentum

NUM_EPOCH=10
DISPITER=50     # report running training statistics every DISPITER iterations


def _full_batch_starts(num_samples, bsz):
    """Return the start offsets of every full batch of `bsz` items.

    A trailing partial batch (fewer than `bsz` items) is dropped, so the
    result is empty when `num_samples < bsz`.
    """
    return range(0, num_samples - bsz + 1, bsz)


def _evaluate(images, labels, bsz):
    """Run forward-only passes over a dataset and average loss and accuracy.

    Feeds consecutive full batches through the graph via the `inp` / `lab`
    placeholders and averages `loss.top` and `acc.top` over the batches.

    Args:
        images: array of inputs, indexed along its first axis.
        labels: array of labels aligned with `images`.
        bsz: batch size; a trailing partial batch is skipped.

    Returns:
        (mean_loss, accuracy_percent) averaged over the evaluated batches.
        The accuracy is `acc.top` averaged and multiplied by 100
        (assumes acc.top is a per-batch fraction — confirm in layers.Accuracy).

    Raises:
        ValueError: if `labels` holds fewer than `bsz` samples, so that no
            full batch exists (previously an opaque ZeroDivisionError).
    """
    starts = _full_batch_starts(len(labels), bsz)
    if len(starts) == 0:
        raise ValueError(
            "need at least %d samples to evaluate, got %d" % (bsz, len(labels)))

    total_loss, total_acc = 0., 0.
    for b in starts:
        inp.set(images[b:b+bsz,...])
        lab.set(labels[b:b+bsz])
        autograd.Forward()
        total_acc += acc.top
        total_loss += loss.top
    return total_loss / len(starts), total_acc / len(starts) * 100


batches = _full_batch_starts(len(train_lb), BSZ)

# Set up the optimizer's momentum state before the first update.
# NOTE(review): presumably allocates per-parameter velocity buffers — confirm
# in autograd.init_momentum.
autograd.init_momentum()


niter, avg_loss, avg_acc=0, 0., 0.
for ep in range(NUM_EPOCH+1):

    # Validate before every epoch; the extra final iteration (ep == NUM_EPOCH)
    # only reports the result after the last training epoch and then stops.
    vloss, vacc = _evaluate(val_im, val_lb, BSZ)
    print("%09d: #### %d Epochs: Val Loss = %.3e, Accuracy = %.2f%%" % (niter,ep,vloss,vacc))
    if ep == NUM_EPOCH:
        break

    # Shuffle Training Set
    idx = np.random.permutation(len(train_lb))

    # Train one epoch
    for b in batches:
        # Load a batch
        batch_idx = idx[b:b+BSZ]
        inp.set(train_im[batch_idx,...])
        lab.set(train_lb[batch_idx])

        autograd.Forward()
        avg_loss += loss.top
        avg_acc += acc.top
        niter += 1
        # Running statistics span epoch boundaries: niter is never reset, so
        # each report always averages exactly DISPITER iterations.
        if niter % DISPITER == 0:
            avg_loss = avg_loss / DISPITER
            avg_acc = avg_acc / DISPITER * 100
            print("%09d: Training Loss = %.3e, Accuracy = %.2f%%" % (niter,avg_loss,avg_acc))
            avg_loss, avg_acc = 0., 0.

        # Backpropagate from the loss node, then apply the momentum update.
        autograd.Backward(loss)
        autograd.momentum(lr,MOMENTUM)

000000000: #### 0 Epochs: Val Loss = 2.299e+00, Accuracy = 10.42%
000000050: Training Loss = 2.184e+00, Accuracy = 34.28%
000000100: Training Loss = 1.812e+00, Accuracy = 61.28%
000000150: Training Loss = 1.477e+00, Accuracy = 67.94%
000000200: Training Loss = 1.266e+00, Accuracy = 72.53%
000000250: Training Loss = 1.179e+00, Accuracy = 75.19%
000000300: Training Loss = 1.101e+00, Accuracy = 76.22%
000000350: Training Loss = 9.795e-01, Accuracy = 77.72%
000000390: #### 1 Epochs: Val Loss = 8.257e-01, Accuracy = 79.79%
000000400: Training Loss = 8.907e-01, Accuracy = 80.09%
000000450: Training Loss = 8.422e-01, Accuracy = 78.94%
000000500: Training Loss = 7.838e-01, Accuracy = 81.09%
000000550: Training Loss = 7.574e-01, Accuracy = 82.47%
000000600: Training Loss = 7.403e-01, Accuracy = 83.59%
000000650: Training Loss = 7.388e-01, Accuracy = 82.19%
000000700: Training Loss = 7.258e-01, Accuracy = 82.47%
000000750: Training Loss = 6.522e-01, Accuracy = 84.41%
000000780: #### 2 Epochs: Va