In [1]:
import numpy as np
from os.path import normpath as fn # Fix Linux/Windows path issue
import sys
sys.path.append("nn") # add the nn module into system path

from nn.container import Placeholder, Sequential
from nn.solver import Adam
from nn.loss import SmaxCELoss, accuracy
from nn.graph import Graph, Seesion
from nn import layer

# Seed NumPy's global RNG so np.random draws (e.g. the per-epoch
# np.random.permutation shuffle in the training loop) repeat across runs.
# NOTE(review): the nn layers presumably draw their initial weights from the
# same RNG -- confirm inside the nn package.
np.random.seed(0)

In [2]:
# Load data
def _prepare_images(raw):
    """Convert `raw` with np.float32, map v -> v/255 - 0.5, reshape to (-1, 28, 28, 1).

    NOTE(review): presumably `raw` holds 0..255 pixel intensities, which would
    put the result in [-0.5, 0.5] -- confirm against the .npz contents.
    """
    centered = np.float32(raw) / 255. - 0.5
    return np.reshape(centered, [-1, 28, 28, 1])


# The archive handle is kept in `data`; arrays are read from it by key.
data = np.load(fn('data/mnist_26k.npz'))

# Training split: images and their labels
train_im = _prepare_images(data['im_train'])
train_lb = data['lbl_train']

# Validation split: same preprocessing as the training images
val_im = _prepare_images(data['im_val'])
val_lb = data['lbl_val']

In [3]:
# Output channel counts of the first and second convolution stage
C1 = 16
C2 = 32

###################################
# build static computational graph
###################################
graph = Graph()
graph.as_default()

# Feed points of the graph: `inp` receives the image batch, `lab` the labels
inp = Placeholder()
lab = Placeholder()


def _conv_stage(c_in, c_out):
    """Return one stage as a list: 3x3 conv (no bias), batch norm, ReLU, max-pool.

    The layers are instantiated in exactly this order on every call.
    """
    return [
        layer.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1, bias=False),
        layer.BatchNorm2d(c_out),
        layer.RELU(),
        layer.Maxpool2d(),
    ]


# Two conv stages followed by dropout and a linear classifier with 10 outputs.
# NOTE(review): the factor 49 is presumably the 7x7 spatial grid left after two
# stride-2 pools on a 28x28 input -- confirm against the layer implementations.
model = Sequential(
    _conv_stage(1, C1)
    + _conv_stage(C1, C2)
    + [
        layer.Dropout(0.3),
        layer.Flatten(),
        layer.Linear(C2*49, 10),
    ]
)
print(model)

# Wire the model onto the input placeholder; `y` is the model's output node
y = model(inp)

# Loss node: softmax followed by cross-entropy, comparing `y` with `lab`
criterion = SmaxCELoss()
loss = criterion(y, lab)

(main) Sequential(
  (0) Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1, padding=1, bias=False)
  (1) BatchNorm2d(num_features=16, eps=1e-05, momentum=0.1)
  (2) RELU()
  (3) Maxpool2d(kernel_size=2, stride=2)
  (4) Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1, bias=False)
  (5) BatchNorm2d(num_features=32, eps=1e-05, momentum=0.1)
  (6) RELU()
  (7) Maxpool2d(kernel_size=2, stride=2)
  (8) Dropout(p=0.3)
  (9) Flatten()
  (10) Linear(in_features=1568, out_features=10)
)


In [4]:
# Training configuration
BSZ = 64          # number of samples per mini-batch
NUM_EPOCH = 10    # number of passes over the training set
DISPITER = 100    # print the running training metrics every DISPITER iterations
lr = 1e-3         # learning rate handed to the optimizer

# Adam over the variables registered on the graph
optimizer = Adam(graph.variables, lr)

In [5]:
# Training loop
# Start offsets of the full-size training mini-batches; a trailing partial
# batch (fewer than BSZ samples) is never visited.
batches = range(0, len(train_lb) - BSZ + 1, BSZ)
niter = 0        # training iterations completed so far
avg_loss = 0.    # running sums since the last progress print
avg_acc = 0.

sess = Seesion()
for ep in range(NUM_EPOCH + 1):
    # ---- Validation pass: before each epoch, and once more after the last ----
    sess.eval()  # NOTE(review): presumably switches the graph to inference mode
    vacc = 0.
    vloss = 0.
    viter = 0
    for b in range(0, len(val_lb) - BSZ + 1, BSZ):
        val_rows = slice(b, b + BSZ)
        sess.run(loss, {inp: val_im[val_rows, ...], lab: val_lb[val_rows]})
        viter += 1
        # After run(), node values are read back through `.top`
        vacc += accuracy(y.top, val_lb[val_rows])
        vloss += loss.top
    vloss = vloss / viter
    vacc = vacc / viter * 100
    print("%09d: #### %d Epochs: Val Loss = %.3e, Accuracy = %.2f%%" % (niter, ep, vloss, vacc))
    if ep == NUM_EPOCH:
        # The extra iteration exists only to report the final validation score
        break

    # ---- One training epoch over a freshly shuffled sample order ----
    idx = np.random.permutation(len(train_lb))
    sess.train()  # NOTE(review): presumably switches the graph to training mode
    for b in batches:
        # Shuffled sample indices forming this mini-batch
        pick = idx[b:b + BSZ]
        sess.run(loss, {inp: train_im[pick, ...], lab: train_lb[pick]})
        avg_loss += loss.top
        avg_acc += accuracy(y.top, train_lb[pick])
        niter += 1
        if niter % DISPITER == 0:
            # Report the mean over the last DISPITER iterations, then restart the sums
            print("%09d: Training Loss = %.3e, Accuracy = %.2f%%"
                  % (niter, avg_loss / DISPITER, avg_acc / DISPITER * 100))
            avg_loss = 0.
            avg_acc = 0.

        # niter was already incremented, so the first call receives 1
        optimizer.step(niter)

000000000: #### 0 Epochs: Val Loss = 2.330e+00, Accuracy = 7.40%
000000100: Training Loss = 3.384e+00, Accuracy = 12.45%
000000200: Training Loss = 2.906e+00, Accuracy = 17.30%
000000300: Training Loss = 2.440e+00, Accuracy = 25.48%
000000390: #### 1 Epochs: Val Loss = 1.033e+00, Accuracy = 77.08%
000000400: Training Loss = 1.995e+00, Accuracy = 36.38%
000000500: Training Loss = 1.723e+00, Accuracy = 43.30%
000000600: Training Loss = 1.460e+00, Accuracy = 50.91%
000000700: Training Loss = 1.305e+00, Accuracy = 56.25%
000000780: #### 2 Epochs: Val Loss = 5.396e-01, Accuracy = 88.12%
000000800: Training Loss = 1.129e+00, Accuracy = 62.66%
000000900: Training Loss = 9.904e-01, Accuracy = 67.05%
000001000: Training Loss = 9.651e-01, Accuracy = 67.97%
000001100: Training Loss = 8.953e-01, Accuracy = 70.22%
000001170: #### 3 Epochs: Val Loss = 3.900e-01, Accuracy = 91.04%
000001200: Training Loss = 8.346e-01, Accuracy = 72.55%
000001300: Training Loss = 7.782e-01, Accuracy = 74.02%
000001400