In [1]:
import numpy as np
import edf

In [2]:
# Short module-level aliases for the collections edf exposes; `ops` and
# `params` are consulted by the custom op defined in this cell.
ops, params, values = edf.ops, edf.params, edf.values

# Average pooling with stride
class avg_pool_with_stride:
    """Average pooling over square ``sz`` x ``sz`` windows placed every ``stride`` steps.

    ``x`` is an upstream node whose ``top`` array has four axes (unpacked in
    this code as B, H, W, C); pooling runs over the two middle axes.  Windows
    that would run past the border are dropped, so the result has shape
    ``(B, (H-sz)//stride+1, (W-sz)//stride+1, C)``.
    """

    def __init__(self,x,sz,stride):
        # Every instance registers itself in the module-level `ops` list.
        ops.append(self)
        self.x, self.sz, self.stride = x, sz, stride

    def forward(self):
        """Set ``self.top`` to the mean of each pooling window of ``self.x.top``."""
        k, step = self.sz, self.stride
        src = self.x.top
        n_batch, height, width, n_chan = src.shape
        out_h = (height - k)//step + 1
        out_w = (width - k)//step + 1
        # One past the last valid window origin along each pooled axis.
        end_h, end_w = height - k + 1, width - k + 1

        # For a fixed offset (di, dj) inside the window, one strided slice picks
        # that element out of every window at once; adding the k*k slices
        # yields the per-window sums.
        shifted = (
            src[:, di:end_h + di:step, dj:end_w + dj:step, :]
            for di in range(k)
            for dj in range(k)
        )
        totals = sum(shifted, np.zeros([n_batch, out_h, out_w, n_chan]))
        self.top = totals / np.float32(k*k)

    def backward(self):
        """Add this op's contribution to ``self.x.grad``.

        Each element of ``self.grad`` is shared equally (1 / sz**2) among the
        input positions of its window.  Nothing happens unless ``self.x`` is
        found in ``ops`` or ``params``.
        """
        if not (self.x in ops or self.x in params):
            return

        k, step = self.sz, self.stride
        n_batch, height, width, n_chan = self.x.top.shape
        end_h, end_w = height - k + 1, width - k + 1
        window_area = k*k

        spread = np.zeros([n_batch, height, width, n_chan])
        for di in range(k):
            for dj in range(k):
                # Same strided slices as in forward(); windows may overlap when
                # stride < sz, hence accumulation rather than assignment.
                spread[:, di:end_h + di:step, dj:end_w + dj:step, :] += self.grad / window_area

        self.x.grad = self.x.grad + spread
            
            
# Attach the op to the edf module so the model cell can build it as
# edf.avg_pool_with_stride(...), alongside edf's own operations.
edf.avg_pool_with_stride = avg_pool_with_stride

In [3]:
# #######################################

# # Inputs and parameters
# inp = edf.Param()
# lab = edf.Value()

# # Model
# y = edf.avg_pool_with_stride(inp,2,2)

# # Loss
# loss = edf.add(y,lab)

# # Forward test
# data = np.arange(32).reshape([1,4,4,2])
# inp.set(data)
# l = np.ones([1,2,2,2])*(-1.0)
# lab.set(l)

# edf.Forward()

# # Backward test
# edf.Backward(loss)

In [4]:
# Seed the global NumPy RNG so weight init and shuffling repeat across runs.
np.random.seed(0)

from os.path import normpath as fn
from time import time

# Load data
# np.load on an .npz archive returns a lazy NpzFile that keeps the file handle
# open; the context manager closes it once the arrays have been read out.
with np.load(fn('inputs/mnist_26k.npz')) as data:
    # Scale pixel values by 1/255 and shift by -0.5, then reshape to
    # (N, 28, 28, 1).
    train_im = np.float32(data['im_train'])/255.-0.5
    train_im = np.reshape(train_im,[-1,28,28,1])
    train_lb = data['lbl_train']

    val_im = np.float32(data['im_val'])/255.-0.5
    val_im = np.reshape(val_im,[-1,28,28,1])
    val_lb = data['lbl_val']


#######################################

# Inputs and parameters
# `inp` and `lab` are refilled with a batch via .set(...) in the training loop;
# the Param nodes receive their initial values in the "Init Weights" section.
# NOTE(review): nodes presumably register themselves with edf on construction
# (the custom op in this notebook does), so the statement order here likely
# fixes the evaluation order — confirm before reordering.
inp = edf.Value()
lab = edf.Value()

K1 = edf.Param()
B1 = edf.Param()

K2 = edf.Param()
B2 = edf.Param()

W3 = edf.Param()
B3 = edf.Param()


# Model
# Stage 1: conv -> 2x2 average pool with stride 2 -> bias -> RELU
y = edf.conv2(inp,K1)
# y = edf.down2(y);
y = edf.avg_pool_with_stride(y, 2, 2) # replace downsampling with avg pooling with stride
y = edf.add(y,B1)
y = edf.RELU(y)

# Stage 2: same layout as stage 1, using K2 / B2
y = edf.conv2(y,K2)
# y = edf.down2(y);
y = edf.avg_pool_with_stride(y, 2, 2) # replace downsampling with avg pooling with stride
y = edf.add(y,B2)
y = edf.RELU(y)


# Flatten, then a single matmul + bias layer with 10 outputs (W3 is
# initialised with shape (C2*25, 10)).
y = edf.flatten(y)

y = edf.matmul(y,W3)
y = edf.add(y,B3) # This is our final prediction


# Cross Entropy of Soft-max
# smaxloss output is reduced with edf.mean to the scalar later passed to
# edf.Backward.
loss = edf.smaxloss(y,lab)
loss = edf.mean(loss)

# Accuracy
# Only read for reporting (acc.top) in the training loop.
acc = edf.accuracy(y,lab)
acc = edf.mean(acc)

###################################

# Init Weights
def xavier(shape):
    """Return an array of ``shape`` drawn uniformly from ``[-b, b)``.

    ``b = sqrt(3 / prod(shape[:-1]))``, i.e. the bound shrinks with the
    product of every dimension except the last.  Samples come from the global
    ``np.random`` state, so results depend on the current seed.
    """
    n_in = np.prod(shape[:-1])
    bound = np.sqrt(3.0 / n_in)
    return np.random.uniform(low=-bound, high=bound, size=shape)

C1, C2 = 8, 16

# (weight, bias, weight shape), listed in initialisation order.  xavier() draws
# from the seeded global RNG, so this order determines the starting values.
# Each bias is a zero vector as long as the last axis of its weight.
# NOTE(review): the 25 in C2*25 is presumably the 5x5 spatial extent left
# after the second pooling stage — confirm against edf.conv2's output size.
for weight, bias, wshape in (
    (K1, B1, (4,4,1,C1)),
    (K2, B2, (2,2,C1,C2)),
    (W3, B3, (C2*25,10)),
):
    weight.set(xavier(wshape))
    bias.set(np.zeros((wshape[-1])))


# Training loop

BSZ = 50
lr = 0.001

NUM_EPOCH = 10
DISPITER = 50
# Start offsets of the full training batches; a trailing partial batch is skipped.
batches = range(0, len(train_lb) - BSZ + 1, BSZ)

# Momentum is used instead of plain SGD, so initialise it before the first step.
edf.init_momentum()


niter = 0
avg_loss = 0.
avg_acc = 0.
for ep in range(NUM_EPOCH + 1):

    # Validation pass first, so epoch 0 reports the untrained model.
    vacc = 0.
    vloss = 0.
    viter = 0
    for b in range(0, len(val_lb) - BSZ + 1, BSZ):
        inp.set(val_im[b:b+BSZ, ...])
        lab.set(val_lb[b:b+BSZ])
        edf.Forward()
        viter += 1
        vacc = vacc + acc.top
        vloss = vloss + loss.top
    vloss = vloss / viter
    vacc = vacc / viter * 100
    print("%09d: #### %d Epochs: Val Loss = %.3e, Accuracy = %.2f%%" % (niter,ep,vloss,vacc))

    # The extra iteration exists only to report validation after the last epoch.
    if ep == NUM_EPOCH:
        break

    # Fresh random order of the training set for this epoch.
    idx = np.random.permutation(len(train_lb))

    for b in batches:
        # Load one shuffled batch.
        batch_idx = idx[b:b+BSZ]
        inp.set(train_im[batch_idx, ...])
        lab.set(train_lb[batch_idx])

        edf.Forward()
        avg_loss = avg_loss + loss.top
        avg_acc = avg_acc + acc.top
        niter += 1

        # Report running averages every DISPITER iterations, then reset them.
        if niter % DISPITER == 0:
            avg_loss = avg_loss / DISPITER
            avg_acc = avg_acc / DISPITER * 100
            print("%09d: Training Loss = %.3e, Accuracy = %.2f%%" % (niter,avg_loss,avg_acc))
            avg_loss = 0.
            avg_acc = 0.

        edf.Backward(loss)
        # Momentum update with coefficient 0.9 (plain-SGD alternative: edf.SGD(lr)).
        edf.momentum(lr,0.9)



000000000: #### 0 Epochs: Val Loss = 2.324e+00, Accuracy = 9.60%
000000050: Training Loss = 2.311e+00, Accuracy = 10.28%
000000100: Training Loss = 2.290e+00, Accuracy = 12.32%
000000150: Training Loss = 2.273e+00, Accuracy = 16.68%
000000200: Training Loss = 2.250e+00, Accuracy = 16.32%
000000250: Training Loss = 2.218e+00, Accuracy = 25.24%
000000300: Training Loss = 2.170e+00, Accuracy = 41.12%
000000350: Training Loss = 2.088e+00, Accuracy = 47.16%
000000400: Training Loss = 1.950e+00, Accuracy = 54.60%
000000450: Training Loss = 1.706e+00, Accuracy = 65.20%
000000500: Training Loss = 1.320e+00, Accuracy = 72.32%
000000500: #### 1 Epochs: Val Loss = 1.104e+00, Accuracy = 75.80%
000000550: Training Loss = 1.012e+00, Accuracy = 75.00%
000000600: Training Loss = 8.007e-01, Accuracy = 79.24%
000000650: Training Loss = 6.916e-01, Accuracy = 80.28%
000000700: Training Loss = 5.819e-01, Accuracy = 82.36%
000000750: Training Loss = 5.467e-01, Accuracy = 84.16%
000000800: Training Loss = 5.

KeyboardInterrupt: 