In [1]:
import node
import numpy as np
import cupy as cp

Works on GPU


In [2]:
from node.node import _single_oprand_op

class Activate(node.Op):

    def __init__(self, x, *args):
        super(Activate, self).__init__()
        self.register(x)
        self.output = self.forward()
        
    def forward(self):
        x = self.cache[0]
        return cp.sign(x.value)

    def backward(self, error):
        x = self.cache[0]
        x.accumulate(error * 1 * (cp.abs(self.output) <= 1))
    
@_single_oprand_op
def activate(self):
    return Activate(self)

setattr(node.Node, "activate", activate)

In [3]:
class BinaryLinear(node.Layer):
    
    def __init__(self, num_in_units, num_h_units):
        super(BinaryLinear, self).__init__()
        
        self.parameters = {
            "W": node.Node(cp.random.randn(num_in_units, num_h_units), name="W")
        }
        
    def __call__(self, input):
        return input.dot(self.parameters["W"].activate())

In [4]:
class BinaryNet(node.Network):
    
    def __init__(self, num_in_units, num_h_units, num_out_units):
        self.layers = [BinaryLinear(num_in_units, num_h_units),
                       node.BatchNormalization(num_h_units),
                       BinaryLinear(num_h_units, num_h_units),
                       node.BatchNormalization(num_h_units),
                       BinaryLinear(num_h_units, num_out_units),
                       node.BatchNormalization(num_out_units)]
        
    def __call__(self, input):
        hidden = input
        hidden = self.layers[1](self.layers[0](hidden)).activate()
        hidden = self.layers[3](self.layers[2](hidden)).activate()
        hidden = self.layers[5](self.layers[4](hidden))
        return hidden
    
classifier = BinaryNet(784, 1024, 10)
optimizer = node.Adam(classifier.get_parameters(), eta=0.001)

Has 1865748 parameters


In [5]:
train_dataset = node.MNIST(train=True, flatten=True)
train_dataloader = node.DataLoader(train_dataset, 100)

In [6]:
test_dataset = node.MNIST(train=False, flatten=True)
test_dataloader = node.DataLoader(test_dataset, 100)

In [7]:
def train(input, target):
    output = classifier(input / 255).softmax_with_binary_cross_entropy(target)
    optimizer.clear()
    output.backward()
    optimizer.update()
    return output.numpy()

In [8]:
def measure(prediction, target):
    prediction = np.argmax(prediction, axis=1)
    target = np.argmax(target, axis=1)
    return np.sum(np.where(prediction == target, 1, 0))

In [9]:
def evaluate(input, target):
    with node.zero_grad():
        prediction = classifier(input/255)
        output = prediction.softmax_with_binary_cross_entropy(target)
        
    loss = output.numpy()
    accuracy = measure(prediction.numpy(), target.numpy())
    return loss, accuracy

In [10]:
for epoch in range(26):
    
    # Train Loss, Test Loss, Accuracy
    metrics = [0, 0, 0]
    
    classifier.train()
    for input, target in train_dataloader:
        metrics[0] += train(input, target)

    classifier.test()
    for input, target in test_dataloader:
        loss, accuracy = evaluate(input, target)
        metrics[1] += loss 
        metrics[2] += accuracy
        
    metrics[0] /= len(train_dataloader)
    metrics[1] /= len(test_dataloader)
    metrics[2] /= 100 * len(test_dataloader)
    
    if epoch % 5 == 0:
        print("epoch {0:3}, training loss {1:.4f}, test loss {2:.4f}, accuracy {3:.4f}".format(epoch, *metrics))

epoch   0, training loss 1.7738, test loss 1.1775, accuracy 0.8756
epoch   5, training loss 0.5439, test loss 0.4837, accuracy 0.9222
epoch  10, training loss 0.4234, test loss 0.4054, accuracy 0.9301
epoch  15, training loss 0.3820, test loss 0.3828, accuracy 0.9324
epoch  20, training loss 0.3537, test loss 0.3679, accuracy 0.9339
epoch  25, training loss 0.3355, test loss 0.3604, accuracy 0.9345
