In [1]:
import node 
import numpy as np

Works on CPU


In [2]:
from tqdm import tqdm_notebook as tqdm

## Solver Construction

In [3]:
class EularMethod(object):
    """Fixed-step explicit (forward) Euler integrator.

    Holds a derivative function ``fn(time, value)`` and the current state
    ``value``; each call to :meth:`integrate` advances ``value`` in place.
    """

    def __init__(self, fn, input):
        """Store the derivative function and the initial state."""
        self.fn, self.value = fn, input

    def step(self, time, diff, input):
        """Return the Euler increment ``diff * fn(time, input)``."""
        slope = self.fn(time, input)
        return diff * slope

    def integrate(self, seq):
        """Advance the state across consecutive time points of ``seq``.

        Returns a list with the state after each interval; the initial
        state itself is not included, so the list has ``len(seq) - 1``
        entries (empty when ``seq`` has fewer than two points).
        """
        trajectory = []
        # zip stops at the shorter operand, so this pairs (t[i], t[i + 1]).
        for start, stop in zip(seq, seq[1:]):
            increment = self.step(start, stop - start, self.value)
            # Increment stays on the left-hand side of the addition.
            self.value = increment + self.value
            trajectory.append(self.value)
        return trajectory

In [4]:
def solve(fn, input, seq):
    """Integrate ``fn`` from the state ``input`` over the time points ``seq``.

    Builds a fresh ``EularMethod`` solver and returns the list of states it
    produces, one per interval of ``seq``.
    """
    return EularMethod(fn, input).integrate(seq)

## Model Construction

The model consists of three stages, applied in order:
```sequence
DownSampler ===> NeuralODEBlock x n ===> Classifier
```

In [None]:
class DownSampler(node.Network):
    """Convolutional front end: three convolutions with batch norm and ReLU
    after the first two.

    Shape notes below are the original author's; they presumably assume a
    1 x 28 x 28 input -- TODO confirm.
    """

    def __init__(self):
        self.layers = [
            node.Convolution2D(1, 64, 3, 1),
            node.BatchNormalization(64),
            node.Convolution2D(64, 64, 4, 2, 1),
            node.BatchNormalization(64),
            node.Convolution2D(64, 64, 4, 2, 1),
        ]

    def __call__(self, input):
        conv1, norm1, conv2, norm2, conv3 = self.layers

        # Block 1 -- output: 64 x 26 x 26
        hidden = norm1(conv1(input)).relu()

        # Block 2 -- output: 64 x 13 x 13
        hidden = norm2(conv2(hidden)).relu()

        # Block 3 -- output: 64 x 6 x 6 (no normalization or activation here)
        return conv3(hidden)

In [None]:
class ConcatenatedConvolution2D(node.Network):
    """Convolution that receives the current time as one extra input channel.

    The wrapped ``node.Convolution2D`` is built with ``num_in_ch + 1`` input
    channels; any further positional ``args`` are forwarded to it unchanged.
    """

    def __init__(self, num_in_ch, num_out_ch, *args):
        conv = node.Convolution2D(num_in_ch + 1, num_out_ch, *args)
        self.layers = [conv]

    def __call__(self, time, input):
        # A single-channel plane of ones, matching the input on every other
        # axis, scaled by the time value.
        ones = np.ones_like(input.value[:, :1, :, :])
        time_channel = node.Node(ones) * time

        # The time plane goes first along axis 1, followed by the input.
        stacked = node.concatenate([time_channel, input], 1)
        return self.layers[0](stacked)

class NeuralODEBlock(node.Network):
    """Block whose output is the solution of an ODE with learned dynamics.

    ``fn`` defines the derivative; calling the block integrates it with
    ``solve`` over the time points in ``start2stop`` and returns the final
    state.
    """

    def __init__(self):
        self.layers = [
            node.BatchNormalization(64),
            ConcatenatedConvolution2D(64, 64, 3, 1, 1),
            node.BatchNormalization(64),
            ConcatenatedConvolution2D(64, 64, 3, 1, 1),
            node.BatchNormalization(64),
        ]

        # Integration time points: array([0, 1]), i.e. a single interval.
        self.start2stop = np.arange(0, 2, 1)

    def fn(self, time, input):
        """Derivative function evaluated by the solver at ``time``."""
        norm1, conv1, norm2, conv2, norm3 = self.layers

        # Block 1: normalization followed by ReLU
        hidden = norm1(input).relu()

        # Block 2: time-conditioned convolution, then normalization
        hidden = norm2(conv1(time, hidden))

        # Block 3: time-conditioned convolution, then normalization
        hidden = norm3(conv2(time, hidden))

        return hidden

    def __call__(self, input):
        # Only the state at the last time point is propagated onward.
        return solve(self.fn, input, self.start2stop)[-1]

In [None]:
class Classifier(node.Network):
    """Classification head: batch norm, ReLU, flatten, then a linear layer
    mapping 2304 features to 10 outputs."""

    def __init__(self):
        self.layers = [
            node.BatchNormalization(64),
            node.Linear(2304, 10),
        ]

    def __call__(self, input):
        norm, linear = self.layers

        # Block 1 -- output: 64 x 6 x 6 (per the original author's note)
        activated = norm(input).relu()

        # Fully-connected layer: keep the leading axis, flatten the rest.
        batch_size = input.value.shape[0]
        flat = activated.reshape(batch_size, -1)
        return linear(flat)

In [None]:
class MainClassifier(node.Network):
    """Full model: DownSampler, then NeuralODEBlock, then Classifier."""

    def __init__(self):
        self.layers = [
            DownSampler(),
            NeuralODEBlock(),
            Classifier(),
        ]

    def __call__(self, input):
        # Feed the input through each stage in the order it was registered.
        hidden = input
        for stage in self.layers:
            hidden = stage(hidden)
        return hidden
    
# Build the model and an Adam optimizer over its parameters.
classifier = MainClassifier()
parameters = classifier.get_parameters()
optimizer = node.Adam(parameters, 0.001)

# The count is slightly larger than 0.22M because a fully-connected layer is
# used here in place of an adaptive average pooling layer.
num_parameters = classifier.get_num_parameters()
print("parameter size: {}".format(num_parameters))

parameter size: 230666


## Training Procedure

In [None]:
mini_batch_size = 100

# Index 0 is the training split, index 1 the held-out split.
datasets = [node.MNIST(train=is_train) for is_train in (True, False)]

# One loader per split, in the same order as `datasets`.
dataloaders = [node.DataLoader(dataset, mini_batch_size) for dataset in datasets]

In [None]:
def train(input, target):
    """Run one training step on a mini-batch and return its loss.

    The input is divided by 255 before being passed to the module-level
    ``classifier``; the module-level ``optimizer`` is cleared, the loss is
    back-propagated, and the optimizer is updated, in that order.
    Returns the loss converted with ``.numpy()``.
    """
    logits = classifier(input / 255)
    loss = logits.softmax_with_binary_cross_entropy(target)

    # Order matters: reset the optimizer, back-propagate, then apply the update.
    optimizer.clear()
    loss.backward()
    optimizer.update()

    return loss.numpy()

In [None]:
for epoch in range(11):
    # Train Loss, Test Loss, Accuracy
    metrics = [0, 0, 0]
    num_batches = 0

    # Loop variables are deliberately not named `input`, which would shadow
    # the builtin for every later cell of the notebook.
    for batch_input, batch_target in tqdm(dataloaders[0]):
        metrics[0] += train(batch_input, batch_target)
        num_batches += 1

    # Report the mean per-batch training loss rather than the running sum,
    # so the value does not scale with the number of batches.
    if num_batches:
        metrics[0] = metrics[0] / num_batches

    # NOTE(review): metrics[1] (test loss) and metrics[2] (accuracy) are never
    # updated in this loop, so they are always printed as 0. An evaluation
    # pass over dataloaders[1] is still needed to populate them.
    if epoch % 5 == 0:
        print("epoch {0:2}, train {1:.4f}, test {2:.4f}, acc {3:.4f}".format(epoch, *metrics))

HBox(children=(IntProgress(value=0, max=600), HTML(value='')))

In [None]:
# Scratch check: concatenate ten random (3, 1, 3) nodes along axis 1, print the
# resulting shape, and run a backward pass through the concatenation.
pieces = [node.Node(np.random.randn(3, 1, 3)) for _ in range(10)]
seq = node.concatenate(pieces, 1)
print(seq.value.shape)
seq.backward()