In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to local source files
# (such as the `models` module imported below) take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import math
import random
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

from models import *

In [4]:
def train(model, optimizer, data, target, num_iters):
    """Fit ``model`` so that ``model(data)`` approximates ``target``.

    Performs ``num_iters`` optimisation steps on the mean-squared error
    between ``model(data)`` and ``target``, feeding the whole ``data``
    tensor on every step. Every 1000th iteration (starting with the very
    first one) a progress line with the current loss and the mean absolute
    error is printed.

    Args:
        model: callable applied as ``model(data)``; its output must be
            compatible with ``target`` for ``F.mse_loss``.
        optimizer: object exposing ``zero_grad()`` and ``step()`` that
            updates the parameters of ``model``.
        data: input tensor passed to ``model``.
        target: tensor the model output is compared against.
        num_iters: number of optimisation steps to run.

    Returns:
        None. The model is updated in place through ``optimizer``.
    """
    for i in range(num_iters):
        out = model(data)
        loss = F.mse_loss(out, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 1000 == 0:
            # The mean absolute error is only reported, never optimised, so
            # it is computed on logging iterations only and kept out of the
            # autograd graph. `out` was produced before the parameter update,
            # so the reported value matches the reported loss.
            with torch.no_grad():
                mea = torch.mean(torch.abs(target - out))
            print("\t{}/{}: loss: {:.3f} - mea: {:.3f}".format(
                i+1, num_iters, loss.item(), mea.item())
            )

## Permutation

In [5]:
# Swap the first and third columns: B is A right-multiplied by the
# permutation matrix P.

A = torch.tensor(
    [
        [0, 1, -1],
        [3, -1, 1],
        [1, 1, -2],
    ],
    dtype=torch.float32,
)

B = torch.tensor(
    [
        [-1, 1, 0],
        [1, -1, 3],
        [-2, 1, 1],
    ],
    dtype=torch.float32,
)

P = torch.tensor(
    [
        [0, 0, 1],
        [0, 1, 0],
        [1, 0, 0],
    ],
    dtype=torch.float32,
)

# Sanity check: the hand-written target really is A with its columns swapped.
assert torch.allclose(A @ P, B)

In [6]:
# Seed torch's global RNG right before building the model so this experiment
# is repeatable regardless of which cells ran earlier. The value 0 is
# arbitrary. NOTE(review): this assumes the parameter initialisation inside
# `models` draws from torch's global RNG -- confirm against models.py.
torch.manual_seed(0)

net = NeuralAccumulatorCell(3, 3)
optim = torch.optim.RMSprop(net.parameters(), lr=1e-2)

# Learn the map A -> B; `net` is inspected in the following cell.
train(net, optim, A, B, int(1e4))

	1/10000: loss: 1.034 - mea: 0.740
	1001/10000: loss: 0.000 - mea: 0.014
	2001/10000: loss: 0.000 - mea: 0.002
	3001/10000: loss: 0.000 - mea: 0.000
	4001/10000: loss: 0.000 - mea: 0.000
	5001/10000: loss: 0.000 - mea: 0.000
	6001/10000: loss: 0.000 - mea: 0.000
	7001/10000: loss: 0.000 - mea: 0.000
	8001/10000: loss: 0.000 - mea: 0.000
	9001/10000: loss: 0.000 - mea: 0.000


In [7]:
# Effective weight matrix of the trained cell, rebuilt from its two raw
# parameters.
W = torch.tanh(net.W_hat) * torch.sigmoid(net.M_hat)

# W is printed transposed so that it can be compared entry-by-entry with P
# (presumably the cell applies W as x @ W.T -- confirm against models.py).
# `.detach()` is the supported way to view the values without autograd
# history; the legacy `.data` attribute is avoided.
print("actual: \n{}\n".format(W.transpose(0, 1).detach()))
print("expected: \n{}\n".format(P))

actual: 
tensor([[-0.0000, -0.0000,  1.0000],
        [-0.0001,  0.9999, -0.0000],
        [ 1.0000, -0.0000, -0.0000]])

expected: 
tensor([[0., 0., 1.],
        [0., 1., 0.],
        [1., 0., 0.]])



## Column Scaling

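A single NAC cell can't learn column scaling: its effective weights, `tanh(W_hat) * sigmoid(M_hat)`, are confined to the interval [-1, 1] (ideally saturating at -1, 0 or 1), so a scale factor such as 5 cannot be represented by one cell. The experiment below therefore uses the `NAC` model instead of a single `NeuralAccumulatorCell`.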

In [16]:
# Multiply the first column by 5: B is A right-multiplied by the diagonal
# scaling matrix P.

A = torch.tensor(
    [
        [0, 1, -1],
        [3, -1, 1],
        [1, 1, -2],
    ],
    dtype=torch.float32,
)

B = torch.tensor(
    [
        [0, 1, -1],
        [15, -1, 1],
        [5, 1, -2],
    ],
    dtype=torch.float32,
)

P = torch.tensor(
    [
        [5, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
    ],
    dtype=torch.float32,
)

# Sanity check: the hand-written target really is A with column one scaled.
assert torch.allclose(A @ P, B)

In [28]:
# Seed torch's global RNG right before building the model so this experiment
# is repeatable regardless of which cells ran earlier. The value 0 is
# arbitrary. NOTE(review): this assumes the parameter initialisation inside
# `models` draws from torch's global RNG -- confirm against models.py.
torch.manual_seed(0)

net = NAC(2, 3, 3, 3)
optim = torch.optim.RMSprop(net.parameters(), lr=1e-3)

# Learn the column-scaling map A -> B.
train(net, optim, A, B, int(1e4))

	1/10000: loss: 5.623 - mea: 1.764
	1001/10000: loss: 1.071 - mea: 0.894
	2001/10000: loss: 0.101 - mea: 0.269
	3001/10000: loss: 0.035 - mea: 0.141
	4001/10000: loss: 0.007 - mea: 0.050
	5001/10000: loss: 0.001 - mea: 0.022
	6001/10000: loss: 0.000 - mea: 0.011
	7001/10000: loss: 0.000 - mea: 0.005
	8001/10000: loss: 0.000 - mea: 0.003
	9001/10000: loss: 0.000 - mea: 0.002


## Column Elimination

In [20]:
def basis_vec(k, n):
    """Creates the k'th standard basis vector in R^n.

    Args:
        k: zero-based index of the single non-zero entry; must satisfy
            ``0 <= k < n``.
        n: dimension of the vector.

    Returns:
        A float NumPy array of shape ``(n, 1)`` that is 1 in row ``k`` and
        0 everywhere else.

    Raises:
        AssertionError: if ``k`` lies outside ``[0, n)``.
    """
    error_msg = "[!] k must satisfy 0 <= k < {}.".format(n)
    # A negative k would otherwise be accepted and silently wrap around
    # through NumPy's negative indexing, returning the wrong basis vector.
    assert 0 <= k < n, error_msg
    b = np.zeros([n, 1])
    b[k] = 1
    return b

In [21]:
# Add -3 times the second column to the first column. The corresponding
# elementary matrix is P = I + c * e_k e_l^T with c = -3, k = 1 (the source
# column) and l = 0 (the destination column), using zero-based indices.

A = torch.tensor(
    [
        [3, 1, -1],
        [3, -1, 1],
        [1, 1, -2],
    ],
    dtype=torch.float32,
)

B = torch.tensor(
    [
        [0, 1, -1],
        [6, -1, 1],
        [-2, 1, -2],
    ],
    dtype=torch.float32,
)

# The outer product e_1 e_0^T has a single 1 at row 1, column 0.
P = torch.from_numpy(
    np.eye(3) + (-3) * (basis_vec(1, 3) @ basis_vec(0, 3).T)
).float()

# Sanity check: the hand-written target really is A after the column update.
assert torch.allclose(A @ P, B)

In [27]:
# Seed torch's global RNG right before building the model so this experiment
# is repeatable regardless of which cells ran earlier. The value 0 is
# arbitrary. NOTE(review): this assumes the parameter initialisation inside
# `models` draws from torch's global RNG -- confirm against models.py.
torch.manual_seed(0)

net = NAC(2, 3, 3, 3)
optim = torch.optim.RMSprop(net.parameters(), lr=1e-3)

# Learn the column-elimination map A -> B.
train(net, optim, A, B, int(1e4))

	1/10000: loss: 4.037 - mea: 1.414
	1001/10000: loss: 0.437 - mea: 0.520
	2001/10000: loss: 0.084 - mea: 0.257
	3001/10000: loss: 0.017 - mea: 0.112
	4001/10000: loss: 0.001 - mea: 0.022
	5001/10000: loss: 0.000 - mea: 0.009
	6001/10000: loss: 0.000 - mea: 0.005
	7001/10000: loss: 0.000 - mea: 0.002
	8001/10000: loss: 0.000 - mea: 0.001
	9001/10000: loss: 0.000 - mea: 0.001
