In [12]:
import torch
from torch.autograd import Variable, grad
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions.normal import Normal
from torch.nn.utils import clip_grad_norm

import matplotlib.pyplot as plt
%matplotlib inline

In [86]:
class NNBlock(torch.nn.Module):
    def __init__(self, features):
        """ Mini neural network"""
        super(NNBlock, self).__init__()
        self.features = features
        
    def forward(self, x):
        print(self.features, x.shape)
        x = F.leaky_relu(nn.Linear(self.features, self.features)(x))
        x = F.leaky_relu(nn.Linear(self.features, self.features)(x))
        x = F.leaky_relu(nn.Linear(self.features, self.features)(x))
        return x
        
class InvertibleBlock(torch.nn.Module):
    def __init__(self, features):
        """ Invertible block for invertible neural network
        """
        super(InvertibleBlock, self).__init__()
        self.features = features
        self.s1 = NNBlock(self.features//2)
        self.s2 = NNBlock(self.features//2)
        self.t1 = NNBlock(self.features//2)
        self.t2 = NNBlock(self.features//2)

    def forward(self, u):
        """ Prediction step for invertible block"""        
        u1, u2 = torch.split(u, 2, dim=1)
        v1 = u1 * torch.exp(self.s2(u2)) + self.t2(u2)
        v2 = u2 * torch.exp(self.s2(u1)) + self.t2(u1)
        return torch.cat((v1, v2), dim=1)
            
    def invert(self, v):
        """ Inverse prediction step for invertible block """
        v1, v2 = torch.split(v, 2, dim=1)
        u2 = (v2 - self.t1(v1)) * torch.exp(-self.s1(v1))
        u1 = (v1 - self.t2(v2)) * torch.exp(-self.s2(u2))
        return torch.cat((u1, u2), dim=1)    

In [87]:
import numpy as np
r1, r2 = 0, 10
N = 100
x = (r1 - r2) * torch.rand(N, D) + r2

In [88]:
# nn.Linear(D, 1)(NNBlock(D)(nn.Linear(1, D)(torch.rand(N, 1))))

In [89]:
D = 4
model = nn.Sequential(
    nn.Linear(1, D), 
    InvertibleBlock(D),
    nn.Linear(D, 1)    
)

def loss_function(dFdx, logfx, x):
    err = dFdx(x) * torch.exp(x**2/2) - logfx(x)
    mse = torch.mean(torch.pow(err, 2))
    return mse

In [90]:
logfx = lambda x: (x - 2)**2 / 4

In [95]:
import tqdm
optimizer = optim.Adamax(model.parameters(), lr=0.001)
epochs = 100

for i in tqdm.tqdm(range(epochs)):
    optimizer.zero_grad()
    
    dFdx = model.grad_fn
    loss = loss_function(dFdx, logfx, x)
    loss.backward()
    optimizer.step()

  0%|          | 0/100 [00:00<?, ?it/s]


AttributeError: 'Sequential' object has no attribute 'grad_fn'

In [42]:
x

tensor([[5.1739, 6.4259, 2.1677],
        [7.8291, 6.7644, 8.2152],
        [1.7953, 9.0421, 5.4770],
        [1.3593, 9.2200, 6.7415],
        [0.3938, 6.8967, 7.2334],
        [1.7706, 3.1816, 7.4935],
        [2.4664, 7.1811, 2.0965],
        [7.6287, 8.5933, 7.4886],
        [2.9009, 4.4174, 9.5906],
        [3.2544, 6.6561, 9.7100],
        [1.3539, 3.6694, 2.0668],
        [2.0109, 1.5469, 9.8021],
        [5.5447, 7.2284, 3.8696],
        [6.4288, 6.3455, 6.5286],
        [7.8471, 7.9702, 8.0500],
        [0.5765, 0.9542, 8.0135],
        [9.7116, 4.6032, 9.8829],
        [9.3068, 1.4064, 3.4627],
        [3.1410, 5.7822, 4.8473],
        [3.9539, 2.4349, 8.5069],
        [6.4294, 2.0363, 7.3910],
        [5.6892, 3.5412, 2.2405],
        [0.5620, 1.1726, 2.3146],
        [6.0615, 6.0958, 7.3219],
        [1.4351, 1.0259, 2.4657],
        [4.1600, 3.1493, 1.1988],
        [9.1426, 3.7643, 2.0887],
        [3.3389, 9.2688, 1.2136],
        [7.5713, 3.5864, 3.9415],
        [2.836