In [1]:
import torch
from torch import Tensor

In [2]:
from torch.autograd import Variable

In [3]:
import numpy as np

In [4]:
import sys, inspect
# Put the parent directory first on the module search path
# (presumably so project-local packages one level up can be imported — confirm).
sys.path.insert(0, '..')

In [5]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): the commented-out imports in this cell are not used by any visible cell;
# remove them if they are no longer needed.
# import pymc
import matplotlib.pyplot as plt

# from learn_smc_proposals import cde
# from learn_smc_proposals.utils import systematic_resample

import seaborn as sns
# Global seaborn styling: "notebook" context with enlarged fonts and markers, "ticks" axes style.
sns.set_context("notebook", font_scale=1.5, rc={"lines.markersize": 12})
sns.set_style('ticks')

import scipy
import scipy.optimize
import scipy.special as special
import scipy.stats as stats

In [None]:
# Default size (width, height in inches) for every figure created below.
plt.rcParams.update({"figure.figsize": (16, 10)})

In [None]:
# Fit a Gaussian q(x) to the truncated target on a grid by minimizing KL(p || q).
l = 4     # half-width of the grid [-l, l]
n = 1000  # number of grid points
x = np.linspace(-l, l, n)
# Unnormalized target: standard normal density restricted to x > 1 (zero elsewhere).
p = (x > 1) * stats.norm(loc=0, scale=1).pdf(x)


def q(mu, sigma, grid=x):
    """Return the N(mu, sigma^2) density evaluated on `grid`.

    `grid` defaults to the NumPy grid defined above and is bound at definition
    time, so later cells that rebind the global `x` (e.g. to a torch tensor)
    do not change what this function evaluates on.
    """
    return stats.norm(loc=mu, scale=sigma).pdf(grid)


def kl_to_target(args):
    """Return KL(p || q) on the grid for args = (mu, sigma).

    stats.entropy normalizes both inputs to sum to one, so p does not need
    to be normalized beforehand.
    """
    return stats.entropy(p, q(args[0], args[1]))


# Derivative-free search over (mu, sigma), started at (1, 1).
res = scipy.optimize.minimize(fun=kl_to_target, x0=[1, 1], method='Nelder-Mead')
mu_q, sigma_q = res.x

In [None]:
# Compare the normalized target with the fitted Gaussian.
# The grid from np.linspace(-l, l, n) has spacing 2*l/(n-1); take it from the grid
# itself so the Riemann-sum normalization uses the true spacing.
dx = x[1] - x[0]
p_density = p / (p.sum() * dx)

plt.plot(x, p_density, label='f(x)p(x|y)')
plt.plot(x, q(mu_q, sigma_q), label='q(x)')

plt.legend()
plt.xlabel('x')
plt.ylabel('density')
plt.show()

print("mu_q {:.2f}, sigma_q {:.2f}".format(mu_q, sigma_q))

The same KL minimization in PyTorch, first on the CPU and then on the GPU.

In [None]:
# Gradient-based version of the Gaussian fit: minimize sum_i exp(p_i) * (p_i - q_i)
# over the mean and std of q, with p and q held as log-densities on a grid.
l = 4
n = 1000
x = Variable(torch.linspace(-l, l, n))
# Log of the unnormalized target: the standard normal log-density, pushed down to
# about -1e38 wherever x < 1 so that exp(p) is exactly zero there (a finite
# stand-in for -inf, which would turn 0 * inf into NaN in the loss).
p = torch.distributions.Normal(0, 1).log_prob(x) + (x < 1).float() * -1e38

# Free parameters of q, initialized at the standard normal.
mean = Variable(Tensor([0]), requires_grad=True)
std = Variable(Tensor([1]), requires_grad=True)

optimizer = torch.optim.Adam([mean, std], lr=0.01)

# exp(p) does not depend on the parameters, so compute it once.
target_weights = p.exp()
for step in range(500):
    optimizer.zero_grad()
    q = torch.distributions.Normal(mean, std).log_prob(x)
    loss = torch.dot(target_weights, p - q)
    loss.backward()
    optimizer.step()

mean, std

In [None]:
# GPU variant of the gradient-based Gaussian fit. All tensors are placed on the
# GPU when CUDA is available; otherwise the cell runs unchanged on the CPU
# instead of raising.
l = 4
n = 1000
use_cuda = torch.cuda.is_available()


def _on_device(tensor):
    """Return `tensor` moved to the GPU if CUDA is available, else `tensor` itself."""
    return tensor.cuda() if use_cuda else tensor


x = _on_device(Variable(torch.linspace(-l, l, n)))
# Log of the unnormalized target; about -1e38 where x < 1 so exp(p) is exactly zero there.
p = (x < 1).float() * -1e38 + torch.distributions.Normal(0, 1).log_prob(x)

# Move the raw tensors first and wrap them afterwards, so that mean and std are
# leaf variables that the optimizer can update.
mean = Variable(_on_device(Tensor([0])), requires_grad=True)
std = Variable(_on_device(Tensor([1])), requires_grad=True)

optimizer = torch.optim.Adam([mean, std], lr=0.01)

for t in range(500):
    q = torch.distributions.Normal(mean, std).log_prob(x)
    loss = torch.dot(p.exp(), p - q)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

mean, std

In [None]:
# Display the (mu, sigma) found by the SciPy Nelder-Mead fit
# (presumably for comparison with the PyTorch mean/std shown above — confirm).
mu_q, sigma_q

# Model

\begin{align}
p(x) &= \mathcal{N}(x|\mu_0, \sigma_0^2) \\
p(y|x) &= \mathcal{N}(y|x, \sigma^2) \\
q_\phi(x|y) &= \mathcal{N}(x \mid \mu_\phi(y), \sigma_\phi(y)^2) \\
f(x) &= \mathbb{1}_{x>1}
\end{align}

We fit the proposal parameters $\phi$ by minimizing $$\mathbb{E}_{p(x,y)}\left[-f(x) \log q_\phi(x \mid y)\right]$$

In [None]:
batch_size = 3000  # number of (x, y) pairs drawn per optimization step
epochs = 1000      # number of optimization steps
H = 10             # width of the two hidden layers


def f(x):
    """Indicator of the event x > 1 as a float tensor (1.0 where x > 1, else 0.0)."""
    return (x > 1).float()


# Network mapping an observation y (1 feature) to 2 outputs; the training cell
# splits them into the mean and the log-std of the Gaussian proposal q(x | y).
q_params = torch.nn.Sequential(
    torch.nn.Linear(1, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, 2),
)
# Use the GPU when there is one; stay on the CPU otherwise instead of raising.
if torch.cuda.is_available():
    q_params.cuda()

# Built after the (optional) move so it references the parameters on their final device.
optimizer = torch.optim.Adam(q_params.parameters(), lr=0.001)

In [None]:
# Override the learning rate of the existing optimizer without rebuilding it,
# so any accumulated Adam state is kept.
# NOTE(review): when the notebook is run top to bottom, this replaces the
# lr=0.001 set at construction before any training step has run — confirm intended.
lr = 0.00001

for param_group in optimizer.param_groups:
    param_group['lr'] = lr

In [None]:
# Train q_params by minimizing a Monte Carlo estimate of E_{p(x,y)}[-f(x) log q(x | y)].
# Keep the sampled data on whichever device the network's parameters live on.
on_gpu = next(q_params.parameters()).is_cuda

for e in range(epochs):
    # Draw x ~ N(0, 1) and force shape (batch_size, 1), which the first Linear(1, H)
    # layer needs. NOTE(review): the previously commented-out `.sample((batch_size,))`
    # variant needed an extra unsqueeze(1), which suggests the returned sample shape
    # differs between PyTorch versions; the explicit view removes that dependency.
    x = Variable(torch.distributions.Normal(0, 1).sample_n(batch_size)).view(batch_size, 1)
    if on_gpu:
        x = x.cuda()
    # Draw y | x ~ N(x, 1).
    y = torch.distributions.Normal(x, 1).sample()

    # The network outputs the proposal mean and log-std for each y.
    q_mean, q_log_std = q_params(y).split(1, dim=1)
    q_std = q_log_std.exp()
    q = torch.distributions.Normal(q_mean, q_std)
    # f(x) zeroes the contribution of samples outside the event x > 1.
    loss = -torch.mean(f(x) * q.log_prob(x))
    if e % 100 == 0:
        # view(-1)[0] works whether the loss is a 1-element or a 0-dim tensor.
        print(e, float(loss.data.view(-1)[0]))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
# Inspect the learned proposal at a few observations y, as a (5, 1) column.
# (The earlier commented-out line listed x = -1, 0, 1, 3, -2, i.e. each y minus 0.1.)
y = Variable(Tensor([[-1.1, 0.1, 1.1, 3.1, -2.1]])).transpose(0, 1)
# Put the inputs on whichever device the network's parameters live on.
if next(q_params.parameters()).is_cuda:
    y = y.cuda()

q_mean, q_log_std = q_params(y).split(1, dim=1)
q_std = q_log_std.exp()

q_mean, q_std

TODO: consider replacing the fully connected network with a normalizing flow (NF).

# Try Pyro IAFs (inverse autoregressive flows)

In [6]:
import pyro
from pyro.distributions.transformed_distribution \
    import InverseAutoregressiveFlow, TransformedDistribution
pyro.__version__

'0.1.2'

In [7]:
# Parameters of the base distribution. Move the raw tensors to the GPU (when one
# is available) *before* wrapping them: calling .cuda() on a variable that already
# has requires_grad=True returns a new non-leaf variable, whose .grad is not
# populated and which optimizers refuse to update.
mean_init, std_init = Tensor([0]), Tensor([1])
if torch.cuda.is_available():
    mean_init, std_init = mean_init.cuda(), std_init.cuda()
mean = Variable(mean_init, requires_grad=True)
std = Variable(std_init, requires_grad=True)

base_dist = pyro.distributions.Normal(mean, std)

In [8]:
# Inverse autoregressive flow to be applied on top of the base distribution.
# NOTE(review): input_dim=2 here, while mean/std of the base distribution each
# hold a single element — confirm the dimensions are meant to differ.
iaf = InverseAutoregressiveFlow(input_dim=2, hidden_dim=1)

In [9]:
# Wrap the base Normal with the IAF as its single transform.
dist = TransformedDistribution(base_dist, [iaf])

In [10]:
# Probe the transformed distribution's log-density at a single point.
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'TransformedDistribution' object has no attribute 'bijectors'",
# so this call does not succeed with the pyro version shown above (0.1.2).
# x is also created on the CPU while mean/std were moved with .cuda() — confirm
# the intended device placement once the AttributeError is resolved.
x = Variable(Tensor([[0]]))
dist.log_pdf(x)

AttributeError: 'TransformedDistribution' object has no attribute 'bijectors'

# Random explorations

In [None]:
# 3x3 matrix of ones, keeping only the entries strictly below the diagonal
# (tril(-1)), then transposed, which yields ones strictly above the diagonal.
torch.ones(3,3).tril(-1).t()