In [1]:
import sys

import numpy as np
import pandas as pd
import torch

# Extend the import search path with the local semi-supervised-pytorch checkout.
# `sys` must be imported first, otherwise this line raises NameError on a fresh kernel.
# NOTE(review): the path is relative to the notebook's working directory — confirm
# the kernel is started from the directory that contains the checkout.
sys.path.append("./semi-supervised-pytorch/semi-supervised")

In [2]:
from models import VariationalAutoencoder
from layers import GaussianSample

# Dimension spec handed to the VAE constructor. The module repr shown for this
# cell has a 512 -> 256 encoder layer, 128-unit mu/log_var heads and a
# 128 -> 256 -> 512 decoder, so the entries appear to be
# [input size, latent size, [hidden sizes]].
vae_dims = [512, 128, [256]]
model = VariationalAutoencoder(vae_dims)
model

  init.xavier_normal(m.weight.data)


VariationalAutoencoder(
  (encoder): Encoder(
    (hidden): ModuleList(
      (0): Linear(in_features=512, out_features=256, bias=True)
    )
    (sample): GaussianSample(
      (mu): Linear(in_features=256, out_features=128, bias=True)
      (log_var): Linear(in_features=256, out_features=128, bias=True)
    )
  )
  (decoder): Decoder(
    (hidden): ModuleList(
      (0): Linear(in_features=128, out_features=256, bias=True)
    )
    (reconstruction): Linear(in_features=256, out_features=512, bias=True)
    (output_activation): Sigmoid()
  )
)

In [3]:
# `Variable` is a deprecated wrapper; the import is retained only because
# other cells in this notebook still reference the name.
from torch.autograd import Variable

# Stochastic layer fed with 10 input features; the outputs shown in this
# notebook are single-element tensors, i.e. a 1-dimensional latent.
gaussian = GaussianSample(10, 1)

# Plain tensors take part in autograd directly, so no Variable wrapper is needed.
z, mu, log_var = gaussian(torch.ones(1, 10))

# .item() pulls the Python scalar out of each single-element tensor
# (replaces the older float(tensor.data) idiom).
print(f"sample {z.item():.2f} drawn from N({mu.item():.2f}, {log_var.exp().item():.2f})")

sample -0.60 drawn from N(-0.69, 1.61)


In [4]:
# Call the sampling layer again on the same all-ones input and display the
# raw (z, mu, log_var) triple it returns.
unit_input = Variable(torch.ones(1, 10))
gaussian(unit_input)

(tensor([[0.8089]], grad_fn=<AddcmulBackward0>),
 tensor([[-0.6889]], grad_fn=<AddmmBackward0>),
 tensor([[0.4750]], grad_fn=<SoftplusBackward0>))

In [5]:
# Print the documentation string attached to `model._kld`.
kld_docstring = model._kld.__doc__
print(kld_docstring)


        Computes the KL-divergence of
        some element z.

        KL(q||p) = -∫ q(z) log [ p(z) / q(z) ]
                 = -E[log p(z) - log q(z)]

        :param z: sample from q-distribuion
        :param q_param: (mu, log_var) of the q-distribution
        :param p_param: (mu, log_var) of the p-distribution
        :return: KL(q||p)
        


In [6]:
from UJIDataset import UJIDataset

In [7]:
def binary_cross_entropy(r, x):
    """Binary cross-entropy summed over the last dimension.

    Hand-written so that the loss is not averaged or summed across the
    leading dimensions: one value is returned per leading index.

    :param r: predicted probabilities (reconstruction)
    :param x: target values, same shape as ``r``
    :return: tensor with the last dimension reduced by summation
    """
    eps = 1e-8  # keeps both logarithms finite when r reaches 0 or 1
    log_prob_on = torch.log(r + eps)
    log_prob_off = torch.log(1 - r + eps)
    log_likelihood = x * log_prob_on + (1 - x) * log_prob_off
    return -torch.sum(log_likelihood, dim=-1)

# Adam over every model parameter; learning rate and betas are given explicitly.
adam_settings = dict(lr=3e-4, betas=(0.9, 0.999))
optimizer = torch.optim.Adam(model.parameters(), **adam_settings)

import torch.utils.data

# Training data served in shuffled mini-batches of 128 using 4 loader workers.
tr_dataset = UJIDataset()
train_loader = torch.utils.data.DataLoader(
    dataset=tr_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=4,
)

In [8]:
# Unsupervised VAE training: maximise the ELBO (reconstruction likelihood minus
# KL term) by minimising its negated batch mean.
for epoch in range(50):
    model.train()
    total_loss = 0.0

    # Iterate the loader directly. Wrapping it in enumerate() would bind `u` to
    # the integer batch index and `_` to the batch itself, so the model would
    # receive an int instead of data.
    # Assumes each batch is a (features, label) pair — TODO confirm against UJIDataset.
    for u, _ in train_loader:
        reconstruction = model(u)

        # NOTE(review): a Bernoulli/BCE likelihood presumes `u` is scaled to
        # [0, 1] (the decoder ends in a Sigmoid) — confirm UJIDataset normalises.
        likelihood = -binary_cross_entropy(reconstruction, u)
        # `model.kl_divergence` is read right after the forward pass; presumably
        # the forward pass refreshes it — verify in the model implementation.
        elbo = likelihood - model.kl_divergence

        L = -torch.mean(elbo)

        # Clear gradients before backward so a previously interrupted run of
        # this cell cannot leak stale gradients into the first step.
        optimizer.zero_grad()
        L.backward()
        optimizer.step()

        # The loss is a 0-dim tensor: indexing it with [0] raises IndexError on
        # current PyTorch, so extract the Python float with .item().
        total_loss += L.item()

    m = len(train_loader)

    print(f"Epoch: {epoch}\tL: {total_loss/m:.2f}")