In [2]:
import os
import math

import numpy as np
import torch as t
from pathlib import Path
import matplotlib.pyplot as plt
from pyro.infer.mcmc import NUTS, HMC
from pyro.infer.mcmc.api import MCMC

from bnn_priors.data import UCI
from bnn_priors.models import RaoBDenseNet, DenseNet
from bnn_priors.prior import LogNormal
from bnn_priors.inference import SGLDRunner

In [3]:
# TODO: try different data sets (a previous experiment used "yacht" in place of "boston")
# The second positional argument is passed through unchanged; presumably it
# selects the train/test split index -- confirm against bnn_priors.data.UCI.
dataset_name = "boston"
data = UCI(dataset_name, 0)

In [4]:
# Name of the accelerator to use when CUDA is available.
# NOTE(review): `device` is not referenced by any other cell visible here;
# the tensors below are used on whatever device the data set returns them on.
if t.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Train / test inputs and targets, read from `data.norm`
# (presumably the normalised view of the data set -- confirm).
x_train, y_train = data.norm.train_X, data.norm.train_y
x_test, y_test = data.norm.test_X, data.norm.test_y

In [5]:
# TODO: change model type
# Alternative that was tried here:
# model = RaoBDenseNet(x_train, y_train, 50, noise_std=LogNormal((), -1., 0.2)).to(x_train)

# Prior placed on the observation-noise standard deviation.
noise_prior = LogNormal((), -1., 0.2)

# Input / output sizes come from the last dimension of the training tensors;
# 50 is presumably the hidden-layer width -- confirm against DenseNet.
model = DenseNet(
    x_train.size(-1),
    y_train.size(-1),
    50,
    noise_std=noise_prior,
)
# Match the model's dtype and device to those of the training inputs.
model = model.to(x_train)

In [8]:
# Debugging aid: display the name of every attribute available on `model`.
# NOTE(review): the resulting listing is long; consider removing this cell
# (or clearing its output) before sharing the notebook.
dir(model)

['__abstractmethods__',
 '__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_apply',
 '_backward_hooks',
 '_buffers',
 '_forward_hooks',
 '_forward_pre_hooks',
 '_get_name',
 '_load_from_state_dict',
 '_load_state_dict_pre_hooks',
 '_modules',
 '_named_members',
 '_parameters',
 '_register_load_state_dict_pre_hook',
 '_register_state_dict_hook',
 '_replicate_for_data_parallel',
 '_save_to_state_dict',
 '_slow_forward',
 '_state_dict_hooks',
 '_version',
 'add_module',
 'apply',
 'bfloat16',
 'buffers',
 'children',
 'cpu',
 'cuda',
 'double',
 'dump_patches',
 'eval',
 'extra_repr',
 'float',
 'forw

In [5]:
# Sampling budget shared by the sampler cells below:
#   N_steps -- number of posterior samples requested / evaluated
#   warmup  -- number of warm-up steps (also reused as warm-up epochs for SGLD)
# Both are set to a quick-run value; 2000 is the alternative noted for each.
N_steps, warmup = 200, 200  # 2000, 2000

In [58]:
# TODO: use SVGD
# NOTE(review): a later cell rebinds `mcmc` to an SGLDRunner, so when the
# notebook is run top to bottom this HMC sampler is never the one executed.
def _hmc_potential(p):
    """Evaluate the model's potential on the full training set at parameters `p`."""
    potential = model.get_potential(x_train, y_train, eff_num_data=1*x_train.shape[0])
    return potential(p)


# Fixed-step HMC: no step-size or mass-matrix adaptation.
kernel = HMC(
    potential_fn=_hmc_potential,
    step_size=1E-3,
    num_steps=32,
    adapt_step_size=False,
    adapt_mass_matrix=False,
)
mcmc = MCMC(
    kernel,
    num_samples=N_steps,
    warmup_steps=warmup,
    initial_params=model.params_dict(),
)

In [74]:
# --- SGLD schedule ---------------------------------------------------------
skip = 5  # 50
cycles = 2
warmup_epochs = warmup
# Chosen so that cycles * sample_epochs / skip equals N_steps (when it divides
# evenly); presumably this makes the runner store N_steps samples in total --
# confirm against SGLDRunner.
sample_epochs = (N_steps * skip) // cycles
epochs_per_cycle = sample_epochs + 2 * warmup

# --- SGLD hyper-parameters -------------------------------------------------
temperature = 1.0
momentum = 0.9
precond_update = None
lr = 5E-4

# Full-batch loader: the batch size equals the size of the training set.
train_set = data.norm.train
dataloader = t.utils.data.DataLoader(
    train_set,
    batch_size=len(train_set),
    shuffle=True,
)

sgld_kwargs = dict(
    model=model,
    dataloader=dataloader,
    epochs_per_cycle=epochs_per_cycle,
    warmup_epochs=warmup_epochs,
    sample_epochs=sample_epochs,
    learning_rate=lr,
    skip=skip,
    sampling_decay=True,
    cycles=cycles,
    temperature=temperature,
    momentum=momentum,
    precond_update=precond_update,
)
mcmc = SGLDRunner(**sgld_kwargs)

In [90]:
# Execute the sampler bound to `mcmc`, showing a progress bar while it runs.
mcmc.run(progressbar=True)
# Retrieve the collected draws; later cells iterate over `samples.items()`
# and index each value by sample number.
samples = mcmc.get_samples()

Cycle 0, Sampling: 100%|██████████| 900/900 [00:17<00:00, 50.11it/s]
Cycle 1, Sampling: 100%|██████████| 900/900 [00:18<00:00, 48.91it/s]


In [91]:
# TODO: finally fix this '.p' shenanigans
# Drop a trailing ".p" from every sample name; other names are kept unchanged.
samples = {
    (name[:-2] if name.endswith(".p") else name): draws
    for name, draws in samples.items()
}
# TODO: do we still need the learning rate in SVGD?
# The "lr" entry is removed when present so that only the remaining entries
# are handed to the model in the evaluation cell.
samples.pop("lr", None)

In [92]:
# Test log-likelihood of every posterior sample: one leading row per sample,
# trailing dimensions matching `y_test`.
lps = t.zeros(N_steps, *y_test.shape)

for i in range(N_steps):
    # Parameters of the i-th draw, keyed by parameter name.
    sample = {k: v[i] for k, v in samples.items()}
    with t.no_grad(), model.using_params(sample):
        lps[i] = model(x_test).log_prob(y_test)

# Predictive distribution and noise scale under the last stored draw.
# Fix: the original built `final_params` but then evaluated under the leaked
# loop variable `sample`, which is only the last draw when `samples` holds
# exactly N_steps entries (and is undefined when N_steps == 0).
final_params = {k: v[-1] for k, v in samples.items()}
with t.no_grad(), model.using_params(final_params):
    P = model(x_test)
    noise_std = model.noise_std()

# Log of the sample-averaged likelihood per test point:
#   log( (1 / N_steps) * sum_i exp(lps[i]) )
lps = lps.logsumexp(0) - math.log(N_steps)
lp = lps.mean()

In [93]:
# Mean and standard deviation of the per-test-point log predictive density.
lp_mean, lp_std = lps.mean(), lps.std()
print(lp_mean, lp_std)

tensor(-1.2337) tensor(1.0185)
