In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from activations import Cosine
from networks import WHVIRegression
from layers import WHVILinear
from torch_datasets import ToyDataset

In [2]:
# --- Reproducibility -------------------------------------------------------
# Fix PyTorch's global RNG before any data or model is created.
SEED = 0
torch.manual_seed(SEED)

# --- Device ----------------------------------------------------------------
# The notebook is pinned to the CPU. The original author left a commented-out
# alternative that picks CUDA when available:
#   torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')

# --- Data ------------------------------------------------------------------
# DATASET_N is the value handed to ToyDataset's `n` argument (presumably the
# number of generated points -- confirm against torch_datasets.ToyDataset).
DATASET_N = 128
BATCH_SIZE = 64

dataset = ToyDataset(n=DATASET_N, device=device)
data_loader = DataLoader(dataset, batch_size=BATCH_SIZE)

In [3]:
# Layer stack handed to WHVIRegression: a standard linear input layer
# (1 -> 128 features), two WHVILinear layers, and a standard linear output
# layer (128 -> 1), with a Cosine activation after every layer but the last.
layers = [
    nn.Linear(1, 128),
    Cosine(),
    WHVILinear(128, lambda_=0.01),
    Cosine(),
    WHVILinear(128, lambda_=0.01),
    Cosine(),
    nn.Linear(128, 1),
]
net = WHVIRegression(layers)

# Learning-rate decay hyperparameters, read by lr_decay below at call time.
gamma=0.0005
p = 0.3


def lr_decay(t):
    """Return the multiplicative LR factor (1 + gamma * t) ** (-p) for step ``t``."""
    return (1 + gamma * t)**(-p)


optimizer = optim.Adam(net.parameters(), lr=0.001)
# NOTE(review): in the cells visible here lr_scheduler is neither stepped nor
# passed to net.train_model, so the decay may never be applied -- confirm.
lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_decay)

In [None]:
# Place the model on the configured device before training.
net = net.to(device)

# Epoch counts for the two phases of train_model. The progress bars it prints
# are labelled "[Fix. var.]" and "[Opt. var.]"; presumably epochs1 and epochs2
# correspond to those phases in that order -- confirm in networks.WHVIRegression.
train_epochs = dict(epochs1=500, epochs2=5000)
net.train_model(data_loader, optimizer, **train_epochs)

[Fix. var.] KL = 206.87, MNLL = 18.61: 100%|██████████| 500/500 [00:15<00:00, 31.86it/s]     
[Opt. var.] KL = 2.13, MNLL = -5.57:  27%|██▋       | 1373/5000 [00:43<01:57, 30.78it/s]         

In [None]:
# Put the model in evaluation mode and request 1500 samples per forward pass.
# (eval_samples is presumably the number of stochastic predictions drawn in
# eval mode; the plotting cell iterates over the last axis of y_pred.)
net.eval()
net.eval_samples = 1500

# 1000 evenly spaced test inputs on [-2, 3], shaped as a column vector.
x_test = torch.reshape(torch.linspace(-2, 3, 1000), (-1, 1))
y_test = dataset.f(x_test)

# Inference only: disable autograd so no computation graph is recorded for
# the sampled predictions. The input is sent to the model's device and the
# result brought back to the CPU so downstream NumPy/matplotlib code works
# regardless of the device setting (both moves are no-ops on CPU).
with torch.no_grad():
    y_pred = net(x_test.to(device)).cpu()

In [None]:
# Convert tensors to NumPy once instead of once per sample. The last axis of
# y_pred indexes the predictive samples; flattening the leading axes gives
# one column per sample, matching y_pred[..., i].ravel() for each i.
n_samples = y_pred.size()[2]
x_grid = x_test.numpy().ravel()
sample_curves = y_pred.detach().cpu().numpy().reshape(-1, n_samples)

fig, ax = plt.subplots()
ax.set_ylim(-1, 2.5)
ax.set_xlim(-2, 3)

# A 2-D y array makes matplotlib draw each column as its own line, so all
# sample curves are added in a single call.
ax.plot(x_grid, sample_curves, c='r', alpha=0.05)
# Empty proxy line: gives the faint sample curves one opaque legend entry
# instead of none (or one entry per curve).
ax.plot([], [], c='r', label='Predictive samples')

ax.scatter(dataset.x, dataset.y, ec='k', label='Noisy training measurements')
ax.plot(x_test, y_test, label='True function')

ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend()
plt.show()