In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt

import torch.nn as nn

In [None]:
def generate_data(n_samples, rng=None):
    """
    Sinusoidal function as defined in D'Angelo et al.

    Inputs are drawn from two disjoint intervals and the target is
    ``y = x * sin(x) + eps`` with Gaussian noise ``eps`` of standard
    deviation 0.25.

    Parameters
    ----------
    n_samples : int
        Total number of points. The first ``n_samples // 2`` inputs are drawn
        uniformly from [1.5, 2.5), the remaining ones from [4.5, 6.0).
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so existing calls (and ``np.random.seed``) behave as before.
        Pass e.g. ``np.random.default_rng(0)`` for a reproducible data set.

    Returns
    -------
    x, y : numpy.ndarray
        Two 1-D arrays of length ``n_samples``. ``x`` is ordered by interval
        (left cluster first); it is not shuffled.
    """
    # The np.random module exposes the same uniform/normal call signatures as
    # a Generator, so it can stand in as the default source.
    if rng is None:
        rng = np.random

    n_left = n_samples // 2
    n_right = n_samples - n_left  # takes the extra point when n_samples is odd

    x = np.concatenate((rng.uniform(1.5, 2.5, n_left), rng.uniform(4.5, 6.0, n_right)))
    y = x * np.sin(x) + rng.normal(0, 0.25, n_samples)
    return x, y

In [None]:
# Draw a small data set and compare it with the noise-free target x*sin(x).
x, y = generate_data(128)
xx = np.linspace(0, 8)  # evaluation grid (np.linspace default: 50 points); reused by the later cells

plt.figure(figsize=(8, 6))
plt.scatter(x, y, marker=".", color="k", alpha=0.2, label='Data')
plt.plot(xx, xx * np.sin(xx), color="C1", label='True function')
plt.ylim(-6, 4)
plt.legend()
# End on plt.show() so the cell does not echo the return value of plt.ylim.
plt.show()

In [None]:
# Or for more data (10 * batch_size samples)

batch_size = 64
n_samples = batch_size * 10

x, y = generate_data(n_samples)
xx = np.linspace(0, 8)  # evaluation grid (np.linspace default: 50 points); reused by the later cells

plt.figure(figsize=(8, 6))
plt.scatter(x, y, marker=".", color="k", alpha=0.2, label='Data')
plt.plot(xx, xx * np.sin(xx), color="C1", label='True function')
plt.ylim(-6, 4)
plt.legend()
# End on plt.show() so the cell does not echo the return value of plt.ylim.
plt.show()

In [None]:
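# Define and train the neural network here. It must be bound to the name `model`
# and, when called on an (N, 1) float tensor, return a (mean, log-variance) pair,
# as the prediction cell below expects.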

In [None]:
# Evaluate the trained model on the plotting grid.
model.eval()
with torch.no_grad():
    # Column vector of grid points: shape (len(xx), 1), float32.
    xx_tensor = torch.tensor(xx, dtype=torch.float32)[:, None]
    mean_pred, log_var_pred = model(xx_tensor)
    # The second output is a log-variance; exponentiate to get the variance.
    mean_pred = mean_pred.squeeze().numpy()
    var_pred = log_var_pred.exp().squeeze().numpy()

std_pred = np.sqrt(var_pred)

# Data, ground truth, predicted mean and a one-standard-deviation band.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x, y, marker=".", color="k", alpha=0.2, label='Data')
ax.plot(xx, xx * np.sin(xx), color="C1", label='True function')
ax.plot(xx, mean_pred, color="C0", label='Predicted mean')
ax.fill_between(
    xx,
    mean_pred - std_pred,
    mean_pred + std_pred,
    color="C0",
    alpha=0.2,
    label='+/- 1 std dev',
)
ax.set_ylim(-6, 4)
ax.legend()
plt.show()

# MC Dropout

In [None]:
# `predictions` and `log_var_pred_dropout` come from the MC Dropout sampling
# code written above this cell.
# NOTE(review): assumes `predictions` stacks one mean curve per stochastic
# forward pass, i.e. shape (n_passes, len(xx)) — confirm against the sampling loop.
predictions = np.array(predictions)

# Noise level predicted by the network: variance = exp(log-variance).
var_pred = torch.exp(log_var_pred_dropout).squeeze().numpy()
# The standard deviation must come from the variance computed on the line above;
# taking the root of `std_pred` would reuse the value left over from the
# previous prediction cell.
std_pred = np.sqrt(var_pred)

# Calculate mean and standard deviation across the MC dropout predictions
mc_mean = np.mean(predictions, axis=0)
mc_std = np.std(predictions, axis=0)

# Plotting the results with MC Dropout uncertainty
plt.figure(figsize=(8, 6))
plt.scatter(x, y, marker=".", color="k", alpha=0.2, label='Data')
plt.plot(xx, xx * np.sin(xx), color="C1", label='True function')
plt.plot(xx, mc_mean, color="C0", label='MC Dropout Mean')
# Blue band: spread of the means across dropout passes.
plt.fill_between(xx, mc_mean - mc_std, mc_mean + mc_std, color="C0", alpha=0.2, label='+/- 1 std dev (MC Dropout)')
# Orange band: noise standard deviation predicted by the network itself.
plt.fill_between(xx, mc_mean - std_pred, mc_mean + std_pred, color="C1", alpha=0.2, label='+/- 1 std dev')
plt.ylim(-6, 4)
plt.legend()
plt.show()

# Variational Inference

In [None]:
# `vb_predictions` and `vb_log_var_pred` come from the variational-inference
# sampling code written above this cell.
vb_predictions = np.array(vb_predictions)

# Average the log-variance over dim 0 first, then exponentiate: this yields the
# geometric mean of the per-sample variances along that dimension.
vb_variance = np.array(vb_log_var_pred.mean(dim=0).exp())
vb_pred_std = np.sqrt(vb_variance)

# Mean and spread of the predictions along the first axis.
vb_mean = vb_predictions.mean(axis=0)
vb_std = vb_predictions.std(axis=0)

# Data, ground truth, VB mean and the two one-standard-deviation bands.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x, y, marker=".", color="k", alpha=0.2, label='Data')
ax.plot(xx, xx * np.sin(xx), color="C1", label='True function')
ax.plot(xx, vb_mean, color="C0", label='VB Mean')
# Blue band: spread of the sampled predictions.
ax.fill_between(
    xx,
    vb_mean - vb_std,
    vb_mean + vb_std,
    color="C0",
    alpha=0.2,
    label='+/- 1 std dev (VB)',
)
# Orange band: standard deviation derived from the predicted log-variance.
ax.fill_between(
    xx,
    vb_mean - vb_pred_std,
    vb_mean + vb_pred_std,
    color="C1",
    alpha=0.2,
    label='+/- 1 std dev',
)
ax.set_ylim(-6, 4)
ax.legend()
plt.show()

# Deep ensembles

In [None]:

# Deep (repulsive) ensemble results: `ensemble_mean` and `ensemble_std` come
# from the ensemble code written above this cell.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(x, y, marker=".", color="k", alpha=0.2, label='Data')
ax.plot(xx, xx * np.sin(xx), color="C1", label='True function')
ax.plot(xx, ensemble_mean, color="C0", label='Ensemble Mean')
# Shaded band: one ensemble standard deviation on either side of the mean.
ax.fill_between(
    xx,
    ensemble_mean - ensemble_std,
    ensemble_mean + ensemble_std,
    color="C0",
    alpha=0.2,
    label='+/- 1 std dev (Ensemble)',
)
ax.set_ylim(-6, 4)
ax.legend()
ax.set_title("StackedGaussianRegressionNet Predictions")
plt.show()

Food for thought:
- What are the trade-offs in terms of computational cost and performance between these methods?
- How would the choice of activation function or network architecture impact the uncertainty estimates?
- How does the dropout rate affect the uncertainty estimates? How could you systematically explore different dropout rates?
- Why are the variational inference uncertainties unreliable? Can you think of possible solutions to improve the predictive uncertainties?
- How does the number of models in the ensemble affect the uncertainty estimates?