Os modelos que usei para a dissertação não têm o ruído de saída como parâmetro (ele é fixo durante todo o treino e determinado por validação cruzada), com exceção do **Probabilistic Backprop** (**PBP**), cuja saída é composta pela média e variância preditivas e pela variância estimada para o ruído aditivo de observação.

Uma abordagem mais interessante é estabelecer tal variável de ruído como parâmetro tunável interno do modelo. Como participa do cálculo da função objetivo, terá seu valor atualizado pelo otimizador. Nesse caso, haverá um parâmetro de ruído único para toda e qualquer entrada, logo o modelo será homocedástico. Note que esse parâmetro representará o ruído dos dados (incerteza aleatória) e não a incerteza do modelo (incerteza epistêmica); esta última é calculada fazendo-se amostragem de Monte Carlo (MC) nos pesos da rede e calculando a variância observada nas predições. Caso se deseje que o modelo seja capaz de predizer a incerteza heterocedástica, o parâmetro de variância deverá fazer parte da saída da rede. Os dois tipos de variância podem coexistir no modelo.

Para que sejam compatíveis com o **PBP**, certamente deve ser implementada nos outros modelos a capacidade de prever o ruído aditivo de saída (o parâmetro tunável atrelado ao modelo). E quanto à predição da variância pela rede: deve ser incluída também, ou apenas a oriunda da integração por amostragem de MC é suficiente?

Refletindo sobre o significado e a origem desses dois termos, chega-se à conclusão de que o MC é suficiente para os outros modelos e a rede não deve prever um termo de variância. Esse último corresponde a uma variância que a rede prevê para a *entrada*, enquanto o PBP calcula sua variância analiticamente a partir da distribuição dos *pesos*, e isso (variância oriunda dos pesos) é feito nos outros modelos por MC.

In [None]:
%matplotlib notebook
%load_ext autoreload
%autoreload 2

In [None]:
import time
import torch
import theano
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from torch import autograd

from bayesnn.datasets import Dataset

from bayesnn.uci_code.experiments_pbp import ExperimentPBPReg
from bayesnn.uci_code.experiments import ExperimentBBBMLPReg, ExperimentVadamMLPReg, ExperimentDropoutMLPReg

from bayesnn.utils import plot_1d_results

In [None]:
# Preview the currently active seaborn colour palette as a row of swatches.
sns.palplot(sns.color_palette())

In [None]:
# Notebook-wide plotting theme: paper context, white style, colour-blind-safe
# palette, enlarged fonts, and the matplotlib rc overrides listed below
# (including LaTeX text rendering).
sns.set(
    context='paper',
    style='white',
    palette='colorblind',
    font_scale=2.2,
    rc={
        'lines.linewidth': 2,
        'lines.markersize': 10,
        'figure.figsize': (6.0, 6.0),
        'image.interpolation': 'nearest',
        'image.cmap': 'gray',
        'text.usetex': True,
    },
)

In [None]:
def plot_curve(x_train, y_train, x_domain, means, aleatoric, epistemic,
               ideal_output=None, ylim=(-8, 11)):
    """Plot a 1-D regression fit with stacked uncertainty bands.

    The inner band spans ``means +/- aleatoric`` (data uncertainty). The outer
    band extends from there to ``means +/- total``, where
    ``total = sqrt(aleatoric**2 + epistemic**2)`` (model uncertainty).

    Args:
        x_train, y_train: Training samples, drawn as a scatter plot.
        x_domain: x coordinates at which the model was evaluated.
        means: Predictive mean at each point of ``x_domain``.
        aleatoric: Data-noise standard deviation (scalar or per point).
        epistemic: Model standard deviation (scalar or per point).
        ideal_output: Optional reference curve evaluated on ``x_domain``.
        ylim: ``(bottom, top)`` y-axis limits, or ``None`` to leave the
            automatic limits untouched. Defaults to ``(-8, 11)``.

    Returns:
        The matplotlib figure holding the plot.
    """
    # fill_between/plot need 1-D data: flatten column vectors and promote
    # scalars to length-1 arrays, which then broadcast against the means.
    x_domain = np.ravel(x_domain)
    means = np.ravel(means)
    aleatoric = np.ravel(aleatoric)
    epistemic = np.ravel(epistemic)

    # The two standard deviations are combined in quadrature.
    total_unc = (aleatoric**2 + epistemic**2)**0.5

    fig = plt.figure(figsize = (6, 5))
    # Switches matplotlib to its default style for everything drawn below;
    # note that this modifies the global rcParams.
    plt.style.use('default')
    c = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
         '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

    plt.scatter(x_train, y_train, s = 10, marker = 'x', color = c[3], alpha = 0.5, label = 'data samples')

    # Outer (model) bands above and below, then the inner (data) band.
    plt.fill_between(x_domain, means + aleatoric, means + total_unc, color = c[0], alpha = 0.3, label = r'model uncertainty')
    plt.fill_between(x_domain, means - total_unc, means - aleatoric, color = c[0], alpha = 0.3)
    plt.fill_between(x_domain, means - aleatoric, means + aleatoric, color = c[1], alpha = 0.4, label = r'data uncertainty')

    plt.plot(x_domain, means, color = 'black', linewidth = 1, label = 'model mean')

    if ideal_output is not None:
        plt.plot(x_domain, np.ravel(ideal_output), color = c[3], linewidth = 1, label = 'ideal output')

    # plt.xlim([-5, 5])
    if ylim is not None:
        plt.ylim(ylim)
    plt.xlabel('$x$')
    plt.ylabel('$y$')

    plt.gca().yaxis.grid(alpha=0.3)
    plt.gca().xaxis.grid(alpha=0.3)

    return fig


## Toy set #1

https://github.com/microsoft/deterministic-variational-inference/blob/master/ToyData.ipynb

In [None]:
# GPy supplies the RBF kernel used to build the toy covariance matrices.
import GPy

# Fixed seed: every draw below depends on the order of the RNG calls.
np.random.seed(2)
no_points = 400
lengthscale = 1
variance = 1.0
sig_noise = 0.3
# Inputs drawn uniformly from [-3, 3), stored as a sorted column vector.
x = np.random.uniform(-3, 3, no_points)[:, None]
x.sort(axis = 0)

# Covariance: RBF kernel matrix plus sig_noise**2 on the diagonal.
k = GPy.kern.RBF(input_dim = 1, variance = variance, lengthscale = lengthscale)
C = k.K(x, x) + np.eye(no_points)*sig_noise**2

# One joint zero-mean Gaussian draw with covariance C, then centred.
y = np.random.multivariate_normal(np.zeros((no_points)), C)[:, None]
y = (y - y.mean())
# Only sorted points 75..324 (250 samples) are kept for training.
x_train = x[75:325]
y_train = y[75:325]

# Do similar procedure to create test samples (using the whole range)
# NOTE(review): y_test comes from a separate draw that uses only C_test, so it
# is not conditioned on the training targets — confirm this is intended.
x_test = np.random.uniform(-5, 5, 100)[:, None]
x_test.sort(axis = 0)
C_test = k.K(x_test, x_test) + np.eye(100)*sig_noise**2
y_test = np.random.multivariate_normal(np.zeros((100)), C_test)[:, None]
y_test = (y_test - y_test.mean())

# X domain for visualizing learned model
x_domain = torch.linspace(-5,5,100)[..., None]

In [None]:
# np.random.seed(seed)

def base_model(x):
    """Noise-free target curve: ``-(x + 1) * sin(3 * pi * x)``."""
    envelope = x+1.0
    oscillation = np.sin(3 * np.pi *x)
    return -envelope*oscillation

def noise_model(x):
    """Return a constant 0.3 with the same shape as ``x``."""
    unit_scale = np.ones_like(x)
    # Input-dependent alternative kept for reference: 0.45*(x+0.5)**2
    return 0.3*unit_scale

def sample_data(x):
    """Return ``base_model(x)`` plus zero-mean Gaussian noise scaled by ``noise_model(x)``."""
    clean = base_model(x)
    noise = np.random.normal(0, noise_model(x))
    return clean + noise

# Sample the sine toy set. These assignments replace x_train/y_train,
# x_test/y_test and x_domain produced by the GP cell earlier in the notebook.
train_size, test_size = 400, 100

# Training X values: U[-0.5, 0.5), as a (train_size, 1) column
x_train = np.random.rand(train_size, 1) - 0.5
y_train = sample_data(x_train)

# Test X values: U[-0.5, 0.5), as a (test_size, 1) column
x_test = np.random.rand(test_size, 1) - 0.5
y_test = sample_data(x_test)

# X domain for visualizing learned model; the plotting cells rescale it with
# each experiment's x mean/std before drawing.
x_domain = torch.linspace(-4,4,100)[..., None]

## Bayes By Backprop (BBB)

### 400 points

In [None]:
# Data: the full toy training set plus the held-out test points.
dataset = 'toydata1d'
data_params = {
    'x_points': (x_train, x_test),
    'y_points': (y_train, y_test),
}

# One hidden layer of 100 ReLU units.
model_params = {
    'hidden_sizes': [100],
    'act_func': 'relu',
    # 'prior_prec': lengthscale**2,  # = 1
    'prior_prec': 1,  # = 1
    # 1/0.3**2 scaled by the label variance (presumably because the targets
    # are standardized via normalize_y=True below).
    'noise_prec': 1/0.3**2 * y_train.var(),
}

# Full-batch training for 2000 epochs.
train_params = {
    'batch_size': len(x_train),
    'num_epochs': 2000,
    'seed': 2,
    'train_mc_samples': 10,
    'eval_mc_samples': 100,
}

optim_params = {
    'optim': 'adam',
    'learning_rate': 0.01,
    'betas': (0.9,0.99),
    'prec_init': 10.0,
}

BBB = ExperimentBBBMLPReg(
    dataset,
    model_params,
    train_params,
    optim_params,
    data_params,
    evals_per_epoch=1,
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=199,
)

BBB.run(log_metric_history=False)

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean = BBB.x_means.cpu().numpy()
x_std = BBB.x_stds.cpu().numpy()
y_mean = BBB.y_mean.cpu().numpy()
y_std = BBB.y_std.cpu().numpy()

# Stack the predictions (presumably one per MC sample) and map them back to
# the original target scale.
out = torch.stack(BBB.prediction(x_domain, train=False))
out = out.cpu().detach().numpy()*y_std + y_mean

means = out.mean(axis=0)

# exp(-0.5*log_noise) treats log_noise as a log-precision; the result is
# rescaled by y_std. Model uncertainty is the spread over the stacked predictions.
aleatoric = torch.exp(-0.5*BBB.log_noise.detach()).item()*y_std
epistemic = out.var(axis=0)**0.5

fig = plot_curve(
    x_train,
    y_train,
    x_domain.view(-1).cpu().numpy()*x_std + x_mean,
    means,
    aleatoric,
    epistemic,
)

fig.savefig('figures/chap4/toy1d/bbb/large-sample.pdf', bbox_inches='tight', dpi=200)

### 40-points

In [None]:
# Data: only the first 40 training points; the test set is unchanged.
dataset = 'toydata1d'
data_params = {
    'x_points': (x_train[:40], x_test),
    'y_points': (y_train[:40], y_test),
}

# One hidden layer of 100 ReLU units.
model_params = {
    'hidden_sizes': [100],
    'act_func': 'relu',
    # 'prior_prec': lengthscale**2,  # = 1
    'prior_prec': 1,  # = 1
    # Uses the variance of the full y_train, not of the 40-point subset.
    'noise_prec': 1/0.3**2* y_train.var(),
}

# batch_size is the size of the full x_train, i.e. at least the 40 points used.
train_params = {
    'batch_size': len(x_train),
    'num_epochs': 2000,
    'seed': 2,
    'train_mc_samples': 10,
    'eval_mc_samples': 100,
}

optim_params = {
    'optim': 'adam',
    'learning_rate': 0.01,
    'betas': (0.9,0.99),
    'prec_init': 10.0,
}

BBB_small = ExperimentBBBMLPReg(
    dataset,
    model_params,
    train_params,
    optim_params,
    data_params,
    evals_per_epoch=1,
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=199,
)

BBB_small.run(log_metric_history=False)

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean = BBB_small.x_means.cpu().numpy()
x_std = BBB_small.x_stds.cpu().numpy()
y_mean = BBB_small.y_mean.cpu().numpy()
y_std = BBB_small.y_std.cpu().numpy()

# Stack the predictions and map them back to the original target scale.
out = torch.stack(BBB_small.prediction(x_domain, train=False))
out = out.cpu().detach().numpy()*y_std + y_mean

means = out.mean(axis=0)

# exp(-0.5*log_noise) treats log_noise as a log-precision; rescaled by y_std.
aleatoric = torch.exp(-0.5*BBB_small.log_noise.detach()).item()*y_std
epistemic = out.var(axis=0)**0.5

fig = plot_curve(
    x_train[:40],
    y_train[:40],
    x_domain.view(-1).cpu().numpy()*x_std + x_mean,
    means,
    aleatoric,
    epistemic,
)

fig.savefig('figures/chap4/toy1d/bbb/small-sample.pdf', bbox_inches='tight', dpi=200)

## Monte Carlo Dropout (MCDO)

### 400-points

In [None]:
# Data: the full toy training set plus the held-out test points.
dataset = 'toydata1d'
data_params = {
    'x_points': (x_train, x_test),
    'y_points': (y_train, y_test),
}

# One hidden layer of 100 ReLU units with dropout 0.03.
model_params = {
    'hidden_sizes': [100],
    'act_func': 'relu',
    'dropout': 0.03,
    'prior_prec': 1,  # lengthscale = 1
    'noise_prec': 1/0.3**2 * y_train.var(),
}

train_params = {
    'batch_size': len(x_train),
    'num_epochs': 10000,  # 20000
    'seed': 2,
    'train_mc_samples': 1,
    'eval_mc_samples': 100,
}

optim_params = {
    'optim': 'adam',
    'learning_rate': 0.01,
    # 'betas': 0.,
    'betas': (0.9,0.99),
    'prec_init': 10.0,
}

# Manually set a value for weight decay (wd) by forcing the prior precision
# wd = 1e-4
# model_params['prior_prec'] = 10
# model_params['prior_prec'] = (2 * model_params['noise_prec'] * len(x_train))*wd/(1 - model_params["dropout"])

# Report the prior precision, its square root, and the weight decay these
# settings correspond to.
print('prior prec', model_params['prior_prec'])
print('lenghtscale', np.sqrt(model_params["prior_prec"]))
print(
    'wd',
    (1 - model_params["dropout"]) * np.sqrt(model_params["prior_prec"]) ** 2 / (2 * model_params["noise_prec"] * len(x_train)),
)

In [None]:
# Build and train the MC Dropout experiment from the parameters defined above.
Dropout = ExperimentDropoutMLPReg(
    dataset,
    model_params,
    train_params,
    optim_params,
    data_params=data_params,
    evals_per_epoch=1,
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=1999,
)

Dropout.run(log_metric_history=False)

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean = Dropout.x_means.cpu().numpy()
x_std = Dropout.x_stds.cpu().numpy()
y_mean = Dropout.y_mean.cpu().numpy()
y_std = Dropout.y_std.cpu().numpy()

# Stack the predictions and map them back to the original target scale.
out = torch.stack(Dropout.prediction(x_domain, train=False))
out = out.cpu().detach().numpy()*y_std + y_mean

means = out.mean(axis=0)

# Fixed noise precision turned into a standard deviation and rescaled by
# y_std; model uncertainty is the spread over the stacked predictions.
aleatoric = 1/np.sqrt(model_params['noise_prec'])*y_std
epistemic = out.var(axis=0)**0.5

fig = plot_curve(
    x_train,
    y_train,
    x_domain.view(-1).cpu().numpy()*x_std + x_mean,
    means,
    aleatoric,
    epistemic,
)
fig.savefig('figures/chap4/toy1d/mcdo/large-sample.pdf', bbox_inches='tight', dpi=200)

### 40-point

In [None]:
# One hidden layer of 100 ReLU units with dropout 0.05.
model_params={'hidden_sizes': [100],
              'act_func': 'relu' ,
              'dropout': 0.05 ,
              'prior_prec': 1, # lengthscale = 1
              'noise_prec': 1/0.3**2 * y_train.var()}

train_params={'batch_size': len(x_train),
              'num_epochs': 10000, # 20000
              'seed': 2,
              'train_mc_samples': 1,
              'eval_mc_samples': 100}

optim_params={'optim': 'adam',
              'learning_rate': 0.01,
#               'betas': 0.,
              'betas': (0.9,0.99),
              'prec_init': 10.0}

# Only the first 40 training points are used in this experiment.
dataset = 'toydata1d'
data_params={'x_points': (x_train[:40], x_test),
             'y_points': (y_train[:40], y_test)}

# Manually set a value for weight decay (wd) by forcing the prior precision
# wd = 1e-4
# model_params['prior_prec'] = 10
# model_params['prior_prec'] = (2 * model_params['noise_prec'] * len(x_train))*wd/(1 - model_params["dropout"])

print('prior prec', model_params['prior_prec'])
print('lenghtscale', np.sqrt(model_params["prior_prec"]))
# The weight decay is reported for the number of points actually trained on
# (40 here), not for the size of the full x_train.
print('wd', (1 - model_params["dropout"]) * np.sqrt(model_params["prior_prec"]) ** 2 / (2 * model_params["noise_prec"] * len(data_params['x_points'][0])))

In [None]:
# Build and train the 40-point MC Dropout experiment.
Dropout_small = ExperimentDropoutMLPReg(
    dataset,
    model_params,
    train_params,
    optim_params,
    data_params=data_params,
    evals_per_epoch=1,
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=1999,
)

Dropout_small.run(log_metric_history=False)

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean = Dropout_small.x_means.cpu().numpy()
x_std = Dropout_small.x_stds.cpu().numpy()
y_mean = Dropout_small.y_mean.cpu().numpy()
y_std = Dropout_small.y_std.cpu().numpy()

# Stack the predictions and map them back to the original target scale.
out = torch.stack(Dropout_small.prediction(x_domain, train=False))
out = out.cpu().detach().numpy()*y_std + y_mean

means = out.mean(axis=0)

# Fixed noise precision turned into a standard deviation, rescaled by y_std.
aleatoric = 1/np.sqrt(model_params['noise_prec'])*y_std
epistemic = out.var(axis=0)**0.5

fig = plot_curve(
    x_train[:40],
    y_train[:40],
    x_domain.view(-1).cpu().numpy()*x_std + x_mean,
    means,
    aleatoric,
    epistemic,
)
fig.savefig('figures/chap4/toy1d/mcdo/small-sample.pdf', bbox_inches='tight', dpi=200)

### Not converged

In [None]:
# Data: the full toy training set plus the held-out test points.
dataset = 'toydata1d'
data_params = {
    'x_points': (x_train, x_test),
    'y_points': (y_train, y_test),
}

# One hidden layer of 100 ReLU units with dropout 0.05.
model_params = {
    'hidden_sizes': [100],
    'act_func': 'relu',
    'dropout': 0.05,
    'prior_prec': 1,  # lengthscale = 1
    'noise_prec': 1/0.3**2 * y_train.var(),
}

# Only 200 epochs, compared with 10000 in the other MC Dropout runs.
train_params = {
    'batch_size': len(x_train),
    'num_epochs': 200,  # 20000
    'seed': 2,
    'train_mc_samples': 1,
    'eval_mc_samples': 100,
}

optim_params = {
    'optim': 'adam',
    'learning_rate': 0.01,
    # 'betas': 0.,
    'betas': (0.9,0.99),
    'prec_init': 10.0,
}

# Manually set a value for weight decay (wd) by forcing the prior precision
# wd = 1e-4
# model_params['prior_prec'] = 10
# model_params['prior_prec'] = (2 * model_params['noise_prec'] * len(x_train))*wd/(1 - model_params["dropout"])

# Report the prior precision, its square root, and the matching weight decay.
print('prior prec', model_params['prior_prec'])
print('lenghtscale', np.sqrt(model_params["prior_prec"]))
print(
    'wd',
    (1 - model_params["dropout"]) * np.sqrt(model_params["prior_prec"]) ** 2 / (2 * model_params["noise_prec"] * len(x_train)),
)

In [None]:
# Build and train the short (200-epoch) MC Dropout experiment.
Dropout_unconverged = ExperimentDropoutMLPReg(
    dataset,
    model_params,
    train_params,
    optim_params,
    data_params=data_params,
    evals_per_epoch=1,
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=1999,
)

Dropout_unconverged.run(log_metric_history=False)

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean = Dropout_unconverged.x_means.cpu().numpy()
x_std = Dropout_unconverged.x_stds.cpu().numpy()
y_mean = Dropout_unconverged.y_mean.cpu().numpy()
y_std = Dropout_unconverged.y_std.cpu().numpy()

# Stack the predictions and map them back to the original target scale.
out = torch.stack(Dropout_unconverged.prediction(x_domain, train=False))
out = out.cpu().detach().numpy()*y_std + y_mean

means = out.mean(axis=0)

# Fixed noise precision turned into a standard deviation, rescaled by y_std.
aleatoric = 1/np.sqrt(model_params['noise_prec'])*y_std
epistemic = out.var(axis=0)**0.5

fig = plot_curve(
    x_train,
    y_train,
    x_domain.view(-1).cpu().numpy()*x_std + x_mean,
    means,
    aleatoric,
    epistemic,
)
fig.savefig('figures/chap4/toy1d/mcdo/unconverged.pdf', bbox_inches='tight', dpi=200)

## Variational Adam (Vadam)

### 400-point

In [None]:
# Data: the full toy training set plus the held-out test points.
dataset = 'toydata1d'
data_params = {
    'x_points': (x_train, x_test),
    'y_points': (y_train, y_test),
}

# One hidden layer of 100 ReLU units.
model_params = {
    'hidden_sizes': [100],
    'act_func': 'relu',
    # 'prior_prec': lengthscale**2,  # = 1
    'prior_prec': 1,  # = 1
    'noise_prec': 1/0.3**2 * y_train.var(),
}

train_params = {
    'batch_size': len(x_train),
    'num_epochs': 2000,  # 20000
    'seed': 2,
    'train_mc_samples': 10,
    'eval_mc_samples': 100,
}

# Learning rate is 0.1 here, versus 0.01 in the BBB and MC Dropout runs.
optim_params = {
    'optim': 'adam',
    'learning_rate': 0.1,
    'betas': (0.9,0.99),
    'prec_init': 10.0,
}

Vadam = ExperimentVadamMLPReg(
    dataset,
    model_params,
    train_params,
    optim_params,
    data_params=data_params,
    evals_per_epoch=1,
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=199,
)
Vadam.run(log_metric_history=False)

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean = Vadam.x_means.cpu().numpy()
x_std = Vadam.x_stds.cpu().numpy()
y_mean = Vadam.y_mean.cpu().numpy()
y_std = Vadam.y_std.cpu().numpy()

# Stack the predictions and map them back to the original target scale.
out = torch.stack(Vadam.prediction(x_domain, train=False))
out = out.cpu().detach().numpy()*y_std + y_mean

means = out.mean(axis=0)

# Noise standard deviation from the same precision expression used for
# noise_prec, rescaled by y_std.
# aleatoric = 1/np.sqrt(model_params['noise_prec'])*y_std
aleatoric = 1/np.sqrt(1/0.3**2 * y_train.var())*y_std
epistemic = out.var(axis=0)**0.5

fig = plot_curve(
    x_train,
    y_train,
    x_domain.view(-1).cpu().numpy()*x_std + x_mean,
    means,
    aleatoric,
    epistemic,
)
# This figure also carries a legend.
fig.axes[0].legend(loc='lower right')
fig.savefig('figures/chap4/toy1d/vadam/large-sample.pdf', bbox_inches='tight', dpi=200)

### 40-point

In [None]:
# Data: only the first 40 training points; the test set is unchanged.
dataset = 'toydata1d'
data_params = {
    'x_points': (x_train[:40], x_test),
    'y_points': (y_train[:40], y_test),
}

# One hidden layer of 100 ReLU units.
model_params = {
    'hidden_sizes': [100],
    'act_func': 'relu',
    # 'prior_prec': lengthscale**2,  # = 1
    'prior_prec': 1,  # = 1
    # Uses the variance of the full y_train, not of the 40-point subset.
    'noise_prec': 1/0.3**2 * y_train.var(),
}

train_params = {
    'batch_size': len(x_train),
    'num_epochs': 2000,  # 20000
    'seed': 2,
    'train_mc_samples': 10,
    'eval_mc_samples': 100,
}

optim_params = {
    'optim': 'adam',
    'learning_rate': 0.1,
    'betas': (0.9,0.99),
    'prec_init': 10.0,
}

Vadam_small = ExperimentVadamMLPReg(
    dataset,
    model_params,
    train_params,
    optim_params,
    data_params=data_params,
    evals_per_epoch=1,
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=199,
)
Vadam_small.run(log_metric_history=False)

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean = Vadam_small.x_means.cpu().numpy()
x_std = Vadam_small.x_stds.cpu().numpy()
y_mean = Vadam_small.y_mean.cpu().numpy()
y_std = Vadam_small.y_std.cpu().numpy()

# Stack the predictions and map them back to the original target scale.
out = torch.stack(Vadam_small.prediction(x_domain, train=False))
out = out.cpu().detach().numpy()*y_std + y_mean

means = out.mean(axis=0)

# Noise standard deviation from the same precision expression used for
# noise_prec, rescaled by y_std.
# aleatoric = 1/np.sqrt(model_params['noise_prec'])*y_std
aleatoric = 1/np.sqrt(1/0.3**2 * y_train.var())*y_std
epistemic = out.var(axis=0)**0.5

fig = plot_curve(
    x_train[:40],
    y_train[:40],
    x_domain.view(-1).cpu().numpy()*x_std + x_mean,
    means,
    aleatoric,
    epistemic,
)
fig.savefig('figures/chap4/toy1d/vadam/small-sample.pdf', bbox_inches='tight', dpi=200)

## Probabilistic Backpropagation (PBP)

### 400-sample

In [None]:
# Data: the full toy training set plus the held-out test points.
dataset = 'toydata1d'
data_params = {
    'x_points': (x_train, x_test),
    'y_points': (y_train, y_test),
}

# PBP only takes the layer sizes here; no noise or prior precision is given.
model_params = {'hidden_sizes': [100]}

train_params = {
    'num_epochs': 10,
    'seed': 2,
}

PBP = ExperimentPBPReg(
    dataset,
    model_params,
    train_params,
    data_params,
    experiment_prefix='toydata',
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=9,
)
PBP.run()

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean, x_std = PBP.x_means.cpu().numpy(), PBP.x_stds.cpu().numpy()
y_mean, y_std = PBP.y_mean.cpu().numpy(), PBP.y_std.cpu().numpy()

# Model outputs are already *unnormalized*
m, v, v_noise = PBP.model.predict(x_domain.numpy())

# PBP returns its predictive mean directly. `out` belongs to the sampling-based
# models' cells and must not be read here (it is stale, or undefined on a
# fresh kernel).
means = m

# predict() returns variances; take square roots to get standard deviations.
aleatoric = v_noise**0.5
epistemic = v**0.5

fig = plot_curve(x_train, y_train, x_domain.view(-1).cpu().numpy()*x_std + x_mean, means, aleatoric, epistemic)
fig.savefig('figures/chap4/toy1d/pbp/large-sample.pdf', bbox_inches='tight', dpi=200)
# filename = 'ex1_pbp.pdf'
# fig.savefig( 'images/toy-example'+ filename, bbox_inches='tight', dpi=200)

In [None]:
# Display the noise and model standard deviations set by the most recently run plotting cell.
aleatoric, epistemic

### 40-point

In [None]:
# Data: only the first 40 training points; the test set is unchanged.
dataset = 'toydata1d'
data_params = {
    'x_points': (x_train[:40], x_test),
    'y_points': (y_train[:40], y_test),
}

model_params = {'hidden_sizes': [100]}

# 40 epochs, versus 10 for the run on the full training set.
train_params = {
    'num_epochs': 40,
    'seed': 2,
}

PBP_small = ExperimentPBPReg(
    dataset,
    model_params,
    train_params,
    data_params,
    experiment_prefix='toydata',
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=9,
)
PBP_small.run()

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean, x_std = PBP_small.x_means.cpu().numpy(), PBP_small.x_stds.cpu().numpy()
y_mean, y_std = PBP_small.y_mean.cpu().numpy(), PBP_small.y_std.cpu().numpy()

# Model outputs are already *unnormalized*
m, v, v_noise = PBP_small.model.predict(x_domain.numpy())

# PBP returns its predictive mean directly. `out` belongs to the sampling-based
# models' cells and must not be read here (it is stale, or undefined on a
# fresh kernel).
means = m

# predict() returns variances; take square roots to get standard deviations.
aleatoric = v_noise**0.5
epistemic = v**0.5

fig = plot_curve(x_train[:40], y_train[:40], x_domain.view(-1).cpu().numpy()*x_std + x_mean, means, aleatoric, epistemic)
fig.savefig('figures/chap4/toy1d/pbp/small-sample.pdf', bbox_inches='tight', dpi=200)
# filename = 'ex1_pbp.pdf'
# fig.savefig( 'images/toy-example'+ filename, bbox_inches='tight', dpi=200)

### Too many passes

In [None]:
# Data: the full toy training set plus the held-out test points.
dataset = 'toydata1d'
data_params = {
    'x_points': (x_train, x_test),
    'y_points': (y_train, y_test),
}

model_params = {'hidden_sizes': [100]}

# 200 epochs, versus 10 for the baseline PBP run on the same data.
train_params = {
    'num_epochs': 200,
    'seed': 2,
}

PBP_shrink = ExperimentPBPReg(
    dataset,
    model_params,
    train_params,
    data_params,
    experiment_prefix='toydata',
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=99,
)
PBP_shrink.run()

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean, x_std = PBP_shrink.x_means.cpu().numpy(), PBP_shrink.x_stds.cpu().numpy()
y_mean, y_std = PBP_shrink.y_mean.cpu().numpy(), PBP_shrink.y_std.cpu().numpy()

# Model outputs are already *unnormalized*
m, v, v_noise = PBP_shrink.model.predict(x_domain.numpy())

# PBP returns its predictive mean directly. `out` belongs to the sampling-based
# models' cells and must not be read here (it is stale, or undefined on a
# fresh kernel).
means = m

# predict() returns variances; take square roots to get standard deviations.
aleatoric = v_noise**0.5
epistemic = v**0.5

fig = plot_curve(x_train, y_train, x_domain.view(-1).cpu().numpy()*x_std + x_mean, means, aleatoric, epistemic)
fig.savefig('figures/chap4/toy1d/pbp/shrinked-variance.pdf', bbox_inches='tight', dpi=200)
# filename = 'ex1_pbp.pdf'
# fig.savefig( 'images/toy-example'+ filename, bbox_inches='tight', dpi=200)

In [None]:
# Display the noise and model standard deviations set by the most recently run plotting cell.
aleatoric, epistemic

# Toy set #2

https://github.com/ThirstyScholar/bayes-by-backprop/blob/master/BBB/bnn_regression.py  
https://arxiv.org/pdf/1502.05336.pdf

In [None]:
def base_model(x):
    """Noise-free target for toy set #2: the cube of ``x``."""
    cubed = x**3
    return cubed

def noise_model(x):
    """Return a constant 3 with the same shape as ``x``."""
    unit_scale = np.ones_like(x)
    return 3*unit_scale

def sample_data(x):
    """Return ``base_model(x)`` plus zero-mean Gaussian noise scaled by ``noise_model(x)``."""
    clean = base_model(x)
    noise = np.random.normal(0, noise_model(x))
    return clean + noise

In [None]:
# Sample toy set #2. No seed is set in this cell, so the draws depend on the
# RNG state left by whatever ran before it.
train_size, test_size = 20, 100

# X values: U[-4, 4), reshaped to column vectors
train_data_2 = np.random.uniform(-4, 4, size=train_size).reshape((-1, 1))
test_data_2 = np.random.uniform(-4, 4, size=test_size).reshape((-1, 1))

# Noisy labels from the sample_data() currently defined in the notebook.
train_labels_2 = sample_data(train_data_2)
test_labels_2 = sample_data(test_data_2)

# X domain for visualizing learned model (replaces the earlier x_domain)
x_domain = torch.linspace(-6,6,100)[..., None]


## BBB

In [None]:
# Data: toy set #2 train/test split.
dataset = 'toydata1d'
data_params = {
    'x_points': (train_data_2, test_data_2),
    'y_points': (train_labels_2, test_labels_2),
}

# One hidden layer of 100 ReLU units.
model_params = {
    'hidden_sizes': [100],
    'act_func': 'relu',
    'prior_prec': 1.0,
    # 1/9 scaled by the label variance (presumably because the targets are
    # standardized via normalize_y=True below).
    'noise_prec': 1/9 * train_labels_2.var(),
}

train_params = {
    'batch_size': len(train_data_2),
    'num_epochs': 2000,  # 20000
    'seed': 2,
    'train_mc_samples': 10,
    'eval_mc_samples': 100,
}

optim_params = {
    'optim': 'adam',
    'learning_rate': 0.1,
    'betas': (0.9,0.99),
    'prec_init': 10.0,
}

# Note: this rebinds the name BBB used by the toy set #1 experiment.
BBB = ExperimentBBBMLPReg(
    dataset,
    model_params,
    train_params,
    optim_params,
    data_params=data_params,
    evals_per_epoch=1,
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=199,
)
BBB.run(log_metric_history=False)

In [None]:
# Normalization statistics stored on the experiment, already converted to
# NumPy arrays (so no further .numpy() calls below).
x_mean, x_std = BBB.x_means.cpu().numpy(), BBB.x_stds.cpu().numpy()
y_mean, y_std = BBB.y_mean.cpu().numpy(), BBB.y_std.cpu().numpy()

# Stack the predictions and map them back to the original target scale.
out = BBB.prediction(x_domain, train=False)
out = torch.stack(out).cpu().detach().numpy()
out = out*y_std + y_mean

# Mean of this model's own predictions (not the PBP mean `m`).
means = out.mean(axis = 0)

aleatoric = 1/np.sqrt(model_params['noise_prec'])*y_std
epistemic = out.var(axis = 0)**0.5

# The x axis and the reference curve are computed on NumPy arrays only, to
# avoid mixing torch tensors with NumPy arrays.
fig = plot_curve(train_data_2, train_labels_2,
                 x_domain.view(-1).cpu().numpy()*x_std + x_mean, means,
                 aleatoric, epistemic,
                 ideal_output=base_model(x_domain.view(-1).cpu().numpy()*x_std+x_mean))

# Override plot_curve's default y-limits for this data range.
plt.xlim([-6, 6])
plt.ylim([-100, 100])

# filename = 'ex2_bbb.pdf'
# fig.savefig( 'images/toy-example'+ filename, bbox_inches='tight', dpi=200)

## MC Dropout

In [None]:
# Data: toy set #2 train/test split.
dataset = 'toydata1d'
data_params = {
    'x_points': (train_data_2, test_data_2),
    'y_points': (train_labels_2, test_labels_2),
}

# One hidden layer of 100 ReLU units with dropout 0.2.
model_params = {
    'hidden_sizes': [100],
    'act_func': 'relu',
    'dropout': 0.2,
    'prior_prec': 1.0,
    'noise_prec': 1/9 * train_labels_2.var(),
}

train_params = {
    'batch_size': len(train_data_2),
    'num_epochs': 10000,  # 20000
    'seed': 2,
    'train_mc_samples': 1,
    'eval_mc_samples': 100,
}

optim_params = {
    'optim': 'adam',
    'learning_rate': 0.1,
    'betas': (0.9,0.99),
    'prec_init': 10.0,
}

In [None]:
# Build and train the MC Dropout experiment on toy set #2.
# Note: this rebinds the name Dropout used by the toy set #1 experiment.
Dropout = ExperimentDropoutMLPReg(
    dataset,
    model_params,
    train_params,
    optim_params,
    data_params=data_params,
    evals_per_epoch=1,
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=2000,
)

Dropout.run(log_metric_history=False)

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean, x_std = Dropout.x_means.cpu().numpy(), Dropout.x_stds.cpu().numpy()
y_mean, y_std = Dropout.y_mean.cpu().numpy(), Dropout.y_std.cpu().numpy()

# Stack the predictions and map them back to the original target scale.
out = Dropout.prediction(x_domain, train=False)
out = torch.stack(out).cpu().detach().numpy()
out = out*y_std + y_mean

means = out.mean(axis = 0)

aleatoric = 1/np.sqrt(model_params['noise_prec'])*y_std
epistemic = out.var(axis = 0)**0.5


# Plot this model's own mean (`means`), not the PBP mean `m`.
fig = plot_curve(train_data_2, train_labels_2,
                 x_domain.view(-1).numpy()*x_std + x_mean, means,
                 aleatoric, epistemic,
                 ideal_output=base_model(x_domain.view(-1).cpu().numpy()*x_std+x_mean))

# Override plot_curve's default y-limits for this data range.
plt.xlim([-6, 6])
plt.ylim([-100, 100])

# filename = 'ex2_mcdropout.pdf'
# fig.savefig( 'images/toy-example'+ filename, bbox_inches='tight', dpi=200)

## Vadam

In [None]:
# NOTE(review): this cell defines no parameters of its own; it reuses dataset,
# model_params, train_params, optim_params and data_params left by the
# MC Dropout cell above (including its 'dropout' entry) — confirm that is intended.
Vadam = ExperimentVadamMLPReg(dataset,
                        model_params,
                        train_params,
                        optim_params,
                        data_params=data_params,
                        evals_per_epoch=1,
                        # Normalization is enabled, as in every other experiment here:
                        # noise_prec is scaled by the label variance and the plotting
                        # cell rescales predictions by y_std, which both assume
                        # standardized targets.
                        normalize_x=True,
                        normalize_y=True,
                        use_cuda=torch.cuda.is_available(),
                        print_freq=1000
    )
Vadam.run()

In [None]:
# Normalization statistics stored on the experiment, already converted to
# NumPy arrays (so no further .numpy() calls below).
x_mean, x_std = Vadam.x_means.cpu().numpy(), Vadam.x_stds.cpu().numpy()
y_mean, y_std = Vadam.y_mean.cpu().numpy(), Vadam.y_std.cpu().numpy()

# Stack the predictions and map them back to the original target scale.
out = Vadam.prediction(x_domain, train=False)
out = torch.stack(out).cpu().detach().numpy()
out = out*y_std + y_mean

means = out.mean(axis = 0)

aleatoric = 1/np.sqrt(model_params['noise_prec'])*y_std
epistemic = out.var(axis = 0)**0.5


# Plot this model's own mean (`means`), not the PBP mean `m`. The x axis and
# the reference curve are computed on NumPy arrays only.
fig = plot_curve(train_data_2, train_labels_2,
                 x_domain.view(-1).cpu().numpy()*x_std + x_mean, means,
                 aleatoric, epistemic,
                 ideal_output=base_model(x_domain.view(-1).cpu().numpy()*x_std+x_mean))

# Override plot_curve's default y-limits for this data range.
plt.xlim([-6, 6])
plt.ylim([-100, 100])

# filename = 'ex2_vadam.pdf'
# fig.savefig( 'images/toy-example'+ filename, bbox_inches='tight', dpi=200)

## PBP

In [None]:
# Data: toy set #2 train/test split.
dataset = 'toydata1d'
data_params = {
    'x_points': (train_data_2, test_data_2),
    'y_points': (train_labels_2, test_labels_2),
}

model_params = {'hidden_sizes': [100]}

# Unlike the toy set #1 PBP runs, a batch_size entry is supplied here.
train_params = {
    'batch_size': len(train_data_2),
    'num_epochs': 40,
    'seed': 2,
}

# Note: this rebinds the name PBP used by the toy set #1 experiment.
PBP = ExperimentPBPReg(
    dataset,
    model_params,
    train_params,
    data_params,
    experiment_prefix='toydata',
    normalize_x=True,
    normalize_y=True,
    use_cuda=torch.cuda.is_available(),
    print_freq=39,
)
PBP.run()

In [None]:
# Normalization statistics stored on the experiment, as NumPy arrays.
x_mean, x_std = PBP.x_means.cpu().numpy(), PBP.x_stds.cpu().numpy()
y_mean, y_std = PBP.y_mean.cpu().numpy(), PBP.y_std.cpu().numpy()

# Model outputs are already *unnormalized*
m, v, v_noise = PBP.model.predict(x_domain.numpy())

# PBP returns its predictive mean directly. `out` belongs to the sampling-based
# models' cells and must not be read here (it is stale, or undefined on a
# fresh kernel).
means = m

# predict() returns variances; take square roots to get standard deviations.
aleatoric = v_noise**0.5
epistemic = v**0.5

fig = plot_curve(train_data_2, train_labels_2,
                 x_domain.view(-1).cpu().numpy()*x_std + x_mean, means,
                 aleatoric, epistemic,
                 ideal_output=base_model(x_domain.view(-1).cpu().numpy()*x_std+x_mean))

# Override plot_curve's default y-limits for this data range.
plt.xlim([-6, 6])
plt.ylim([-100, 100])

# filename = 'ex2_pbp.pdf'
# fig.savefig( 'images/toy-example'+ filename, bbox_inches='tight', dpi=200)

# Toy set #3

https://arxiv.org/pdf/1505.05424.pdf

 $$y = x + 0.3\,\sin\big(2\pi(x+\epsilon)\big) + 0.3\,\sin\big(4\pi(x+\epsilon)\big) + \epsilon, \quad \text{where } \epsilon \sim \mathcal{N}(0,\ 0.02)$$