In [1]:
%load_ext autoreload
%autoreload 2

### Импорты

In [1]:
import sys
sys.path.append('..')
from control_variates.model import MLP
from control_variates.optim import SGLD, ScaleAdaSGHMC as H_SA_SGHMC
from mnist_utils import load_mnist_dataset
from control_variates.trainer import BNNTrainer
import torch
from torch.nn import functional as F

import numpy as np
import dill as pickle
from pathlib import Path
from functools import partial
from easydict import EasyDict as edict

%matplotlib inline

from matplotlib import pyplot as plt
import numpy as np
from tqdm import tqdm

### Параметры обучения

In [2]:
# psy_hidden = 150
# psy_depth1 = 3
# psy_depth2 = 2

args = {
    'bnn_lr': 1e-7,
    'cv_lr': 1e-3,
    'n_cv_iter': 100,
    'batch_size' : 500,
    'input_dim' : 784,
    'width' : 100,
    'depth' : 0,
    'output_dim' : 2,
    'n_epoch' : 300,
    'alpha0' : 1, 
    'beta0' : 1,
    'resample_prior_every' : 10,
    'resample_momentum_every' : 50,
    'burn_in_epochs' : 100,
    'save_freq' : 1,
    'resample_prior_until' : 100,
    'report_every' : 10,

    }

args = edict(args)

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

### Берем два класса из МНИСТа

In [4]:
Path('../../data', 'mnist').mkdir(exist_ok=True, parents=True)
trainloader, valloader = load_mnist_dataset(Path('../../data', 'mnist'), args.batch_size, [3, 5])

In [5]:
batch = trainloader.dataset[0]

In [6]:
#model = MLP(input_dim=input_dim, width=width, depth=depth, output_dim=output_dim)

In [7]:
from control_variates.model import LogRegression
model = LogRegression(args.input_dim)

In [8]:
optimizer = SGLD(model.parameters(), lr=args.bnn_lr, alpha0=args.alpha0, beta0=args.beta0)
#optimizer = H_SA_SGHMC(model.parameters(), lr=args.bnn_lr, alpha0=args.alpha0, beta0=args.beta0)
#optimizer = H_SA_SGHMC(model.parameters(), lr=0.0, alpha0=args.alpha0, beta0=args.beta0)
#optimizer = torch.optim.SGD(model.parameters(), lr=args.bnn_lr, momentum=False)

In [9]:
def nll_func(y_hat, y):
    nll = F.cross_entropy(y_hat, y, reduction='sum')
    return nll

def err_func(y_hat, y):
    err = y_hat.argmax(-1).ne(y)
    return err

In [10]:
trainer = BNNTrainer(model, 
    optimizer, 
    nll_func, 
    err_func, 
    trainloader, 
    valloader, 
    device=device, 
    resample_prior_every=args.resample_prior_every,
    resample_momentum_every=args.resample_momentum_every,
    save_freq=args.save_freq,
    batch_size=args.batch_size,
    report_every=args.report_every
    )

N_train 11552


In [11]:
trainer.train(n_epoch=args.n_epoch, burn_in_epochs=args.burn_in_epochs, resample_prior_until=args.resample_prior_until)

2020-09-06 23:31:55,002 Epoch 0 finished. Val loss 0.5957387685775757, Val error 0.31913774973711884
2020-09-06 23:31:55,012 Potential: 7159.369140625
2020-09-06 23:31:55,015 Potential grad: [-147.77573 -147.77393 -147.77669 ...  147.77292  348.34497 -348.41052]
2020-09-06 23:33:05,710 Epoch 10 finished. Val loss 0.2784401476383209, Val error 0.0793901156677182
2020-09-06 23:33:05,712 Potential: 3421.53125
2020-09-06 23:33:05,715 Potential grad: [-18.630615 -18.62878  -18.631329 ...  18.628006  43.93964  -43.97178 ]
2020-09-06 23:34:13,844 Epoch 20 finished. Val loss 0.21478569507598877, Val error 0.06309148264984227
2020-09-06 23:34:13,848 Potential: 2691.96728515625
2020-09-06 23:34:13,852 Potential grad: [-25.078611 -25.076563 -25.07844  ...  25.075865  59.12194  -59.175785]
2020-09-06 23:35:21,935 Epoch 30 finished. Val loss 0.1840483397245407, Val error 0.054153522607781286
2020-09-06 23:35:21,936 Potential: 2339.314208984375
2020-09-06 23:35:21,937 Potential grad: [ -48.70432   -

In [13]:
weights_set = trainer.weight_set_samples

print(len(weights_set))

NameError: name 'trainer' is not defined

In [14]:
weights_set = weights_set[::5][-1000]

NameError: name 'weights_set' is not defined

In [None]:
Path('../../saved_samples', 'mnist_weights').mkdir(exist_ok=True, parents=True)
pickle.dump(weights_set, Path('../../saved_samples', 'mnist_weights', 'sgld_sample.pkl').open('wb'))

In [15]:
#Path('../../saved_samples').mkdir(exist_ok=True, parents=True)
weights_set = pickle.load(Path('../../saved_samples', 'mnist_weights', 'sgld_sample.pkl').open('rb'))[0]
weights_set, priors = weights_set

FileNotFoundError: [Errno 2] No such file or directory: '../saved_samples/mnist_weights/1_samples_seed23.pkl'

In [17]:
priors = {}
for (n, _), p in zip(weights_set[0].items(), optimizer.param_groups[0]['params']):
    print(n, p)
    state = trainer.optimizer.state[p]  
    priors[n] = state['weight_decay']

NameError: name 'weights_set' is not defined

### CV

In [19]:
from control_variates.cv_utils import state_dict_to_vec
from control_variates.cv_utils import compute_log_likelihood, compute_mc_estimate, compute_naive_variance, compute_tricky_divergence
from control_variates.model import get_prediction, get_binary_prediction

In [20]:
squeezed_weights = [state_dict_to_vec(w) for w in weights_set]

models = [LogRegression(args.input_dim) for _ in range(len(weights_set))]
for w, model in zip(weights_set, models):
    model.load_state_dict(w)

NameError: name 'weights_set' is not defined

In [21]:
models[0].state_dict()

NameError: name 'models' is not defined

In [22]:
opt_with_priors = trainer.optimizer

priors = {}
group_params = opt_with_priors.param_groups[0]['params']
for (n, _), p in zip(model.named_parameters(), group_params):  
    state = opt_with_priors.state[p]  
    priors[n] = state['weight_decay']

# pickle.dump(priors, Path('../saved_samples', 'mnist_weights', 'priors.pkl').open('wb'))

NameError: name 'trainer' is not defined

In [23]:
priors

{}

In [24]:
# priors = pickle.load(Path('../saved_samples', 'mnist_weights', 'priors.pkl').open('rb'))

# group_params = trainer.optimizer.param_groups[0]['params']
# for (n, _), p in zip(model.named_parameters(), group_params):  
#     trainer.optimizer.state[p]['weight_decay'] = priors[n]

FileNotFoundError: [Errno 2] No such file or directory: '../saved_samples/mnist_weights/priors.pkl'

In [25]:
from control_variates.cv import PsyMLP, PsyDoubleMLP, PsyLinear, SteinCV


In [26]:

psy_input_dim = squeezed_weights[0].shape[0]
N_train = len(trainloader.dataset)

NameError: name 'squeezed_weights' is not defined

In [27]:

_, new_valloader = load_mnist_dataset(Path('../data', 'mnist'), args.batch_size, [3, 5])

In [28]:
x_new, y_new = next(iter(new_valloader))

In [29]:
x_new, y_new = next(iter(valloader))
train_x, train_y = next(iter(trainloader))

In [30]:
from control_variates.uncertainty_quantification import ClassificationUncertaintyMCMC
from control_variates.cv_utils import SpectralVariance, trapezoidal_kernel

### Фитим на одном примере

Простейший случай: Линейная модель, выборочная дисперсия

In [31]:
from control_variates.cv import PsyConst

ImportError: cannot import name 'PsyConst' from 'control_variates.cv' (../control_variates/cv.py)

In [32]:
priors

{}

In [33]:
optimizer.alpha0

NameError: name 'optimizer' is not defined

In [34]:
psy_model = PsyLinear(input_dim=psy_input_dim)
psy_model.init_zero()
psy_model.to(device)

neural_control_variate = SteinCV(psy_model, train_x, train_y, priors, N_train)

ncv_optimizer = torch.optim.Adam(psy_model.parameters(), lr=args.cv_lr, weight_decay=1e-1)

function_f = lambda model, x: get_binary_prediction(model, x, classes=[0, 1])
history = [] 
x = x_new[:1]
fig = plt.figure()

data_iter = iter(trainloader)
for it in range(args.n_cv_iter):
    try:
        train_x, train_y = next(data_iter)
    except:
        data_iter = iter(trainloader)
        train_x, train_y = next(data_iter)
    neural_control_variate.train_x = train_x
    neural_control_variate.train_y = train_y
    ncv_optimizer.zero_grad()
    mc_variance, no_cv_variance = compute_naive_variance(function_f, neural_control_variate, models, x)
    history.append(mc_variance.mean().item())
    mc_variance.mean().backward()
    ncv_optimizer.step()

print(mc_variance.mean().item(), no_cv_variance.mean().item())
plt.plot(np.arange(it+1), history)
plt.axhline(y=no_cv_variance.mean(), color='r', linestyle='-')
plt.xlim(0, args.n_cv_iter)
plt.show()

NameError: name 'psy_input_dim' is not defined

In [35]:
for model in models:
    model.zero_grad()
log_likelihoods = [(compute_log_likelihood(train_x, train_y, model) * N_train).backward() for model in models]
ll_div = torch.stack([compute_tricky_divergence(model, priors) for model in models])  # ll_div для каждой модели

models_weights = torch.stack([state_dict_to_vec(model.state_dict()) for model in models])  # батч моделей
models_weights.requires_grad = True
psy_value = psy_model(models_weights, x)  # хотим тензор число моделей X число примеров
psy_func = partial(psy_model, x=x)
psy_jac = torch.autograd.functional.jacobian(psy_func, models_weights, create_graph=True)
psy_div = torch.einsum('ijil->ij', psy_jac)  # я чет завис с размерностями: i - n_models, j - n_images, l - n_weights
ncv_value = psy_value * ll_div.unsqueeze(-1) + psy_div

NameError: name 'models' is not defined

In [36]:
models[0].state_dict()

NameError: name 'models' is not defined

In [37]:
priors

{}

In [38]:
ll_div.mean()

NameError: name 'll_div' is not defined

In [39]:
ncv_value.mean()

NameError: name 'ncv_value' is not defined

In [40]:
neural_control_variate(models, x).mean()

NameError: name 'neural_control_variate' is not defined

In [41]:
psy_model.state_dict()

NameError: name 'psy_model' is not defined

In [42]:
psy_model = PsyLinear(psy_input_dim)
psy_model.init_zero()
psy_model.to(device)

neural_control_variate = SteinCV(psy_model, train_x, train_y, priors, N_train)

ncv_optimizer = torch.optim.Adam(psy_model.parameters(), lr=args.cv_lr)

function_f = lambda model, x: get_binary_prediction(model, x, classes=[0, 1])
history = [] 
x = x_new[20:30]
fig = plt.figure()

data_iter = iter(trainloader)
for it in range(args.n_cv_iter):
    try:
        train_x, train_y = next(data_iter)
    except:
        data_iter = iter(trainloader)
        train_x, train_y = next(data_iter)
    neural_control_variate.train_x = train_x
    neural_control_variate.train_y = train_y
    ncv_optimizer.zero_grad()
    mc_variance, no_cv_variance = compute_naive_variance(function_f, neural_control_variate, models, x)
    history.append(mc_variance.mean().item())
    mc_variance.mean().backward()
    ncv_optimizer.step()

print(mc_variance.mean().item(), no_cv_variance.mean().item())
plt.plot(np.arange(it+1), history)
plt.axhline(y=no_cv_variance.mean(), color='r', linestyle='-')
plt.xlim(0, args.n_cv_iter)
plt.show()

NameError: name 'psy_input_dim' is not defined

In [43]:
for model in models:
    model.zero_grad()
log_likelihoods = [(compute_log_likelihood(train_x, train_y, model) * N_train).backward() for model in models]
ll_div = torch.stack([compute_tricky_divergence(model, priors) for model in models])  # ll_div для каждой модели

models_weights = torch.stack([state_dict_to_vec(model.state_dict()) for model in models])  # батч моделей
models_weights.requires_grad = True
psy_value = psy_model(models_weights, x)  # хотим тензор число моделей X число примеров
psy_func = partial(psy_model, x=x)
psy_jac = torch.autograd.functional.jacobian(psy_func, models_weights, create_graph=True)
psy_div = torch.einsum('ijil->ij', psy_jac)  # я чет завис с размерностями: i - n_models, j - n_images, l - n_weights
ncv_value = psy_value * ll_div.unsqueeze(-1) + psy_div

NameError: name 'models' is not defined

In [44]:
psy_model.layer.weight.sum()

NameError: name 'psy_model' is not defined

In [45]:
ll_div.mean()

NameError: name 'll_div' is not defined

In [46]:
mc_variance, no_cv_variance = compute_naive_variance(function_f, neural_control_variate, models, x_new[40:140])
print(mc_variance.mean().item(), no_cv_variance.mean().item())

NameError: name 'function_f' is not defined

In [47]:
uncertainty_quant = ClassificationUncertaintyMCMC(models, neural_control_variate)
metrics = []
variance_with_cv = []
variance_without_cv = []

for x, y in tqdm(new_valloader):
    ratio = uncertainty_quant.compute_variance_ratio(x)
    metrics.extend(ratio.tolist())
    variance_with_cv.extend(uncertainty_quant.estimate_emperical_variance(x, True).tolist())
    variance_without_cv.extend(uncertainty_quant.estimate_emperical_variance(x, False).tolist())
metrics = np.array(metrics)
print(np.mean(metrics), np.std(metrics))

NameError: name 'models' is not defined

In [48]:
(metrics < 1.).sum() / metrics.shape[0]

NameError: name 'metrics' is not defined

In [49]:
plt.scatter(variance_without_cv, variance_with_cv)
x_ = np.linspace(0, np.max(variance_with_cv))
plt.plot(x_, x_)
plt.xlim((0, 0.0025))
plt.ylim((0, 0.01))

NameError: name 'variance_without_cv' is not defined

Спектральная дисперсия

In [50]:
psy_model = PsyLinear(psy_input_dim)
psy_model.init_zero()
psy_model.to(device)

neural_control_variate = SteinCV(psy_model, train_x, train_y, priors, N_train)
function_f = lambda model, x: get_binary_prediction(model, x, classes=[0, 1])

function_h = lambda model, x: function_f(model, x) - neural_control_variate(model, x)

window_lag_f = trapezoidal_kernel
truncation_point = len(models) // 2
spectral_loss = SpectralVariance(function_h, models, window_lag_f, truncation_point)
no_cv_loss = SpectralVariance(function_f, models, window_lag_f, truncation_point)

ncv_optimizer = torch.optim.Adam(psy_model.parameters(), lr=args.cv_lr)

history = [] 
x = x_new[20:50]
fig = plt.figure()

no_cv_variance = no_cv_loss(x)

data_iter = iter(trainloader)
for it in range(args.n_cv_iter):
    try:
        train_x, train_y = next(data_iter)
    except:
        data_iter = iter(trainloader)
        train_x, train_y = next(data_iter)
    neural_control_variate.train_x = train_x
    neural_control_variate.train_y = train_y
    ncv_optimizer.zero_grad()
    mc_variance = spectral_loss(x)
    history.append(mc_variance.mean().item())
    mc_variance.mean().backward()
    ncv_optimizer.step()

print(mc_variance.mean().item(), no_cv_variance.mean().item())
plt.plot(np.arange(it+1), history)
plt.axhline(y=no_cv_variance.mean(), color='r', linestyle='-')
plt.xlim(0, args.n_cv_iter)
plt.show()

NameError: name 'psy_input_dim' is not defined

In [51]:
x = x_new[50:130]
no_cv_variance = no_cv_loss(x)
mc_variance = spectral_loss(x)
mc_variance.mean().item(), no_cv_variance.mean().item()

NameError: name 'no_cv_loss' is not defined

In [52]:
uncertainty_quant = ClassificationUncertaintyMCMC(models, neural_control_variate)
metrics = []
variance_with_cv = []
variance_without_cv = []

for x, y in tqdm(new_valloader):
    #ratio = uncertainty_quant.compute_variance_ratio(x)
    #metrics.extend(ratio.tolist())
    variance_with_cv.extend(uncertainty_quant.estimate_emperical_variance(x, True).tolist())
    variance_without_cv.extend(uncertainty_quant.estimate_emperical_variance(x, False).tolist())
metrics = np.array(variance_with_cv) / np.array(variance_without_cv)
print(np.mean(metrics), np.std(metrics))

NameError: name 'models' is not defined

In [53]:
print(np.mean(metrics), np.std(metrics))

NameError: name 'metrics' is not defined

In [54]:
(metrics < 1.).sum() / metrics.shape[0]

NameError: name 'metrics' is not defined

In [55]:
plt.scatter(variance_without_cv, variance_with_cv)
x_ = np.linspace(0, np.max(variance_with_cv))
plt.plot(x_, x_)
plt.xlim((0, 0.0025))
plt.ylim((0, 0.01))

NameError: name 'variance_without_cv' is not defined

### Фитим на батче

In [56]:
#psy_model = PsyMLP(psy_input1, psy_hidden, psy_depth1)
psy_model = PsyLinear(psy_input_dim)
psy_model.init_zero()
psy_model.to(device)

neural_control_variate = SteinCV(psy_model, train_x, train_y, priors, N_train)

ncv_optimizer = torch.optim.Adam(psy_model.parameters(), lr=args.cv_lr)

NameError: name 'psy_input_dim' is not defined

In [57]:
function_f = lambda model, x: get_binary_prediction(model, x, classes=[0, 1])
history = [] #np.zeros()
x = x_new[20:21]
fig = plt.figure()
for it in range(args.n_cv_iter):
    ncv_optimizer.zero_grad()
    mc_variance, no_cv_variance = compute_naive_variance(function_f, neural_control_variate, models, x)
    history.append(mc_variance.sum().item())
    mc_variance.backward()
    ncv_optimizer.step()

plt.plot(np.arange(it+1), history)
plt.axhline(y=no_cv_variance, color='r', linestyle='-')
plt.xlim(0, args.n_cv_iter)
plt.show()

NameError: name 'ncv_optimizer' is not defined

In [58]:
psy_model = PsyConv(psy_input_dim, 75)
#psy_model.init_zero()
psy_model.to(device)

neural_control_variate = SteinCV(psy_model, train_x, train_y, priors, N_train)

ncv_optimizer = torch.optim.Adam(psy_model.parameters(), lr=args.cv_lr)

function_f = lambda model, x: get_binary_prediction(model, x, classes=[0, 1])
history = [] 
x = x_new[20:30]
fig = plt.figure()

data_iter = iter(trainloader)
for it in range(args.n_cv_iter):
    try:
        train_x, train_y = next(data_iter)
    except:
        data_iter = iter(trainloader)
        train_x, train_y = next(data_iter)
    neural_control_variate.train_x = train_x
    neural_control_variate.train_y = train_y
    ncv_optimizer.zero_grad()
    mc_variance, no_cv_variance = compute_naive_variance(function_f, neural_control_variate, models, x)
    history.append(mc_variance.mean().item())
    mc_variance.mean().backward()
    ncv_optimizer.step()

    print(mc_variance.mean().item(), no_cv_variance.mean().item())
plt.plot(np.arange(it+1), history)
plt.axhline(y=no_cv_variance.mean(), color='r', linestyle='-')
plt.xlim(0, args.n_cv_iter)
plt.show()

NameError: name 'PsyConv' is not defined

In [59]:
from control_variates.cv import PsyConv

In [60]:
psy_model = PsyConv(psy_input_dim, 75)
psy_model.init_zero()
psy_model.to(device)

neural_control_variate = SteinCV(psy_model, train_x, train_y, priors, N_train)

ncv_optimizer = torch.optim.Adam(psy_model.parameters(), lr=args.cv_lr)

function_f = lambda model, x: get_binary_prediction(model, x, classes=[0, 1])
history = [] 
x = x_new[20:70]
fig = plt.figure()

data_iter = iter(trainloader)
for it in range(args.n_cv_iter):
    #x = x_new[it:it+10]
    try:
        train_x, train_y = next(data_iter)
    except:
        data_iter = iter(trainloader)
        train_x, train_y = next(data_iter)
    neural_control_variate.train_x = train_x
    neural_control_variate.train_y = train_y
    ncv_optimizer.zero_grad()
    mc_variance, no_cv_variance = compute_naive_variance(function_f, neural_control_variate, models, x)
    history.append(mc_variance.mean().item())
    mc_variance.mean().backward()
    ncv_optimizer.step()

    print(mc_variance.mean().item(), no_cv_variance.mean().item())
plt.plot(np.arange(it+1), history)
plt.axhline(y=no_cv_variance.mean(), color='r', linestyle='-')
plt.xlim(0, args.n_cv_iter)
plt.show()

NameError: name 'psy_input_dim' is not defined

In [61]:
mc_variance, no_cv_variance = compute_naive_variance(function_f, neural_control_variate, models, x_new[30:40])

NameError: name 'neural_control_variate' is not defined

In [62]:
mc_variance.mean().item(), no_cv_variance.mean().item()

NameError: name 'mc_variance' is not defined

In [63]:
from control_variates.cv_utils import *

In [64]:
for model in models:
    model.zero_grad()
log_likelihoods = [(compute_log_likelihood(train_x, train_y, model)*N_train).backward() for model in models]
# for l in log_likelihoods:
#     l = l * N_train
#     l.backward()
for p in models[0].parameters():
    print(p.grad.sum())
ll_div = torch.stack([compute_tricky_divergence(model, priors) for model in models])


NameError: name 'models' is not defined

In [65]:
ll_div

NameError: name 'll_div' is not defined

In [66]:
log_likelihoods

NameError: name 'log_likelihoods' is not defined

In [67]:
priors

{}

In [68]:
for n, p in model.named_parameters():
    print(priors[n]*p.sum())

KeyError: 'linear.weight'

In [69]:
for p in models[0].parameters():
    print(p.grad)


NameError: name 'models' is not defined