In [1]:
import torch
import time
import matplotlib.pyplot as plt

# from botorch.acquisition.objective import ConstrainedMCObjective
from botorch.acquisition.monte_carlo import (
    qExpectedImprovement,
    qNoisyExpectedImprovement,
)
from botorch.acquisition.knowledge_gradient import qKnowledgeGradient
from botorch import fit_gpytorch_model
from botorch.sampling.samplers import SobolQMCNormalSampler

import sys
sys.path.append("../../conformalbo/")
from utils import (
    generate_initial_data,
    initialize_model,
    parse,
    optimize_acqf_and_get_observation,
    update_random_observations,
    get_problem,
    # assess_coverage,
)
# from helpers import qConformalExpectedImprovement, qConformalNoisyExpectedImprovement
from botorch.models.transforms import Standardize, Normalize

from botorch.test_functions.multi_objective import BraninCurrin

In [3]:
# Debug aid: list the `ps aux` lines that mention 14090.
# NOTE(review): the saved output of this cell shows other users' process
# command lines and paths — consider clearing it before sharing the notebook.
!ps aux | grep 14090

wesley_m  9332  1.0  0.0  12896  3052 pts/53   Ss+  13:21   0:00 /bin/bash -c ps aux | grep 14090
wesley_m  9334  0.0  0.0  14440  1124 pts/53   S+   13:21   0:00 grep 14090
sanae_l  14090  100  0.5 16587380 2965756 pts/88 Rl 05:20 481:35 python experiments/compute_bound.py --dataset=fmnist --encoding_type=arithmetic --levels=20 --misc_extra_bits=3 --prenet_cfg_path=/data/users/sanyam_s/assets/pactl/sweep-bu6bb4px/run-20220502-6wh95170/files/net.cfg.yml --scale_posterior=0.01 --use_kmeans=0


In [2]:
# Show per-GPU memory use and utilisation (presumably to pick a free device — confirm).
!nvidia-smi

Wed May  4 13:21:07 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 465.19.01    CUDA Version: 11.3     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA TITAN RTX    On   | 00000000:1A:00.0 Off |                  N/A |
| 41%   29C    P8    11W / 280W |   5295MiB / 24220MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA TITAN RTX    On   | 00000000:1B:00.0 Off |                  N/A |
| 56%   75C    P2   280W / 280W |   7059MiB / 24220MiB |    100%      Default |
|       

In [4]:
# Run everything in double precision.
dtype = torch.double

# Preferred accelerator index. Fall back to the CPU when this machine does not
# expose that many CUDA devices, so the notebook still runs end-to-end instead
# of failing on the first tensor placed on a non-existent device.
GPU_INDEX = 5
device = torch.device(
    f"cuda:{GPU_INDEX}" if torch.cuda.device_count() > GPU_INDEX else "cpu"
)

# Seed torch's global RNG so the random draws below are repeatable.
torch.random.manual_seed(0)

# Black-box function under study: BraninCurrin built with negate=True, moved to
# the chosen device and dtype.
bb_fn = BraninCurrin(negate=True).to(device, dtype)

In [5]:
# Draw the initial training set from bb_fn. Going by the later call
# generate_initial_data(20 * num_init, bb_fn, noise_se, device, dtype), the
# arguments are presumably (point count, function, noise_se, device, dtype).
train_x_ei, train_obj_ei, best_observed_value_ei = generate_initial_data(
    30, bb_fn, 0.1, device, dtype
)

torch.Size([30, 2]) torch.Size([30, 2])


In [6]:
# Per-column statistics of the initial objectives; the cells below use them to
# standardize both the training and the validation targets.
mean = train_obj_ei.mean(0)
std = train_obj_ei.std(0)

In [7]:
# Standardize the training objectives with the statistics computed above.
# NOTE(review): this rebinds train_obj_ei, so re-running this cell on its own
# applies the standardization a second time.
train_obj_ei = (train_obj_ei - mean) / std

In [8]:
# Scalar tensor holding 0.3 squared, passed to initialize_model as its third
# argument (presumably the observation-noise variance — confirm).
train_yvar = torch.tensor(0.3 ** 2, device=device, dtype=dtype)

In [12]:
# Sanity check: shape of the training targets after squeeze().
train_obj_ei.squeeze().shape

torch.Size([30, 2])

In [13]:
# Build the model on the initial data. The result is later unpacked as
# (mll, model, trans).
mll_and_model = initialize_model(
    train_x_ei,
    train_obj_ei.squeeze(),
    train_yvar,
    method="exact",
    alpha=0.05,
    tgt_grid_res=64,
)

In [14]:
# Draw 100 further points from bb_fn to serve as a validation set; the third
# return value is not needed here.
val_x, val_obj, _ = generate_initial_data(100, bb_fn, 0.1, device, dtype)

torch.Size([100, 2]) torch.Size([100, 2])


In [15]:
# Standardize the validation objectives with the *training* mean/std.
# NOTE(review): this rebinds val_obj, so re-running this cell on its own
# applies the standardization a second time.
val_obj = (val_obj - mean) / std

In [16]:
# Fit the model by handing the first element of mll_and_model (the mll) to
# fit_gpytorch_model; the trailing ";" suppresses the cell's output.
fit_gpytorch_model(mll_and_model[0]);

In [17]:
# Toggle automatic post-mortem debugging on exceptions (debugging aid;
# consider removing before sharing the notebook).
%pdb

Automatic pdb calling has been turned ON


In [18]:
from helpers import assess_coverage

In [19]:
# Keyword arguments forwarded to assess_coverage further down.
conformal_kwargs = {
    "alpha": 0.2,
    "grid_res": 32,
    "max_grid_refinements": 4,
    "ratio_estimator": None,
    "temp": 0.01,
}

In [20]:
# Convenience alias for the second element of mll_and_model (the model).
model = mll_and_model[1]

In [21]:
# Sanity check: shape of the validation inputs.
val_x.shape

torch.Size([100, 2])

In [22]:
# model.posterior(val_x.unsqueeze(-2)).mean.shape

In [23]:
# test_inputs = torch.rand(100, 1, 2, device=device, dtype=dtype)
# target_grid = torch.randn(100, 32, 1, 2, device=device, dtype=dtype)

In [24]:
from helpers import est_train_post_var, conformal_gp_regression
import torchsort
import numpy as np

In [25]:
# Evaluate the model on the validation set with the conformal settings above;
# the two return values are bound to cred_bounds and conf_bounds.
# NOTE(review): the saved output of the next cell shows conf_bounds as a single
# float, while the plotting cells further down index conf_bounds[0]/[1] —
# confirm what assess_coverage actually returns.
cred_bounds, conf_bounds = assess_coverage(mll_and_model[1], val_x, val_obj, **conformal_kwargs)

torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  ../aten/src/ATen/native/BatchLinearAlgebra.cpp:1672.)
  Linv = torch.triangular_solve(Eye, L, upper=False).solution


In [26]:
# Display the second value returned by assess_coverage.
conf_bounds

0.6549999713897705

In [None]:
# Apply the third element of mll_and_model (unpacked later as `trans`) to the
# validation targets, keep the first returned item, and move it to the CPU for
# plotting.
trans_val_y = mll_and_model[2](val_obj)[0].cpu()

In [None]:
# Compare credible and conformal intervals against the observed validation
# targets: vertical bars span each interval, dots mark the interval midpoints.
plt.vlines(trans_val_y, cred_bounds[0].cpu(), cred_bounds[1].cpu(), alpha=0.2)
plt.scatter(trans_val_y, (cred_bounds[0] + cred_bounds[1]).cpu() / 2, label="Credible")
plt.vlines(trans_val_y, conf_bounds[0].cpu(), conf_bounds[1].cpu(), color="orange", alpha=0.2)
plt.scatter(trans_val_y, (conf_bounds[0] + conf_bounds[1]).cpu() / 2, label="Conformal")
plt.xlabel("Actual")
plt.ylabel("Predicted")
plt.legend()
# The objective used throughout this notebook is BraninCurrin; the previous
# title ("Levy-5") named a different test function.
plt.title("BraninCurrin");

In [None]:
# Overlay histograms of the interval widths (upper bound minus lower bound),
# credible first, then conformal drawn semi-transparent on top.
cred_width = (cred_bounds[1] - cred_bounds[0]).cpu().numpy()
plt.hist(cred_width)
conf_width = (conf_bounds[1] - conf_bounds[0]).cpu().numpy()
plt.hist(conf_width, alpha=0.5)
plt.xlabel("Width")
plt.title("Width of Equivalent Intervals")

## ignore below

In [None]:
# 40 x 40 evaluation grid over the unit square. indexing="ij" is the layout
# torch.meshgrid already used by default; passing it explicitly keeps the same
# result and avoids the deprecation warning raised when it is omitted.
grid_axis = torch.linspace(0, 1, 40)
xx, yy = torch.meshgrid(grid_axis, grid_axis, indexing="ij")
# Flatten the grid into a (1600, 2) tensor of query points on the working
# device and dtype.
test_x = torch.stack((xx.reshape(-1), yy.reshape(-1))).t().to(device, dtype)

In [None]:
# Unpack the triple returned by initialize_model.
mll, model, trans = mll_and_model

In [None]:
# Short aliases for the (already standardized) initial training data.
inputs, objective = train_x_ei, train_obj_ei

In [None]:
# Put the transform into eval mode, then apply it to the objectives and keep
# the first returned item.
trans.eval()
t_objective = trans(objective)[0]

In [None]:
# Conformal EI acquisition, with the maximum transformed objective as best_f.
# NOTE(review): qConformalExpectedImprovement and PassSampler are not imported
# in any visible cell (the `from helpers import ...` line in the first cell is
# commented out, and PassSampler appears in no import) — confirm where they
# come from, otherwise this cell raises NameError on a fresh kernel.
acqf = qConformalExpectedImprovement(
    model=model,
    best_f=(t_objective).max(),
    sampler=PassSampler(64),
)
# Turn off the objective's output-shape verification flag.
acqf.objective._verify_output_shape = False

In [None]:
# Evaluate the conformal acquisition on the grid without tracking gradients.
with torch.no_grad():
    # Presumably switches the model into its conformal prediction mode — confirm.
    model.conformal()
    # unsqueeze(-2) turns the (1600, 2) grid into (1600, 1, 2), so each grid
    # point is scored on its own.
    conformalei = acqf(test_x.unsqueeze(-2))
    

In [None]:
# Presumably switches the model back to its standard (non-conformal) mode — confirm.
model.standard()
# Standard qEI with the same best_f. No sampler is passed here, unlike in the
# optimization loop further down, so the library default applies.
acqf = qExpectedImprovement(
    model=model,
    best_f=(t_objective).max(),
)

In [None]:
# Evaluate standard EI on the same grid without tracking gradients.
with torch.no_grad():
    stdei = acqf(test_x.unsqueeze(-2))

In [None]:
# Filled contour of standard EI over the 40 x 40 grid, with a colour bar.
stdei_grid = stdei.cpu().reshape(40, 40)
f = plt.contourf(xx, yy, stdei_grid)
plt.colorbar(f)
plt.title("Standard EI")

In [None]:
# Filled contour of conformal EI over the 40 x 40 grid, with a colour bar.
conformalei_grid = conformalei.cpu().reshape(40, 40)
f = plt.contourf(xx, yy, conformalei_grid)
plt.colorbar(f)
plt.title("Conformal EI")

In [None]:
# Loop budget: number of BayesOpt rounds and points acquired per round.
n_batch, batch_size = 30, 1
# num_init is only used below to size the held-out set (20 * num_init);
# noise_se is passed to the data-generation and observation helpers.
num_init, noise_se = 10, 0.1
# Settings forwarded to initialize_model.
method, alpha, tgt_grid_res = "exact", 0.05, 64
# Sample count handed to the acquisition samplers.
mc_samples = 64

In [None]:
# Acquisition functions to compare; each key gets its own model, data and traces.
keys = ["ei", "cei"]
best_observed = {k: [] for k in keys}
coverage = {k: [] for k in keys}

# Search-space bounds for the acquisition optimizer. `bounds` is not defined in
# any visible cell, so this cell raised NameError on a fresh kernel; take the
# bounds from the test function itself.
# NOTE(review): confirm optimize_acqf_and_get_observation expects the
# function's native bounds (for BraninCurrin these are the unit square).
bounds = bb_fn.bounds

# call helper functions to generate initial training data and initialize model
# (
#     train_x_ei,
#     train_obj_ei,
#     best_observed_value_ei,
# ) = generate_initial_data(
#     num_init, bb_fn, noise_se, device, dtype
# )
# Held-out set used to track coverage during the loop.
heldout_x, heldout_obj, _ = generate_initial_data(20 * num_init, bb_fn, noise_se, device, dtype)

# Every method starts from the same initial data, each with its own freshly
# initialized model.
mll_model_dict = {}
data_dict = {}
for k in keys:
    mll_and_model = initialize_model(
        train_x_ei, train_obj_ei, train_yvar,
        method=method, alpha=alpha, tgt_grid_res=tgt_grid_res,
    )
    mll_model_dict[k] = (mll_and_model)
    best_observed[k].append(best_observed_value_ei)
    data_dict[k] = (train_x_ei, train_obj_ei)

# Arguments shared by every optimize_acqf_and_get_observation call.
optimize_acqf_kwargs = {
    "bounds": bounds,
    "BATCH_SIZE": batch_size,
    "fn": bb_fn,
    "noise_se": noise_se,
}

# run N_BATCH rounds of BayesOpt after the initial random batch
for iteration in range(1, n_batch + 1):
    t0 = time.time()
    for k in keys:
        torch.cuda.empty_cache()
        
        if k == "rnd":
            # update random
            best_observed[k] = update_random_observations(batch_size, best_observed[k], bb_fn.bounds, bb_fn, dim=bounds.shape[1])
            continue

        # fit the model
        mll, model, trans = mll_model_dict[k]
        inputs, objective = data_dict[k]
        trans.eval()
        t_objective = trans(objective)[0]
        # model.requires_grad_(True)
        fit_gpytorch_model(mll)
        # model.requires_grad_(False)
#         print(list(model.named_parameters()))
        # now assess coverage on the heldout set
        # TODO: update the heldout sets
        coverage[k].append(assess_coverage(model, heldout_x, trans(heldout_obj)[0], alpha))
        print(coverage[k][-1], k)

        # now prepare the acquisition
        qmc_sampler = SobolQMCNormalSampler(num_samples=mc_samples)
        if k == "ei":
            acqf = qExpectedImprovement(
                model=model,
                best_f=(t_objective).max(),
               sampler=qmc_sampler,
            )
        elif k == "nei":
            acqf = qNoisyExpectedImprovement(
                model=model,
                X_baseline=inputs,
                sampler=qmc_sampler,
            )
        elif k == "kg":
            acqf = qKnowledgeGradient(
                model=model,
                current_value=t_objective.max(),
                num_fantasies=None,
                sampler=qmc_sampler,
            )
        elif k == "cei":
            model.conformal()
            acqf = qConformalExpectedImprovement(
                model=model,
                best_f=(t_objective).max(),
                sampler=PassSampler(mc_samples),
            )
            acqf.objective._verify_output_shape = False
        elif k == "cnei":
            model.conformal()
            acqf = qConformalNoisyExpectedImprovement(
                model=model,
                X_baseline=inputs,
                sampler=PassSampler(mc_samples),
                cache_root=False,
            )
            acqf.objective._verify_output_shape = False
        
        print(model.train_inputs[0].shape, model.train_targets.shape, 'shapes')
        # optimize acquisition
        new_x, new_obj = optimize_acqf_and_get_observation(
            acqf, **optimize_acqf_kwargs
        )
        new_x = new_x.detach()
    
        torch.cuda.empty_cache()
        
        with torch.no_grad():
            output = acqf(test_x.unsqueeze(-2)).cpu()
            plt.figure()
            f = plt.contourf(xx, yy, output.reshape(40, 40))
            plt.colorbar(f)
        
        # display new pt
        plt.scatter(new_x[0,0].cpu(), new_x[0,1].cpu(), color = "red")
        plt.title(k)
        plt.show()
            
        inputs = torch.cat([inputs, new_x])
        objective = torch.cat([objective, new_obj])

        best_observed[k].append(objective.max().item())
        # prepare new model
        mll, model, trans = initialize_model(
            inputs,
            objective,
            method=method,
        )
        mll_model_dict[k] = (mll, model, trans)
        data_dict[k] = inputs, objective

    t1 = time.time()


In [None]:
# Scatter every queried input for each method, coloured by row order (initial
# points first, then acquisitions in the order they were appended).
fig, ax = plt.subplots(1, 2, figsize=(16, 5))
for axis, key in zip(ax, ("ei", "cei")):
    queried_x = data_dict[key][0].cpu()
    # Size the colour array from the data rather than a fixed 40, so it always
    # matches the number of points actually collected.
    axis.scatter(*queried_x.t(), c=torch.arange(queried_x.shape[0]))
    axis.set_title(key)

In [None]:
# Running maximum of the stored objectives for each method, row by row.
for key in ("ei", "cei"):
    observed = data_dict[key][1].cpu()
    plt.plot(observed.cummax(0)[0])
plt.ylim((-20, 0))