In [1]:
import numpy as np
import torch

from rlaopt.solvers import PCGConfig, SAPConfig, SAPAccelConfig
from rlaopt.preconditioners import NystromConfig

from scalable_gp_inference.hparam_training import train_exact_gp_subsampled
from scalable_gp_inference.gp_inference import GPInference
from scalable_gp_inference.sdd_config import SDDConfig

from experiments.data_processing.preprocess import preprocess_dataset

In [2]:
# Single seed shared by the torch RNG setup and by preprocess_dataset below.
seed = 0

In [3]:
# Run everything in double precision; this matches the np.float64 dtype
# requested from preprocess_dataset later in the notebook.
torch.set_default_dtype(torch.float64)

# Seed NumPy's global RNG as well as torch's: seeding torch alone leaves any
# NumPy-based randomness in the pipeline unseeded, so reruns could differ.
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f39e427f970>

In [4]:
# Primary GPU: receives the preprocessed data here and is also the device
# handed to the solver configuration later in the notebook.
device = torch.device("cuda:1")

# Disabled alternative: small synthetic sine-wave regression problem.
# ntr = 10000
# ntst = 1000
# d = 3
# data_noise = 0.04
#
# freqs = 2 * torch.pi * torch.randn(d)
# Xtr = torch.linspace(0, 1, ntr).unsqueeze(1).expand(-1, d)
# Xtst = torch.linspace(0, 1, ntst).unsqueeze(1).expand(-1, d)
# ytr = torch.sin(Xtr @ freqs) + torch.randn(Xtr.shape[0]) * (data_noise ** 0.5)
# ytst = torch.sin(Xtst @ freqs) + torch.randn(Xtst.shape[0]) * (data_noise ** 0.5)

# Load the 3droad dataset in float64 with z-score normalisation.
# NOTE(review): 0.1 is presumably the test-split fraction and True a
# shuffle/standardise flag -- confirm against preprocess_dataset's signature.
data = preprocess_dataset("3droad", 0.1, np.float64, True, "z_score", device, seed)
Xtr, ytr = data["x_train"], data["y_train"]
Xtst, ytst = data["x_test"], data["y_test"]

# Kernel choice and multi-GPU layout passed to GPInference.
kernel_type = "matern32"
distributed = True
devices = {torch.device("cuda:1"), torch.device("cuda:3"), torch.device("cuda:4")}

# Sizes passed to GPInference for posterior sampling.
num_posterior_samples = 64
num_random_features = 64

# Number of training points used when fitting hyperparameters on a subsample.
subsample_size = 10000

Number of training samples for 3droad: 391386
Number of test samples for 3droad: 43488
Device: cuda:1


In [5]:
# Fit kernel hyperparameters with an exact GP on a subsample of the training
# set (a single trial of 100 optimisation iterations at learning rate 0.1).
hparam_opt_settings = {"lr": 0.1}
hparams = train_exact_gp_subsampled(
    Xtr=Xtr,
    ytr=ytr,
    kernel_type=kernel_type,
    opt_hparams=hparam_opt_settings,
    training_iters=100,
    subsample_size=subsample_size,
    num_trials=1,
)

In [6]:
# Show the hyperparameters returned by train_exact_gp_subsampled for inspection.
print(hparams)

GPHparams(signal_variance=3.1257829705102855, kernel_lengthscale=tensor([0.0381, 0.0423], device='cuda:1'), noise_variance=0.0003895794149481096)


In [7]:
# Assemble the inference model from the loaded data, the fitted
# hyperparameters, and the sampling / multi-GPU settings defined earlier.
gp_inference_model = GPInference(
    # train / test data
    Xtr=Xtr, ytr=ytr,
    Xtst=Xtst, ytst=ytst,
    # kernel specification
    kernel_type=kernel_type, kernel_hparams=hparams,
    # posterior sampling sizes
    num_posterior_samples=num_posterior_samples,
    num_random_features=num_random_features,
    # multi-device execution
    distributed=distributed, devices=devices,
)

In [8]:
# Shared solver budget and problem size.
max_iters = 1000
n_train = Xtr.shape[0]
tol = 1e-6  # used for both the absolute and relative tolerance

# Nystrom preconditioner settings. Only the disabled SAP / PCG configs below
# reference this object; the active SDD config does not take a preconditioner.
nystrom_config = NystromConfig(rank=100, rho=hparams.noise_variance, damping_mode="adaptive")

# --- Disabled alternative: accelerated SAP ---
# accel_config = SAPAccelConfig(mu=hparams.noise_variance, nu=10.0)
# solver_config = SAPConfig(
#     precond_config=nystrom_config,
#     max_iters=max_iters,
#     atol=1e-6,
#     rtol=1e-6,
#     blk_sz=Xtr.shape[0] // 10,
#     accel_config=accel_config,
#     device=device,
# )

# --- Active solver: SDD ---
# Step size and averaging parameter are scaled by the number of training
# points and the iteration budget; each block covers a tenth of the data.
solver_config = SDDConfig(
    momentum=0.9,
    step_size=100 / n_train,
    theta=100 / max_iters,
    blk_size=n_train // 10,
    max_iters=max_iters,
    device=device,
    atol=tol,
    rtol=tol,
)

# --- Disabled alternative: preconditioned CG ---
# solver_config = PCGConfig(
#     precond_config=nystrom_config,
#     max_iters=max_iters,
#     atol=1e-6,
#     rtol=1e-6,
#     device=device,
# )

In [None]:
# Run inference with the configured solver, starting from the default
# initialisation (W_init=None) and without the full kernel.
# Evaluation happens every 10 iterations and is logged to Weights & Biases.
results = gp_inference_model.perform_inference(
    solver_config=solver_config, W_init=None,
    use_full_kernel=False,
    eval_freq=10, log_in_wandb=True,
)

Initialized with clean caches. PID: 3985654
Initialized with clean caches. PID: 3985654


[34m[1mwandb[0m: Currently logged in as: [33mpratikrathore8[0m ([33msketchy-opts[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[PID 3991200] Computing kernel for device cuda:3...
[PID 3991200] Kernel cached. Cache size: 1
[PID 3991201] Computing kernel for device cuda:4...
[PID 3991201] Kernel cached. Cache size: 1
[PID 3991199] Computing kernel for device cuda:1...
[PID 3991199] Kernel cached. Cache size: 1
[PID 3991200] Using cached kernel for device cuda:3
[PID 3991199] Using cached kernel for device cuda:1
[PID 3991201] Using cached kernel for device cuda:4
[PID 3991200] Using cached kernel for device cuda:3
[PID 3991199] Using cached kernel for device cuda:1
[PID 3991201] Using cached kernel for device cuda:4
[PID 3991200] Using cached kernel for device cuda:3
[PID 3991199] Using cached kernel for device cuda:1
[PID 3991201] Using cached kernel for device cuda:4
[PID 3991199] Using cached kernel for device cuda:1
[PID 3991201] Using cached kernel for device cuda:4
[PID 3991200] Using cached kernel for device cuda:3
[PID 3991199] Using cached kernel for device cuda:1
[PID 3991200] Using cached kernel for d

Process Process-7:
Process Process-6:
Process Process-3:
Process Process-8:
Process Process-2:
Process Process-4:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/pratikr/rlaopt/rlaopt/linops/distributed.py", line 89, in _device_worker
    task = task_queue.get()
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 103, in get
    res = self._recv_bytes()
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
Tr

KeyboardInterrupt: 

Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x7f370aac0250>> (for post_run_cell), with arguments args (<ExecutionResult object at 7f386052baf0, execution_count=9 error_before_exec=None error_in_exec= info=<ExecutionInfo object at 7f386052b400, raw_cell="results = gp_inference_model.perform_inference(
  .." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://ssh-remote%2Bsoal-11.stanford.edu/home/pratikr/scalable_gp_inference/test_gp_inference.ipynb#X10sdnNjb2RlLXJlbW90ZQ%3D%3D> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

: 