In [1]:
from typing import Any, Dict, Optional
import torch
import random
import os 
import argparse
from typing import (
    Any,
    Callable,
    Dict,
    Hashable,
    Iterable,
    List,
    Optional,
    Sequence,
    Tuple,
    Type,
    TypeVar,
    Union,
)
from botorch.utils.sampling import draw_sobol_samples

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from botorch.models.gp_regression import SingleTaskGP
from tqdm import tqdm
from torch import Tensor
from botorch.acquisition.active_learning import (
    MCSampler,
    qNegIntegratedPosteriorVariance,
)

from botorch.fit import fit_gpytorch_mll
from sklearn.model_selection import train_test_split
from botorch.models.gp_regression import SingleTaskGP

from sklearn.metrics import mean_absolute_error

import gc




from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
import warnings

import pickle

from botorch.exceptions.warnings import BotorchTensorDimensionWarning, InputDataWarning
# Silence BoTorch's "Input data is not standardized." InputDataWarning.
warnings.filterwarnings(
            "ignore",
            message="Input data is not standardized.",
            category=InputDataWarning,
        )
import warnings
# NOTE(review): this blanket filter ignores *every* warning category, which
# makes the targeted filter above redundant and also hides warnings that may
# matter (e.g. numerical or convergence warnings raised while fitting).
warnings.filterwarnings("ignore")



from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood

from botorch.acquisition.active_learning import (
    MCSampler,
    qNegIntegratedPosteriorVariance,
)

from botorch.utils.transforms import normalize, standardize

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Single precision is used for every tensor created below
# (torch.double was the alternative left commented out by the author).
dtype = torch.float32

# Unit-hypercube bounds for the 10 input dimensions:
# row 0 holds the lower bounds, row 1 the upper bounds.
bounds = torch.tensor([[0.0] * 10, [1.0] * 10], device=device, dtype=dtype)

# 1024 Sobol points inside `bounds`; later passed as `mc_points` to
# qNegIntegratedPosteriorVariance. squeeze(1) drops the q=1 dimension.
mcp = draw_sobol_samples(bounds=bounds, n=1024, q=1, seed=42).squeeze(1)
# Tensor.to() is NOT in-place: the result must be rebound, otherwise the
# call is a silent no-op.
mcp = mcp.to(device=device, dtype=dtype)

# Seeds (one benchmark repetition each) and the held-out test set.
seeds = np.load('seeds.npy')
xtest = np.load('xtest.npy')
ytest = np.load('ytest.npy')

# SECURITY: pickle.load can execute arbitrary code while deserializing.
# Only load these files if they come from a trusted source.
with open('xcandidates_original.pkl', 'rb') as f:
    xcandidates_original = pickle.load(f)

with open('ycandidates_original.pkl', 'rb') as f:
    ycandidates_original = pickle.load(f)

# Move the test set onto the compute device with the working dtype.
xtest = torch.tensor(xtest, dtype=dtype, device=device)
ytest = torch.tensor(ytest, dtype=dtype, device=device)


In [5]:

def random_initial_data(x, y, initial_percent, seed=None):
    """Randomly split paired sequences into an initial set and a candidate pool.

    Args:
        x: Indexable sequence of inputs.
        y: Indexable sequence of targets, aligned element-wise with ``x``.
        initial_percent: Fraction of ``len(x)`` to draw for the initial set;
            the count is ``int(len(x) * initial_percent)`` (truncated).
        seed: Optional seed. When given, NumPy's *global* RNG is re-seeded
            with it before drawing, so the split is reproducible.

    Returns:
        Tuple ``(x_initial, y_initial, xcandidates, ycandidates)`` of lists.
        The initial lists follow the order in which indices were drawn; the
        candidate lists keep the original order of the remaining elements.
    """
    if seed is not None:
        np.random.seed(seed)

    n_initial = int(len(x) * initial_percent)
    idx = np.random.choice(len(x), n_initial, replace=False).tolist()
    # Set membership is O(1); testing against the list made the two
    # candidate comprehensions quadratic in the number of elements.
    chosen = set(idx)

    x_initial = [x[i] for i in idx]
    y_initial = [y[i] for i in idx]
    xcandidates = [x[i] for i in range(len(x)) if i not in chosen]
    ycandidates = [y[i] for i in range(len(y)) if i not in chosen]

    return x_initial, y_initial, xcandidates, ycandidates




# Module-level result containers shared by the benchmark loop below.
rand_selection_mae = list()  # not written to by the code visible in this notebook
xmax_candidates = list()     # index of the selected candidate, one per acquisition step
pred_mae = list()            # MAE trace; rebound to a fresh list for every seed
pred_y = list()              # only referenced from commented-out code
pred_std = list()            # only referenced from commented-out code
qnipv_runs = list()          # one MAE trace (list) appended per seed



def find_max_normalized_acqval(tensor_list, qNIVP):
    """Find the candidate whose size-normalized acquisition value is largest.

    Each candidate is scored as ``qNIVP(tensor) / len(tensor)``, i.e. the
    acquisition value divided by the size of the candidate's first dimension.

    Args:
        tensor_list: Iterable of candidate tensors.
        qNIVP: Callable mapping a candidate tensor to a single-element tensor
            (the acquisition value).

    Returns:
        Tuple ``(max_value, max_index, acq_val_lst)``:
        ``max_value`` is the best normalized value as a tensor (``None`` when
        ``tensor_list`` is empty), ``max_index`` is its position (``-1`` when
        empty; the first occurrence wins on ties), and ``acq_val_lst`` holds
        every normalized value as a Python float.
    """
    max_value = None
    max_index = -1
    acq_val_lst = []

    # The values are only compared, never differentiated, so skip autograd:
    # this avoids building a graph per candidate and keeps the returned
    # tensor from pinning one in memory.
    with torch.no_grad():
        for i, tensor in enumerate(tensor_list):
            normalized_val = qNIVP(tensor) / len(tensor)
            acq_val_lst.append(normalized_val.item())
            if max_value is None or normalized_val > max_value:
                max_value = normalized_val
                max_index = i

    return max_value, max_index, acq_val_lst





# Active-learning benchmark. For every seed:
#   1. draw a random initial training set from the candidate pool,
#   2. fit a SingleTaskGP and record the test MAE,
#   3. repeatedly add the candidate with the highest size-normalized qNIPV
#      value, refit, and record the test MAE again.
N_ACQ_ROUNDS = 100       # maximum number of acquisition rounds per seed
INITIAL_FRACTION = 0.05  # share of the candidate pool used as initial data

# scikit-learn metrics work on host-side NumPy arrays, so convert the test
# targets once; passing a CUDA tensor to sklearn would fail.
ytest_np = ytest.detach().cpu().numpy()

for seed_idx, seed in enumerate(tqdm(seeds), start=1):
    # Report both the seed value and its position in the sweep.
    print(f'running seed {seed} ({seed_idx} of {len(seeds)})')
    torch.cuda.empty_cache()

    # Work on copies so the original candidate pool is reused for every seed.
    xcandidates = xcandidates_original.copy()
    ycandidates = ycandidates_original.copy()

    xinit, yinit, xcandidates, ycandidates = random_initial_data(
        xcandidates, ycandidates, INITIAL_FRACTION, seed=seed
    )

    # Each selected entry is a tensor; stack them along dim 0 into one
    # training set.
    xinit = torch.cat(xinit, dim=0).to(device)
    yinit = torch.cat(yinit, dim=0).to(device)

    gp = SingleTaskGP(xinit, yinit).to(device)
    mll = ExactMarginalLogLikelihood(gp.likelihood, gp).to(device)
    fit_gpytorch_mll(mll)

    # NOTE(review): calling the model directly bypasses `gp.posterior`, so any
    # outcome transform attached to the model is not undone here - confirm
    # this matches the installed BoTorch version's SingleTaskGP defaults.
    with torch.no_grad():
        ypred = gp(xtest).mean.detach().cpu().numpy()

    ymae = mean_absolute_error(ytest_np, ypred)

    # MAE trace for this seed, starting with the initial model.
    pred_mae = [ymae]

    for inner_i in tqdm(range(N_ACQ_ROUNDS)):
        if not len(xcandidates):
            break  # candidate pool exhausted

        qNIVP = qNegIntegratedPosteriorVariance(gp, mc_points=mcp)

        max_value, max_index, acq_val_lst = find_max_normalized_acqval(xcandidates, qNIVP)
        xmax_candidates.append(max_index)

        # Move the selected candidate from the pool into the training set.
        xinit = torch.cat((xinit, xcandidates[max_index]), 0).to(device)
        yinit = torch.cat((yinit, ycandidates[max_index]), 0).to(device)

        del xcandidates[max_index]
        del ycandidates[max_index]

        # The acquisition function holds a reference to the model, so it must
        # be dropped as well for the old model to be reclaimed. Collect
        # garbage first, then hand the freed blocks back to the CUDA allocator.
        del gp, mll, qNIVP
        gc.collect()
        torch.cuda.empty_cache()

        gp = SingleTaskGP(xinit, yinit).to(device)
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp).to(device)
        fit_gpytorch_mll(mll)

        with torch.no_grad():
            # .cpu() is required before .numpy() when running on CUDA.
            ypred_mean = gp(xtest).mean.detach().cpu().numpy()

        ymae = mean_absolute_error(ytest_np, ypred_mean)
        pred_mae.append(ymae)

    qnipv_runs.append(pred_mae)
    # NOTE(review): the same file is rewritten on every seed, so after the
    # sweep it only contains the last seed's trace. The full results live in
    # `qnipv_runs`.
    np.save('qnipv_runs_seed.npy', np.array(pred_mae))

  0%|          | 0/25 [00:00<?, ?it/s]

running seed 25 of 25


100%|██████████| 100/100 [19:48<00:00, 11.88s/it]
  4%|▍         | 1/25 [19:48<7:55:17, 1188.22s/it]

running seed 1037 of 25


100%|██████████| 100/100 [19:14<00:00, 11.55s/it]
  8%|▊         | 2/25 [39:03<7:28:09, 1169.13s/it]

running seed 2545 of 25


100%|██████████| 100/100 [19:24<00:00, 11.64s/it]
 12%|█▏        | 3/25 [58:28<7:07:52, 1166.92s/it]

running seed 996 of 25


100%|██████████| 100/100 [20:21<00:00, 12.22s/it]
 16%|█▌        | 4/25 [1:18:50<6:56:02, 1188.67s/it]

running seed 3343 of 25


100%|██████████| 100/100 [19:17<00:00, 11.58s/it]
 20%|██        | 5/25 [1:38:08<6:32:31, 1177.56s/it]

running seed 2470 of 25


100%|██████████| 100/100 [19:40<00:00, 11.80s/it]
 24%|██▍       | 6/25 [1:57:48<6:13:11, 1178.48s/it]

running seed 2204 of 25


100%|██████████| 100/100 [20:19<00:00, 12.19s/it]
 28%|██▊       | 7/25 [2:18:07<5:57:33, 1191.85s/it]

running seed 4629 of 25


100%|██████████| 100/100 [20:19<00:00, 12.20s/it]
 32%|███▏      | 8/25 [2:38:27<5:40:14, 1200.83s/it]

running seed 4893 of 25


100%|██████████| 100/100 [53:36<00:00, 32.17s/it]
 36%|███▌      | 9/25 [3:32:04<8:08:16, 1831.05s/it]

running seed 635 of 25


100%|██████████| 100/100 [1:50:03<00:00, 66.03s/it] 
 40%|████      | 10/25 [5:22:07<13:46:04, 3304.33s/it]

running seed 3850 of 25


100%|██████████| 100/100 [1:31:38<00:00, 54.98s/it]
 44%|████▍     | 11/25 [6:53:46<15:27:41, 3975.80s/it]

running seed 4277 of 25


100%|██████████| 100/100 [34:23<00:00, 20.64s/it]
 48%|████▊     | 12/25 [7:28:10<12:15:25, 3394.28s/it]

running seed 3004 of 25


100%|██████████| 100/100 [1:59:59<00:00, 72.00s/it] 
 52%|█████▏    | 13/25 [9:28:10<15:09:25, 4547.08s/it]

running seed 294 of 25


100%|██████████| 100/100 [27:32<00:00, 16.52s/it]
 56%|█████▌    | 14/25 [9:55:43<11:13:22, 3672.93s/it]

running seed 47 of 25


100%|██████████| 100/100 [29:48<00:00, 17.88s/it]
 60%|██████    | 15/25 [10:25:31<8:37:28, 3104.83s/it]

running seed 2619 of 25


100%|██████████| 100/100 [51:48<00:00, 31.08s/it]
 64%|██████▍   | 16/25 [11:17:20<7:45:55, 3106.14s/it]

running seed 2743 of 25


100%|██████████| 100/100 [35:21<00:00, 21.22s/it]
 68%|██████▊   | 17/25 [11:52:42<6:14:42, 2810.25s/it]

running seed 1271 of 25


100%|██████████| 100/100 [19:46<00:00, 11.86s/it]
 72%|███████▏  | 18/25 [12:12:29<4:30:56, 2322.36s/it]

running seed 4068 of 25


100%|██████████| 100/100 [20:04<00:00, 12.04s/it]
 76%|███████▌  | 19/25 [12:32:33<3:18:39, 1986.55s/it]

running seed 4586 of 25


100%|██████████| 100/100 [19:54<00:00, 11.95s/it]
 80%|████████  | 20/25 [12:52:28<2:25:44, 1748.88s/it]

running seed 4365 of 25


100%|██████████| 100/100 [19:51<00:00, 11.91s/it]
 84%|████████▍ | 21/25 [13:12:20<1:45:26, 1581.58s/it]

running seed 4233 of 25


100%|██████████| 100/100 [19:35<00:00, 11.76s/it]
 88%|████████▊ | 22/25 [13:31:55<1:12:59, 1459.84s/it]

running seed 3885 of 25


100%|██████████| 100/100 [20:09<00:00, 12.09s/it]
 92%|█████████▏| 23/25 [13:52:05<46:09, 1384.65s/it]  

running seed 1828 of 25


100%|██████████| 100/100 [20:58<00:00, 12.58s/it]
 96%|█████████▌| 24/25 [14:13:03<22:26, 1346.76s/it]

running seed 1698 of 25


100%|██████████| 100/100 [19:01<00:00, 11.42s/it]
100%|██████████| 25/25 [14:32:05<00:00, 2093.03s/it]


In [10]:
# Persist the MAE traces collected in `qnipv_runs` (one entry per seed).
np.save(file='qnipv_runs_seed_true.npy', arr=np.asarray(qnipv_runs))