In [4]:
import numpy as np
import torch
import pandas as pd
import odbo
import os
import gc

## TurBO for next best experiment

In [5]:
random_seed = 0
np.random.seed(random_seed)
data_test = pd.read_csv('../datasets/GB1_2014_536944.csv', sep=',')
name_pre, Y_test = np.array(data_test['AACombo']), np.array(data_test['Fitness'])
name = odbo.utils.code_to_array(name_pre)
del data_test
name_sele = np.load('sele_experiment_GB1_2014.npy')
Y_train = np.load('sele_fitness_GB1_2014.npy')
print('Selected initial experiments no. is ', len(Y_train))
print('Select max Y: ', Y_train.max(), 'True max Y:', Y_test.max())


Selected initial experiments no. is  135
Select max Y:  2.27 True max Y: 5.022


In [None]:
l, search_iter = 0, 50
gp_method='gp_regression'
tr_length = [3.2]
batch_size = 1
failure_count,max_count = 0,0
state = odbo.turbo.TurboState(dim=20, batch_size=batch_size, length=tr_length, n_trust_regions=len(tr_length), failure_tolerance = 10)
state.best_value = Y_train.max()

name_sele_temp = name_sele.copy()
Y_train_sele = torch.tensor(Y_train.reshape(len(Y_train),1))

while l < search_iter:
    if Y_train_sele[-batch_size:].detach().numpy().max() < Y_train_sele[:-batch_size].max():
        failure_count = failure_count + 1
        feature_model = odbo.featurization.FewChangeMeasurement(raw_vars=name_sele_temp, Y=Y_train_sele.detach().numpy(), method='Max', mode='correlate', n_components=20)
    else:
        failure_count = 0
        feature_model = odbo.featurization.FewChangeMeasurement(raw_vars=name_sele_temp, Y=Y_train_sele.detach().numpy(), method='Max', mode='correlate', n_components=20)
    if failure_count >= 3 and max_count < 3:
        max_count = max_count + 1
        feature_model = odbo.featurization.FewChangeMeasurement(raw_vars=name_sele_temp, Y=Y_train_sele.detach().numpy(), method='Avg', mode='correlate', n_components=20)
    else:
        max_count = 0
        feature_model = odbo.featurization.FewChangeMeasurement(raw_vars=name_sele_temp, Y=Y_train_sele.detach().numpy(), method='Avg', mode='correlate', n_components=20)

    X_test, X_test_pca= feature_model.transform(name)
    X_train_sele, X_train_sele_pca = feature_model.transform(name_sele_temp)
    X_test_pca, X_train_sele_pca = torch.tensor(X_test_pca), torch.tensor(X_train_sele_pca)
    print('Feature transformation done')
    threshold = Y_train_sele[np.argsort(Y_train_sele)[int(0.9*len(Y_train_sele))]]
    labels_train = odbo.prescreening.sp_label(X_train_sele, Y_train_sele, thres=threshold)
    pre_model = odbo.prescreening.XGBOD(eval_metric = 'error')
    pre_model.fit(X_train_sele, labels_train)
    pred_test_labels = pre_model.predict(X_test)
    sele_id_test = list(np.where(pred_test_labels == 0)[0])
    del X_test, X_train_sele, pre_model, pred_test_labels, feature_model
    gc.collect()
    
    print('Prescreened search space size: ', len(sele_id_test))
    search_name_sele = name[sele_id_test, :]
    X_test_sele_pca, Y_test_sele = torch.tensor(X_test_pca[sele_id_test, :]), torch.tensor(Y_test[sele_id_test].reshape(len(sele_id_test),1))
    print("Iter: ", l, "Current Max: ", Y_train_sele.max().detach().numpy(), 'TR length: ', state.length, "Test max: ", Y_test_sele.max().detach().numpy())
    X_next, acq_value, raw_next_exp_id = odbo.turbo_design(state=state, X=X_train_sele_pca, Y=Y_train_sele, X_pending=X_test_sele_pca, n_trust_regions=len(tr_length), batch_size=batch_size, gp_method=gp_method)
    Y_next_m = torch.zeros((len(tr_length), batch_size, 1), device=Y_train_sele.device, dtype=Y_train_sele.dtype)
    next_exp_id = []
    for i in range(batch_size):
        next_exp_id_m = raw_next_exp_id[:, i]
        Y_next_m[:, i, 0], idtoadd = Y_test_sele[next_exp_id_m].reshape(len(tr_length)), next_exp_id_m[np.argmax(Y_test_sele[next_exp_id_m])]
        next_exp_id.append(idtoadd)
    Y_train_sele = torch.cat([Y_train_sele, Y_test_sele[next_exp_id]])
    name_sele_temp = np.concatenate((name_sele_temp, search_name_sele[next_exp_id]))
    print("Newly added value: ", Y_train_sele[-batch_size:].detach().numpy(), name_sele_temp[-1], "Current size: ", len(Y_train_sele))
    state = odbo.turbo.update_state(state=state, Y_next=Y_next_m)
    l = l + 1

#np.save('results/GB1_2014/GB1_2014_ODBO_TuRBO_GP_batch1_{}.npy'.format(random_seed), Y_train_sele)



Feature transformation done
Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Prescreened search space size:  63223
Iter:  0 Current Max:  2.27 TR length:  [3.2] Test max:  4.288
Newly added value:  [[1.339]] ['Q' 'Y' 'K' 'L' 'I' 'L' 'N' 'G' 'K' 'T' 'L' 'K' 'G' 'E' 'T' 'T' 'T' 'C'
 'A' 'V' 'L' 'A' 'A' 'T' 'A' 'E' 'K' 'V' 'F' 'K' 'Q' 'Y' 'A' 'N' 'D' 'N'
 'G' 'V' 'D' 'G' 'E' 'W' 'T' 'Y' 'D' 'D' 'A' 'T' 'K' 'T' 'F' 'T' 'V' 'T'
 'E'] Current size:  136
Feature transformation done
Parameters: { "silent" } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


P

## BO for next best experiment

In [None]:
## Run BO experiment with robust regression or directly gp
l, search_iter = 0, 50
batch_size = 1
gp_method='gp_regression'
failure_count,max_count = 0, 0
while l < search_iter:
    print("Iter: ", l, "Current Max: ", Y_train_sele.max().detach().numpy(), "Test max: ", Y_test_sele.max().detach().numpy())
    X_next, acq_value, next_exp_id = odbo.bo_design(X=X_train_sele, Y=Y_train_sele, X_pending=X_test_sele, gp_method=gp_method, batch_size=batch_size)
    ids_keep = list(np.delete(range(X_test_sele.shape[0]), next_exp_id))
    X_train_sele, Y_train_sele = torch.cat([X_train_sele, X_test_sele[next_exp_id, :]]), torch.cat([Y_train_sele, Y_test_sele[next_exp_id]])
    X_test_sele, Y_test_sele = X_test_sele[ids_keep, :], Y_test_sele[ids_keep]
    name_sele_temp = np.concatenate((name_sele_temp, search_name_sele[next_exp_id]))
    search_name_sele = search_name_sele[ids_keep]
    print("Newly added value: ", Y_train_sele[-batch_size:].detach().numpy(), name_sele_temp[-1])
    feature_model1 = odbo.featurization.MaxMeasurement(raw_vars=X_train_sele, Y=Y_train_sele.detach().numpy())
    if Y_train_sele[-batch_size:].detach().numpy().max() <= Y_train_sele[:-batch_size].max():
        failure_count = failure_count + 1
    else:
        failure_count = 0
    if failure_count >= 3 and max_count < 3:
        max_count = max_count + 1
        feature_model1 = odbo.featurization.AvgMeasurement(raw_vars=X_train_sele, Y=Y_train_sele.detach().numpy())
    else:
        max_count = 0
    X_train_sele = torch.tensor(feature_model1.transform(X_train_sele))
    X_test_sele= torch.tensor(feature_model1.transform(X_test_sele))
    l = l + 1

#np.save('results/GB1_2014/GB1_2014_ODBO_BO_GP_batch1_{}.npy'.format(random_seed), Y_train_sele)

In [64]:
sele_id_test = list(np.where(pred_test_labels == 0)[0])
name_sele_temp = name_sele.copy()
X_train_sele_pca, Y_train_sele = torch.tensor(X_train_pca), torch.tensor(Y_train.reshape(len(Y_train),1))

# Run BO experiment with robust regression or directly GP
l, search_iter = 0, 50
gp_method='gp_regression'
tr_length = [3.2]
batch_size = 1
failure_count,max_count = 0,0
state = odbo.turbo.TurboState(dim=X_train_sele_pca.shape[1], batch_size=batch_size, length=tr_length, n_trust_regions=len(tr_length), failure_tolerance = 10)
state.best_value = Y_train_sele.max()
while l < search_iter:
    print('Prescreened search space size: ', len(sele_id_test))
    search_name_sele = name[sele_id_test, :]
    X_test_sele_pca, Y_test_sele = torch.tensor(X_test_pca[sele_id_test, :]), torch.tensor(Y_test[sele_id_test].reshape(len(sele_id_test),1))
    print("Iter: ", l, "Current Max: ", Y_train_sele.max().detach().numpy(), 'TR length: ', state.length, "Test max: ", Y_test_sele.max().detach().numpy())
    X_next, acq_value, raw_next_exp_id = odbo.turbo_design(state=state, X=X_train_sele_pca, Y=Y_train_sele, X_pending=X_test_sele_pca, n_trust_regions=len(tr_length), batch_size=batch_size, gp_method=gp_method)
    Y_next_m = torch.zeros((len(tr_length), batch_size, 1), device=Y_train_sele.device, dtype=Y_train_sele.dtype)
    next_exp_id = []
    for i in range(batch_size):
        next_exp_id_m = raw_next_exp_id[:, i]
        Y_next_m[:, i, 0], idtoadd = Y_test_sele[next_exp_id_m].reshape(len(tr_length)), next_exp_id_m[np.argmax(Y_test_sele[next_exp_id_m])]
        next_exp_id.append(idtoadd)
    X_train_sele, Y_train_sele = torch.cat([X_train_sele, X_test_sele[next_exp_id, :]]), torch.cat([Y_train_sele, Y_test_sele[next_exp_id]])
    ids_keep = list(np.delete(range(X_test_sele.shape[0]), next_exp_id))
    name_sele_temp = np.concatenate((name_sele_temp, search_name_sele[next_exp_id]))
    print("Newly added value: ", Y_train_sele[-batch_size:].detach().numpy(), name_sele_temp[-1])
    state = odbo.turbo.update_state(state=state, Y_next=Y_next_m)
    print(X_train_sele.shape)
    if Y_train_sele[-batch_size:].detach().numpy().max() <= Y_train_sele[:-batch_size].max():
        failure_count = failure_count + 1
        feature_model1 = odbo.featurization.FewChangeMeasurement(raw_vars=name_sele_temp, Y=Y_train_sele.detach().numpy(), method='Max', mode='correlate')
    else:
        failure_count = 0
        feature_model1 = odbo.featurization.FewChangeMeasurement(raw_vars=name_sele_temp, Y=Y_train_sele.detach().numpy(), method='Max', mode='correlate')
    if failure_count >= 3 and max_count < 3:
        max_count = max_count + 1
        feature_model1 = odbo.featurization.FewChangeMeasurement(raw_vars=name_sele_temp, Y=Y_train_sele.detach().numpy(), method='Avg', mode='correlate')
    else:
        max_count = 0
        feature_model1 = odbo.featurization.FewChangeMeasurement(raw_vars=name_sele_temp, Y=Y_train_sele.detach().numpy(), method='Avg', mode='correlate')

    X_test_sele, X_test_sele_pca= feature_model1.transform(name)
    X_train_sele, X_train_sele_pca = feature_model1.transform(name_sele_temp)
    X_test_sele_pca, X_train_sele_pca = torch.tensor(X_test_sele_pca), torch.tensor(X_train_sele_pca)
    print(feature_model1._pca.explained_variance_ratio_.sum())

    threshold = Y_train[np.argsort(Y_train_sele)[int(0.95*len(Y_train_sele))]]
    labels_train = odbo.prescreening.sp_label(X_train_sele, Y_train_sele, thres=threshold)
    pre_model = odbo.prescreening.XGBOD(eval_metric = 'error')
    pre_model.fit(X_train_sele, labels_train)
    pred_labels = pre_model.predict(X_train)
    labels_test = odbo.prescreening.sp_label(X_test_sele, Y_test, thres=threshold)
    pred_test_labels = pre_model.predict(X_test_sele)
    sele_id_test = list(np.where(pred_test_labels == 0)[0])
    
    l = l + 1

#np.save('results/GB1_2014/GB1_2014_ODBO_TuRBO_GP_batch1_{}.npy'.format(random_seed), Y_train_sele)


Prescreened search space size:  174
Iter:  0 Current Max:  0.8197798314933114 TR length:  [3.2] Test max:  -4.3428059215206005


IndexError: index 8096 is out of bounds for dimension 0 with size 174