In [None]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

import odbo
warnings.filterwarnings("ignore")

## Get initial experiment design

In [None]:
# Seed for the candidate shuffle and the initial design. Defined here so this
# cell runs on a fresh kernel (later cells only set `random_seed` inside their
# trial loops, after this cell has already needed it).
INIT_RANDOM_SEED = 0
np.random.seed(INIT_RANDOM_SEED)

data_test = pd.read_csv('../datasets/GB1_2016_149361.csv', sep=',')
name_pre, Y_test = np.array(data_test['AACombo']), np.array(data_test['Fitness'])

# Shuffle every entry except the first one, which stays at index 0.
shuffle_order = np.arange(len(Y_test))
np.random.shuffle(shuffle_order[1:])
name_pre[1:], Y_test[1:] = name_pre[shuffle_order[1:]], Y_test[shuffle_order[1:]]
name = odbo.utils.code_to_array(name_pre)

# Reuse a previously saved initial design only when BOTH cached arrays exist;
# otherwise build a new one from the shuffled candidate pool.
if os.path.isfile('sele_experiment_GB1_2016.npy') and os.path.isfile('sele_fitness_GB1_2016.npy'):
    name_sele = np.load('sele_experiment_GB1_2016.npy')
    Y_train = np.load('sele_fitness_GB1_2016.npy')
    print('Selected initial experiments no. is ', len(Y_train))
else:
    sele_indices = odbo.initialization.initial_design(name, least_occurance=[2,2,2,2],verbose = False,random_state=INIT_RANDOM_SEED)
    name_sele, Y_train = name[sele_indices, :], Y_test[sele_indices]
    print('Selected initial experiments no. is ', len(sele_indices))
print('Select max Y: ', Y_train.max())

## Functions for multiple runs with different random seeds


In [None]:
def wrapped_run_bo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed):
    """Run naive BO (no prescreening) over the full candidate pool.

    Each round asks ``odbo.bo_design`` for ``batch_size`` candidates, moves them
    from the candidate pool into the training set, and refits the featurization
    on the enlarged training set.

    Args:
        name_sele: Raw variables of the initial experiments (one row per experiment).
        Y_train: 1-D array of measurements for ``name_sele``.
        name: Raw variables of every candidate in the search space.
        Y_test: 1-D array of measurements for ``name`` (looked up when a
            candidate is selected).
        gp_method: Surrogate model name forwarded to ``odbo.bo_design``.
        batch_size: Number of candidates selected per round.
        search_iter: Number of BO rounds to run.
        random_seed: Accepted for interface parity with the other runners;
            not used inside this function.

    Returns:
        Tuple ``(Y_train_sele, name_sele_temp)``: the column tensor of all
        measurements (initial + selected, in selection order) and the array of
        the corresponding raw variables.
    """
    feature_model = odbo.featurization.MaxMeasurement(raw_vars=name_sele, Y=Y_train)
    X_train = feature_model.transform(name_sele)
    X_test = feature_model.transform(name)
    X_train_sele, Y_train_sele = torch.tensor(X_train), torch.tensor(Y_train.reshape(len(Y_train),1))
    X_test_sele, Y_test_sele = torch.tensor(X_test), torch.tensor(Y_test.reshape(len(Y_test),1))
    search_name_sele, name_sele_temp = name, name_sele
    failure_count, max_count = 0, 0
    for _ in range(search_iter):
        X_next, acq_value, next_exp_id = odbo.bo_design(X=X_train_sele, Y=Y_train_sele, X_pending=X_test_sele, gp_method=gp_method, batch_size=batch_size)
        ids_keep = list(np.delete(range(X_test_sele.shape[0]), next_exp_id))
        X_train_sele, Y_train_sele = torch.cat([X_train_sele, X_test_sele[next_exp_id, :]]), torch.cat([Y_train_sele, Y_test_sele[next_exp_id]])
        # Record the selected names BEFORE shrinking the pool: next_exp_id
        # indexes the pre-removal pool, so looking it up afterwards would
        # return the wrong rows (or run past the end).
        name_sele_temp = np.concatenate((name_sele_temp, search_name_sele[next_exp_id]))
        X_test_sele, Y_test_sele, search_name_sele = X_test_sele[ids_keep, :], Y_test_sele[ids_keep], search_name_sele[ids_keep]

        # NOTE(review): the featurizer is refit on the already-transformed
        # X_train_sele rather than on the raw variables -- confirm intended.
        feature_model1 = odbo.featurization.AvgMeasurement(raw_vars=X_train_sele, Y=Y_train_sele.detach().numpy())
        # A round "fails" when the new batch does not beat the previous best.
        if Y_train_sele[-batch_size:].detach().numpy().max() <= Y_train_sele[:-batch_size].max():
            failure_count = failure_count + 1
        else:
            failure_count = 0
        # After 3+ consecutive failures switch to the max-based featurization,
        # for at most 3 rounds in a row before falling back to the average.
        if failure_count >= 3 and max_count < 3:
            max_count = max_count + 1
            feature_model1 = odbo.featurization.MaxMeasurement(raw_vars=X_train_sele, Y=Y_train_sele.detach().numpy())
        else:
            max_count = 0
        X_train_sele = torch.tensor(feature_model1.transform(X_train_sele))
        X_test_sele = torch.tensor(feature_model1.transform(X_test_sele))
    return Y_train_sele, name_sele_temp

def wrapped_run_turbo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed):
    """Run TuRBO (no prescreening) over the full candidate pool.

    Each round asks ``odbo.turbo_design`` for candidates per trust region,
    keeps the best-measured candidate of every batch slot, moves those from the
    pool into the training set, updates the TuRBO state and refits the
    featurization.

    Reads the module-level ``tr_length`` list (trust-region lengths); it must
    be defined before this function is called.

    Args:
        name_sele: Raw variables of the initial experiments (one row per experiment).
        Y_train: 1-D array of measurements for ``name_sele``.
        name: Raw variables of every candidate in the search space.
        Y_test: 1-D array of measurements for ``name``.
        gp_method: Surrogate model name forwarded to ``odbo.turbo_design``.
        batch_size: Number of candidates added per round.
        search_iter: Number of rounds to run.
        random_seed: Accepted for interface parity with the other runners;
            not used inside this function.

    Returns:
        Tuple ``(Y_train_sele, name_sele_temp)``: the column tensor of all
        measurements (initial + selected) and the array of the corresponding
        raw variables.
    """
    feature_model = odbo.featurization.MaxMeasurement(raw_vars=name_sele, Y=Y_train)
    X_train = feature_model.transform(name_sele)
    X_test = feature_model.transform(name)
    X_train_sele, Y_train_sele = torch.tensor(X_train), torch.tensor(Y_train.reshape(len(Y_train),1))
    search_name_sele, name_sele_temp = name, name_sele
    X_test_sele, Y_test_sele = torch.tensor(X_test), torch.tensor(Y_test.reshape(len(Y_test),1))
    # max_count is initialised explicitly, matching wrapped_run_bo.
    failure_count, max_count = 0, 0
    n_regions = len(tr_length)
    state = odbo.turbo.TurboState(dim=X_train_sele.shape[1], batch_size=batch_size, length=tr_length, n_trust_regions=n_regions, failure_tolerance = 10)
    state.best_value = Y_train_sele.max()
    for _ in range(search_iter):
        X_next, acq_value, raw_next_exp_id = odbo.turbo_design(state=state, X=X_train_sele, Y=Y_train_sele, X_pending=X_test_sele, n_trust_regions=n_regions, batch_size=batch_size, gp_method=gp_method)
        Y_next_m = torch.zeros((n_regions, batch_size, 1), device=Y_train_sele.device, dtype=Y_train_sele.dtype)
        next_exp_id = []
        for i in range(batch_size):
            # Candidates proposed for batch slot i, one per trust region.
            next_exp_id_m = raw_next_exp_id[:, i]
            Y_next_m[:, i, 0] = Y_test_sele[next_exp_id_m].reshape(n_regions)
            # Keep only the best-measured proposal for this slot.
            next_exp_id.append(next_exp_id_m[np.argmax(Y_test_sele[next_exp_id_m])])
        X_train_sele, Y_train_sele = torch.cat([X_train_sele, X_test_sele[next_exp_id, :]]), torch.cat([Y_train_sele, Y_test_sele[next_exp_id]])
        ids_keep = list(np.delete(range(X_test_sele.shape[0]), next_exp_id))
        # Record the selected names BEFORE shrinking the pool: next_exp_id
        # indexes the pre-removal pool, so looking it up afterwards would
        # return the wrong rows (or run past the end).
        name_sele_temp = np.concatenate((name_sele_temp, search_name_sele[next_exp_id]))
        X_test_sele, Y_test_sele, search_name_sele = X_test_sele[ids_keep, :], Y_test_sele[ids_keep], search_name_sele[ids_keep]
        state = odbo.turbo.update_state(state=state, Y_next=Y_next_m)

        # NOTE(review): the featurizer is refit on the already-transformed
        # X_train_sele rather than on the raw variables -- confirm intended.
        feature_model1 = odbo.featurization.AvgMeasurement(raw_vars=X_train_sele, Y=Y_train_sele.detach().numpy())
        # A round "fails" when the new batch does not beat the previous best.
        if Y_train_sele[-batch_size:].detach().numpy().max() <= Y_train_sele[:-batch_size].max():
            failure_count = failure_count + 1
        else:
            failure_count = 0
        # After 3+ consecutive failures switch to the max-based featurization,
        # for at most 3 rounds in a row before falling back to the average.
        if failure_count >= 3 and max_count < 3:
            max_count = max_count + 1
            feature_model1 = odbo.featurization.MaxMeasurement(raw_vars=X_train_sele, Y=Y_train_sele.detach().numpy())
        else:
            max_count = 0
        X_train_sele = torch.tensor(feature_model1.transform(X_train_sele))
        X_test_sele = torch.tensor(feature_model1.transform(X_test_sele))

    return Y_train_sele, name_sele_temp


def wrapped_run_odbo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed):
    """Run ODBO: XGBOD prescreening followed by naive BO on the retained pool.

    A prescreening model is fit on the initial experiments (labels from
    ``odbo.prescreening.sp_label`` with a data-derived threshold). Only
    candidates it predicts as label 0 are kept in the search pool; BO then
    proceeds as in ``wrapped_run_bo``.

    Args:
        name_sele: Raw variables of the initial experiments (one row per experiment).
        Y_train: 1-D array of measurements for ``name_sele``.
        name: 2-D array of raw variables of every candidate in the search space.
        Y_test: 1-D array of measurements for ``name``.
        gp_method: Surrogate model name forwarded to ``odbo.bo_design``.
        batch_size: Number of candidates selected per round.
        search_iter: Number of BO rounds to run.
        random_seed: Accepted for interface parity with the other runners;
            not used inside this function.

    Returns:
        Tuple ``(Y_train_sele, name_sele_temp)``: the column tensor of all
        measurements (initial + selected, in selection order) and the array of
        the corresponding raw variables.
    """
    # Labelling threshold: mean - 2*std of the initial measurements, floored at 0.05.
    threshold = max(0.05, np.mean(Y_train)-2*np.std(Y_train))
    feature_model = odbo.featurization.MaxMeasurement(raw_vars=name_sele, Y=Y_train)
    X_train = feature_model.transform(name_sele)
    X_test = feature_model.transform(name)
    labels_train = odbo.prescreening.sp_label(X_train, Y_train, thres=threshold)
    pre_model = odbo.prescreening.XGBOD(eval_metric = 'error')
    pre_model.fit(X_train, labels_train)
    pred_test_labels = pre_model.predict(X_test)

    X_train_sele, Y_train_sele = torch.tensor(X_train), torch.tensor(Y_train.reshape(len(Y_train),1))
    # Only search the space after prescreening (candidates predicted as label 0)
    sele_id_test = [k for k, x in enumerate(pred_test_labels) if x == 0]
    X_test_sele, Y_test_sele = torch.tensor(X_test[sele_id_test, :]), torch.tensor(Y_test[sele_id_test].reshape(len(sele_id_test),1))
    search_name_sele, name_sele_temp = name[sele_id_test, :], name_sele
    ## Run BO experiment with robust regression or directly gp
    failure_count, max_count = 0, 0
    for _ in range(search_iter):
        X_next, acq_value, next_exp_id = odbo.bo_design(X=X_train_sele, Y=Y_train_sele, X_pending=X_test_sele, gp_method=gp_method, batch_size=batch_size)
        ids_keep = list(np.delete(range(X_test_sele.shape[0]), next_exp_id))
        X_train_sele, Y_train_sele = torch.cat([X_train_sele, X_test_sele[next_exp_id, :]]), torch.cat([Y_train_sele, Y_test_sele[next_exp_id]])
        # Record the selected names BEFORE shrinking the pool: next_exp_id
        # indexes the pre-removal pool, so looking it up afterwards would
        # return the wrong rows (or run past the end).
        name_sele_temp = np.concatenate((name_sele_temp, search_name_sele[next_exp_id]))
        X_test_sele, Y_test_sele, search_name_sele = X_test_sele[ids_keep, :], Y_test_sele[ids_keep], search_name_sele[ids_keep]

        # NOTE(review): the featurizer is refit on the already-transformed
        # X_train_sele rather than on the raw variables -- confirm intended.
        feature_model1 = odbo.featurization.AvgMeasurement(raw_vars=X_train_sele, Y=Y_train_sele.detach().numpy())
        # A round "fails" when the new batch does not beat the previous best.
        if Y_train_sele[-batch_size:].detach().numpy().max() <= Y_train_sele[:-batch_size].max():
            failure_count = failure_count + 1
        else:
            failure_count = 0
        # After 3+ consecutive failures switch to the max-based featurization,
        # for at most 3 rounds in a row before falling back to the average.
        if failure_count >= 3 and max_count < 3:
            max_count = max_count + 1
            feature_model1 = odbo.featurization.MaxMeasurement(raw_vars=X_train_sele, Y=Y_train_sele.detach().numpy())
        else:
            max_count = 0
        X_train_sele = torch.tensor(feature_model1.transform(X_train_sele))
        X_test_sele = torch.tensor(feature_model1.transform(X_test_sele))
    return Y_train_sele, name_sele_temp

def wrapped_run_odturbo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed):
    """Run ODBO with TuRBO: XGBOD prescreening followed by TuRBO on the retained pool.

    A prescreening model is fit on the initial experiments (labels from
    ``odbo.prescreening.sp_label`` with a data-derived threshold). Only
    candidates it predicts as label 0 are kept in the search pool; TuRBO then
    proceeds as in ``wrapped_run_turbo``.

    Reads the module-level ``tr_length`` list (trust-region lengths); it must
    be defined before this function is called.

    Args:
        name_sele: Raw variables of the initial experiments (one row per experiment).
        Y_train: 1-D array of measurements for ``name_sele``.
        name: 2-D array of raw variables of every candidate in the search space.
        Y_test: 1-D array of measurements for ``name``.
        gp_method: Surrogate model name forwarded to ``odbo.turbo_design``.
        batch_size: Number of candidates added per round.
        search_iter: Number of rounds to run.
        random_seed: Accepted for interface parity with the other runners;
            not used inside this function.

    Returns:
        Tuple ``(Y_train_sele, name_sele_temp)``: the column tensor of all
        measurements (initial + selected) and the array of the corresponding
        raw variables.
    """
    # Labelling threshold: mean - 2*std of the initial measurements, floored at 0.05.
    threshold = max(0.05, np.mean(Y_train)-2*np.std(Y_train))
    feature_model = odbo.featurization.MaxMeasurement(raw_vars=name_sele, Y=Y_train)
    X_train = feature_model.transform(name_sele)
    X_test = feature_model.transform(name)
    labels_train = odbo.prescreening.sp_label(X_train, Y_train, thres=threshold)
    pre_model = odbo.prescreening.XGBOD(eval_metric = 'error')
    pre_model.fit(X_train, labels_train)
    pred_test_labels = pre_model.predict(X_test)

    X_train_sele, Y_train_sele = torch.tensor(X_train), torch.tensor(Y_train.reshape(len(Y_train),1))
    # Only search the space after prescreening. The previous code extended the
    # list from `in_inlier` / `out_inlier`, which are never defined in this
    # notebook; use the same "predicted label == 0" rule as wrapped_run_odbo.
    sele_id_test = [k for k, x in enumerate(pred_test_labels) if x == 0]
    search_name_sele, name_sele_temp = name[sele_id_test, :], name_sele
    X_test_sele, Y_test_sele = torch.tensor(X_test[sele_id_test, :]), torch.tensor(Y_test[sele_id_test].reshape(len(sele_id_test),1))
    # Run BO experiment with robust regression or directly gp
    # max_count is initialised explicitly, matching wrapped_run_odbo.
    failure_count, max_count = 0, 0
    n_regions = len(tr_length)
    state = odbo.turbo.TurboState(dim=X_train_sele.shape[1], batch_size=batch_size, length=tr_length, n_trust_regions=n_regions, failure_tolerance = 10)
    state.best_value = Y_train_sele.max()
    for _ in range(search_iter):
        X_next, acq_value, raw_next_exp_id = odbo.turbo_design(state=state, X=X_train_sele, Y=Y_train_sele, X_pending=X_test_sele, n_trust_regions=n_regions, batch_size=batch_size, gp_method=gp_method)
        Y_next_m = torch.zeros((n_regions, batch_size, 1), device=Y_train_sele.device, dtype=Y_train_sele.dtype)
        next_exp_id = []
        for i in range(batch_size):
            # Candidates proposed for batch slot i, one per trust region.
            next_exp_id_m = raw_next_exp_id[:, i]
            Y_next_m[:, i, 0] = Y_test_sele[next_exp_id_m].reshape(n_regions)
            # Keep only the best-measured proposal for this slot.
            next_exp_id.append(next_exp_id_m[np.argmax(Y_test_sele[next_exp_id_m])])
        X_train_sele, Y_train_sele = torch.cat([X_train_sele, X_test_sele[next_exp_id, :]]), torch.cat([Y_train_sele, Y_test_sele[next_exp_id]])
        ids_keep = list(np.delete(range(X_test_sele.shape[0]), next_exp_id))
        # Record the selected names BEFORE shrinking the pool: next_exp_id
        # indexes the pre-removal pool, so looking it up afterwards would
        # return the wrong rows (or run past the end).
        name_sele_temp = np.concatenate((name_sele_temp, search_name_sele[next_exp_id]))
        X_test_sele, Y_test_sele, search_name_sele = X_test_sele[ids_keep, :], Y_test_sele[ids_keep], search_name_sele[ids_keep]
        state = odbo.turbo.update_state(state=state, Y_next=Y_next_m)

        # NOTE(review): the featurizer is refit on the already-transformed
        # X_train_sele rather than on the raw variables -- confirm intended.
        feature_model1 = odbo.featurization.AvgMeasurement(raw_vars=X_train_sele, Y=Y_train_sele.detach().numpy())
        # A round "fails" when the new batch does not beat the previous best.
        if Y_train_sele[-batch_size:].detach().numpy().max() <= Y_train_sele[:-batch_size].max():
            failure_count = failure_count + 1
        else:
            failure_count = 0
        # After 3+ consecutive failures switch to the max-based featurization,
        # for at most 3 rounds in a row before falling back to the average.
        if failure_count >= 3 and max_count < 3:
            max_count = max_count + 1
            feature_model1 = odbo.featurization.MaxMeasurement(raw_vars=X_train_sele, Y=Y_train_sele.detach().numpy())
        else:
            max_count = 0
        X_train_sele = torch.tensor(feature_model1.transform(X_train_sele))
        X_test_sele = torch.tensor(feature_model1.transform(X_test_sele))

    return Y_train_sele, name_sele_temp


## Comparison with different choices of Bayesian Optimization methods

In [None]:
%%capture
search_iter = 50
trials = 50
batch_size = 1
tr_length = [4.8]

gp_method='gp_regression'

# Naive BO without XGBOD prescreening using GP as surrogate model and batch size of 1

if os.path.isfile('results/GB1_2016_BO_GP_batch1.npy') == True:
    BO_GP_bacth1_Y = np.load('results/GB1_2016_BO_GP_batch1.npy')
else:
    BO_GP_bacth1_Y = []
    for i in range(trials):
        random_seed = i
        Y_output, AA_output = wrapped_run_bo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed)
        BO_GP_bacth1_Y.append(Y_output)
    BO_GP_bacth1_Y = np.hstack(BO_GP_bacth1_Y)
    np.save('results/GB1_2016_BO_GP_batch1.npy', BO_GP_bacth1_Y)

# TuRBO without XGBOD prescreening using GP as surrogate model and batch size of 1

if os.path.isfile('results/GB1_2016_TuRBO_GP_batch1.npy') == True:
    TuRBO_GP_bacth1_Y = np.load('results/GB1_2016_TuRBO_GP_batch1.npy')
else:
    TuRBO_GP_bacth1_Y = []
    for i in range(trials):
        random_seed = i
        Y_output, AA_output = wrapped_run_turbo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed)
        TuRBO_GP_bacth1_Y.append(Y_output)
    TuRBO_GP_bacth1_Y = np.hstack(TuRBO_GP_bacth1_Y)
    np.save('results/GB1_2016_TuRBO_GP_batch1.npy', TuRBO_GP_bacth1_Y)

# ODBO using naive BO as optimization method, GP as surrogate model and batch size of 1
if os.path.isfile('results/GB1_2016_ODBO_BO_GP_batch1.npy') == True:
    ODBO_GP_bacth1_Y = np.load('results/GB1_2016_ODBO_BO_GP_batch1.npy')
else:
    ODBO_GP_bacth1_Y = []
    for i in range(trials):
        random_seed = i
        Y_output, AA_output = wrapped_run_odbo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed)
        ODBO_GP_bacth1_Y.append(Y_output)
    ODBO_GP_bacth1_Y = np.hstack(ODBO_GP_bacth1_Y)
    np.save('results/GB1_2016_ODBO_BO_GP_batch1.npy', ODBO_GP_bacth1_Y)

# ODBO using TuRBO as optimization method, GP as surrogate model and batch size of 1
if os.path.isfile('results/GB1_2016_ODBO_TuRBO_GP_batch1.npy') == True:
    ODTuRBO_GP_bacth1_Y = np.load('results/GB1_2016_ODBO_TuRBO_GP_batch1.npy')
else:
    ODTuRBO_GP_bacth1_Y = []
    for i in range(trials):
        random_seed = i
        Y_output, AA_output = wrapped_run_odturbo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed)
        ODTuRBO_GP_bacth1_Y.append(Y_output)
    ODTuRBO_GP_bacth1_Y = np.hstack(ODTuRBO_GP_bacth1_Y)
    np.save('results/GB1_2016_ODBO_TuRBO_GP_batch1.npy', ODTuRBO_GP_bacth1_Y)

gp_method='robust_regression'

# Naive BO without XGBOD prescreening using Robust GP as surrogate model and batch size of 1

if os.path.isfile('results/GB1_2016_BO_GP_batch1.npy') == True:
    BO_GP_bacth1_Y = np.load('results/GB1_2016_BO_GP_batch1.npy')
else:
    BO_GP_bacth1_Y = []
    for i in range(trials):
        random_seed = i
        Y_output, AA_output = wrapped_run_bo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed)
        BO_GP_bacth1_Y.append(Y_output)
    BO_GP_bacth1_Y = np.hstack(BO_GP_bacth1_Y)
    np.save('results/GB1_2016_BO_GP_batch1.npy', BO_GP_bacth1_Y)

# TuRBO without XGBOD prescreening using Robust GP as surrogate model and batch size of 1

if os.path.isfile('results/GB1_2016_TuRBO_GP_batch1.npy') == True:
    TuRBO_GP_bacth1_Y = np.load('results/GB1_2016_TuRBO_GP_batch1.npy')
else:
    TuRBO_GP_bacth1_Y = []
    for i in range(trials):
        random_seed = i
        Y_output, AA_output = wrapped_run_turbo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed)
        TuRBO_GP_bacth1_Y.append(Y_output)
    TuRBO_GP_bacth1_Y = np.hstack(TuRBO_GP_bacth1_Y)
    np.save('results/GB1_2016_TuRBO_GP_batch1.npy', TuRBO_GP_bacth1_Y)

# ODBO using naive BO as optimization method, Robust GP as surrogate model and batch size of 1
if os.path.isfile('results/GB1_2016_ODBO_BO_RobustGP_batch1.npy') == True:
    ODBO_RobustGP_bacth1_Y = np.load('results/GB1_2016_ODBO_BO_RobustGP_batch1.npy')
else:
    ODBO_RobustGP_bacth1_Y = []
    for i in range(trials):
        random_seed = i
        Y_output, AA_output = wrapped_run_odbo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed)
        ODBO_RobustGP_bacth1_Y.append(Y_output)
    ODBO_RobustGP_bacth1_Y = np.hstack(ODBO_RobustGP_bacth1_Y)
    np.save('results/GB1_2016_ODBO_BO_RobustGP_batch1.npy', ODBO_RobustGP_bacth1_Y)

# ODBO using TuRBO as optimization method, Robust GP as surrogate model and batch size of 1
if os.path.isfile('results/GB1_2016_ODBO_TuRBO_RobustGP_batch1.npy') == True:
    ODTuRBO_RobustGP_bacth1_Y = np.load('results/GB1_2016_ODBO_TuRBO_RobustGP_batch1.npy')
else:
    ODTuRBO_RobustGP_bacth1_Y = []
    for i in range(trials):
        random_seed = i
        Y_output, AA_output = wrapped_run_odturbo(name_sele, Y_train, name, Y_test, gp_method, batch_size, search_iter, random_seed)
        ODTuRBO_RobustGP_bacth1_Y.append(Y_output)
    ODTuRBO_RobustGP_bacth1_Y = np.hstack(ODTuRBO_RobustGP_bacth1_Y)
    np.save('results/GB1_2016_ODBO_TuRBO_RobustGP_batch1.npy', ODTuRBO_RobustGP_bacth1_Y)



In [None]:
# Plotting results: one curve per ODBO variant, plus the true optimum as a reference line.
# `iters` follows the `search_iter` setting used to generate the results.
odbo.plot.plot_bo(iters=search_iter, BO_result=ODBO_GP_bacth1_Y, methods='ODBO_BO_GP_batch1')
odbo.plot.plot_bo(iters=search_iter, BO_result=ODTuRBO_GP_bacth1_Y, methods='ODBO_TuRBO_GP_batch1')
odbo.plot.plot_bo(iters=search_iter, BO_result=ODBO_RobustGP_bacth1_Y, methods='ODBO_BO_RobustGP_batch1')
# Label aligned with the naming scheme of the other three curves.
odbo.plot.plot_bo(iters=search_iter, BO_result=ODTuRBO_RobustGP_bacth1_Y, methods='ODBO_TuRBO_RobustGP_batch1')
true_max_fitness = Y_test.max()
plt.plot([0, search_iter], [true_max_fitness, true_max_fitness], label='True maximum fitness', color = 'k')
plt.xlabel('Number of observations (beyond initial points)')
plt.ylabel('Maximum Fitness')
# Without an explicit legend call the reference line's label is never shown.
plt.legend()
plt.show()