In [None]:
import numpy as np
import torch
import pandas as pd
import odbo
import os
import matplotlib.pyplot as plt

## GB1_2016-Comparison between different BO methods

In [None]:
# GB1_2016: best-so-far curves for every search strategy, followed by a table
# of how often each strategy measures a top-1% / 2% / 5% variant.
fig = plt.figure(figsize=(6, 4), dpi=100)
methods = ['Random', 'Naive BO + GP', 'TuRBO + GP', 'ODBO, BO + GP', 'ODBO, TuRBO + GP', 'ODBO, BO + RobustGP', 'ODBO, TuRBO + RobustGP']
method_name = ['Random', 'BO_GP_batch1', 'TuRBO_GP_batch1', 'ODBO_BO_GP_batch1', 'ODBO_TuRBO_GP_batch1',
               'ODBO_BO_RobustGP_batch1', 'ODBO_TuRBO_RobustGP_batch1']
color = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6']
data_test = pd.read_csv('../datasets/GB1_2016_149361.csv', sep=',')
name_pre, Y_test = np.array(data_test['AACombo']), np.array(data_test['Fitness']).ravel()
data = {}
iterations = 50
n_runs = 10  # result files ..._0.npy to ..._9.npy are loaded for each method

for i in range(len(methods)):
    runs = [np.load('results/GB1_2016/GB1_2016_{}_{}.npy'.format(method_name[i], j))
            for j in range(n_runs)]
    # The Random results are stacked row-wise and transposed, the others are
    # concatenated column-wise; presumably both end up as (observation, run),
    # which is how they are indexed in the hit count below -- TODO confirm.
    data[i] = np.vstack(runs).T if i == 0 else np.hstack(runs)
    odbo.plot.plot_bo(iters=iterations, BO_result=data[i], method=methods[i], color=color[i])

true_max = max(Y_test)
plt.plot([0, iterations], [true_max, true_max], label='True maximum fitness', color='k')
plt.xlabel('Number of observations (beyond initial points)', fontsize=12)
plt.legend(bbox_to_anchor=(0.0, -0.21, 0.95, 0.0), labelspacing=0.1, ncol=2)
plt.ylim([0, 10.2])
plt.xlim([0, iterations])
plt.ylabel('Maximum Fitness', fontsize=12)
# The legend is anchored below the axes; without a tight bounding box it is
# cut off in the saved PNG even though it is visible inline.
plt.savefig('results/GB1_2016/GB1_2016_opt_curves.png', bbox_inches='tight')
plt.show()

# Count average hit numbers: for each method, the mean number (over runs) of
# the last `iterations` observations whose value reaches the top-x% threshold.
fitness_labels = ['Top 1%', 'Top 2%', 'Top 5%']
top_fractions = [0.01, 0.02, 0.05]

# Sort once; the k-th largest value is the threshold for the top-k set.
Y_sorted = np.sort(Y_test)
fitness_thresholds = [Y_sorted[-max(1, int(frac * len(Y_test)))] for frac in top_fractions]

hit_counts = []
for i in range(len(method_name)):
    last_obs = data[i][-iterations:, :].ravel()
    hit_counts.append([np.count_nonzero(last_obs >= fitness_top) / n_runs
                       for fitness_top in fitness_thresholds])

fig, ax = plt.subplots()
fig.patch.set_visible(False)
ax.axis('off')
ax.axis('tight')
# hits per run / iterations * 100 -> percentage of observations that are hits.
hit_ratio = np.divide(hit_counts, 0.01 * iterations)
cell_text = [['{:.2f}'.format(value) for value in row] for row in hit_ratio]
ax.table(cellText=cell_text,
         rowLabels=methods,
         colLabels=fitness_labels,
         cellLoc='center',
         colLoc='center',
         loc='center',
         fontsize=14)
plt.title('Average percentage hit ratio of top 1%, 2% and 5% measurements using different methods', fontsize=14, loc = 'center')
plt.show()



## GB1_2016-Comparison between different batch sizes in ODBO

In [None]:
# GB1_2016: best-so-far curves for ODBO TuRBO at batch sizes 1, 5 and 10,
# with both the GP and the RobustGP surrogate.
methods = ['ODBO, TuRBO + GP, Batch size=1', 'ODBO, TuRBO + GP, Batch size=5', 'ODBO, TuRBO + GP, Batch size=10','ODBO, TuRBO + RobustGP, Batch size=1','ODBO, TuRBO + RobustGP, Batch size=5','ODBO, TuRBO + RobustGP, Batch size=10']
method_name = ['ODBO_TuRBO_GP_batch1', 'ODBO_TuRBO_GP_batch5', 'ODBO_TuRBO_GP_batch10', 'ODBO_TuRBO_RobustGP_batch1', 'ODBO_TuRBO_RobustGP_batch5', 'ODBO_TuRBO_RobustGP_batch10']
data_test = pd.read_csv('../datasets/GB1_2016_149361.csv', sep=',')
name_pre, Y_test = np.array(data_test['AACombo']), np.array(data_test['Fitness'])
data = {}
iterations = 50
n_runs = 10  # result files ..._0.npy to ..._9.npy are loaded for each method
fig = plt.figure(figsize=(6, 4), dpi=100)
color = ['C4', 'C7', 'C8', 'C6', 'C9', 'C10']

for i in range(len(methods)):
    runs = [np.load('results/GB1_2016/GB1_2016_{}_{}.npy'.format(method_name[i], j))
            for j in range(n_runs)]
    data[i] = np.hstack(runs)
    odbo.plot.plot_bo(iters=iterations, BO_result=data[i], method=methods[i], color=color[i])

true_max = max(Y_test)
plt.plot([0, iterations], [true_max, true_max], label='True maximum fitness', color='k')
plt.xlabel('Number of observations (beyond initial points)', fontsize=12)
plt.legend(bbox_to_anchor=(0.0, -0.21, 0.95, 0.0), labelspacing=0.1, ncol=2)
plt.ylim([0, 10.2])
plt.xlim([0, iterations])
plt.ylabel('Maximum Fitness', fontsize=12)
# The legend is anchored below the axes; without a tight bounding box it is
# cut off in the saved PNG even though it is visible inline.
plt.savefig('results/GB1_2016/GB1_2016_opt_curves_batch_size.png', bbox_inches='tight')
plt.show()


## GB1_2016-Comparison between different acquisition functions in ODBO

In [None]:
# GB1_2016: best-so-far curves for ODBO TuRBO with the EI, UCB, PI and TS
# variants, with both the GP and the RobustGP surrogate. The EI entries load
# the plain `..._batch1` result files (no acquisition suffix in the name).
methods = ['ODBO, TuRBO + GP, EI', 'ODBO, TuRBO + GP, UCB', 'ODBO, TuRBO + GP, PI', 'ODBO, TuRBO + GP, TS','ODBO, TuRBO + RobustGP, EI','ODBO, TuRBO + RobustGP, UCB','ODBO, TuRBO + RobustGP, PI', 'ODBO, TuRBO + RobustGP, TS']
method_name = ['ODBO_TuRBO_GP_batch1', 'ODBO_TuRBO_GP_batch1_ucb', 'ODBO_TuRBO_GP_batch1_pi', 'ODBO_TuRBO_GP_batch1_ts', 'ODBO_TuRBO_RobustGP_batch1', 'ODBO_TuRBO_RobustGP_batch1_ucb', 'ODBO_TuRBO_RobustGP_batch1_pi', 'ODBO_TuRBO_RobustGP_batch1_ts']
data_test = pd.read_csv('../datasets/GB1_2016_149361.csv', sep=',')
name_pre, Y_test = np.array(data_test['AACombo']), np.array(data_test['Fitness'])
data = {}
iterations = 50
n_runs = 10  # result files ..._0.npy to ..._9.npy are loaded for each method
fig = plt.figure(figsize=(6, 4), dpi=100)
color = ['C4', 'C11', 'C12', 'C13', 'C6', 'C15', 'C17', 'C18']

for i in range(len(methods)):
    runs = [np.load('results/GB1_2016/GB1_2016_{}_{}.npy'.format(method_name[i], j))
            for j in range(n_runs)]
    data[i] = np.hstack(runs)
    odbo.plot.plot_bo(iters=iterations, BO_result=data[i], method=methods[i], color=color[i])

true_max = max(Y_test)
plt.plot([0, iterations], [true_max, true_max], label='True maximum fitness', color='k')
plt.xlabel('Number of observations (beyond initial points)', fontsize=12)
plt.legend(bbox_to_anchor=(0.0, -0.21, 0.95, 0.0), labelspacing=0.1, ncol=2)
plt.ylim([0, 10.2])
plt.xlim([0, iterations])
plt.ylabel('Maximum Fitness', fontsize=12)
# The legend is anchored below the axes; without a tight bounding box it is
# cut off in the saved PNG even though it is visible inline.
plt.savefig('results/GB1_2016/GB1_2016_opt_curves_acfn.png', bbox_inches='tight')
plt.show()

## GB1_2016-Comparison of ODBO with and without updating the XGBOD model during BO iterations

In [None]:
# GB1_2016: best-so-far curves for ODBO TuRBO with and without the
# "update XGBOD" variant, with both the GP and the RobustGP surrogate.
methods = ['ODBO, TuRBO + GP', 'ODBO, TuRBO + GP, update XGBOD', 'ODBO, TuRBO + RobustGP', 'ODBO, TuRBO + RobustGP, update XGBOD']
method_name = ['ODBO_TuRBO_GP_batch1', 'ODBO_TuRBO_update_XGBOD_GP_batch1',
               'ODBO_TuRBO_RobustGP_batch1', 'ODBO_TuRBO_update_XGBOD_RobustGP_batch1']
# Colours follow the other comparison cells of this notebook, where
# ODBO_TuRBO_GP_batch1 is drawn in C4 and ODBO_TuRBO_RobustGP_batch1 in C6;
# the two "update XGBOD" variants take the remaining colours.
color = ['C4', 'C19', 'C6', 'C20']
data_test = pd.read_csv('../datasets/GB1_2016_149361.csv', sep=',')
name_pre, Y_test = np.array(data_test['AACombo']), np.array(data_test['Fitness'])
data = {}
iterations = 50
n_runs = 10  # result files ..._0.npy to ..._9.npy are loaded for each method
fig = plt.figure(figsize=(6, 4), dpi=100)

for i in range(len(methods)):
    runs = [np.load('results/GB1_2016/GB1_2016_{}_{}.npy'.format(method_name[i], j))
            for j in range(n_runs)]
    data[i] = np.hstack(runs)
    odbo.plot.plot_bo(iters=iterations, BO_result=data[i], method=methods[i], color=color[i])

true_max = max(Y_test)
plt.plot([0, iterations], [true_max, true_max], label='True maximum fitness', color='k')
plt.xlabel('Number of observations (beyond initial points)', fontsize=12)
plt.legend(bbox_to_anchor=(0.0, -0.21, 1.20, 0.0), labelspacing=0.1, ncol=2)
plt.ylim([0, 10.2])
plt.xlim([0, iterations])
plt.ylabel('Maximum Fitness', fontsize=12)
# The legend is anchored below the axes; without a tight bounding box it is
# cut off in the saved PNG even though it is visible inline.
plt.savefig('results/GB1_2016/GB1_2016_opt_curves_update_comparison.png', bbox_inches='tight')
plt.show()


## Other datasets: Comparison between different ODBO methods

In [None]:
# Dataset plotted by the next cell; the values it handles are
# 'Ube4b_2013', 'avGFP_2016' and 'GB1_2014'.
dataset = 'GB1_2014'

In [None]:
# Best-so-far curves and top-x% hit table for the dataset named by `dataset`
# (assigned in the previous cell).
methods = ['Random', 'BO + GP', 'TuRBO + GP', 'ODBO, BO + GP', 'ODBO, TuRBO + GP', 'ODBO, BO + RobustGP', 'ODBO, TuRBO + RobustGP']
method_name = ['Random', 'BO_GP_batch1', 'TuRBO_GP_batch1', 'ODBO_BO_GP_batch1', 'ODBO_TuRBO_GP_batch1',
               'ODBO_BO_RobustGP_batch1', 'ODBO_TuRBO_RobustGP_batch1']
color = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6']
n_runs = 10  # result files ..._0.npy to ..._9.npy are loaded for each method

# Everything that differs between datasets: input file, measured column,
# number of observations plotted, y-axis range and y-axis label.
dataset_settings = {
    'Ube4b_2013': {'csv': '../datasets/Ube4b_2013_98299.csv', 'column': 'Log2Eratio',
                   'iterations': 50, 'ylim': [4, 10.2], 'ylabel': 'Maximum Log2(E3 score)'},
    'avGFP_2016': {'csv': '../datasets/avGFP_2016_54025.csv', 'column': 'medianBrightness',
                   'iterations': 50, 'ylim': [3.7, 4.2], 'ylabel': 'Maximum Median Brightness'},
    'GB1_2014': {'csv': '../datasets/GB1_2014_536085.csv', 'column': 'score',
                 'iterations': 100, 'ylim': [1.6, 2.6], 'ylabel': 'Maximum Enrichment Score'},
}
if dataset not in dataset_settings:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError('Unknown dataset {!r}; expected one of {}'.format(dataset, sorted(dataset_settings)))
settings = dataset_settings[dataset]

data_test = pd.read_csv(settings['csv'], sep=',')
name_pre, Y_test = np.array(data_test['AACombo']), np.array(data_test[settings['column']])
iterations = settings['iterations']

data = {}
fig = plt.figure(figsize=(6, 4), dpi=100)
for i in range(len(methods)):
    runs = [np.load('results/{}/{}_{}_{}.npy'.format(dataset, dataset, method_name[i], j))
            for j in range(n_runs)]
    # The Random results are stacked row-wise and transposed, the others are
    # concatenated column-wise; presumably both end up as (observation, run),
    # which is how they are indexed in the hit count below -- TODO confirm.
    data[i] = np.vstack(runs).T if i == 0 else np.hstack(runs)
    # Legend entries use the readable labels, as in the GB1_2016 cells,
    # rather than the file-name stems in `method_name`.
    odbo.plot.plot_bo(iters=iterations, BO_result=data[i], method=methods[i], color=color[i])

true_max = max(Y_test)
plt.plot([0, iterations], [true_max, true_max], label='True maximum fitness', color='k')
plt.xlabel('Number of observations (beyond initial points)', fontsize=12)
plt.legend(bbox_to_anchor=(0.0, -0.21, 0.95, 0.0), labelspacing=0.1, ncol=2)
plt.xlim([0, iterations])
plt.ylim(settings['ylim'])
plt.ylabel(settings['ylabel'], fontsize=12)
# The legend is anchored below the axes; without a tight bounding box it is
# cut off in the saved PNG even though it is visible inline.
plt.savefig('results/{}/{}_opt_curves.png'.format(dataset, dataset), bbox_inches='tight')
plt.show()

# Count average hit numbers: for each method, the mean number (over runs) of
# the last `iterations` observations whose value reaches the top-x% threshold.
fitness_labels = ['Top 1%', 'Top 2%', 'Top 5%']
top_fractions = [0.01, 0.02, 0.05]

# Sort once; the k-th largest value is the threshold for the top-k set.
Y_sorted = np.sort(Y_test)
fitness_thresholds = [Y_sorted[-max(1, int(frac * len(Y_test)))] for frac in top_fractions]

hit_counts = []
for i in range(len(method_name)):
    last_obs = data[i][-iterations:, :].ravel()
    hit_counts.append([np.count_nonzero(last_obs >= fitness_top) / n_runs
                       for fitness_top in fitness_thresholds])

fig, ax = plt.subplots()
fig.patch.set_visible(False)
ax.axis('off')
ax.axis('tight')
# hits per run / iterations * 100 -> percentage of observations that are hits.
hit_ratio = np.divide(hit_counts, 0.01 * iterations)
cell_text = [['{:.2f}'.format(value) for value in row] for row in hit_ratio]
ax.table(cellText=cell_text,
         rowLabels=methods,
         colLabels=fitness_labels,
         cellLoc='center',
         colLoc='center',
         loc='center',
         fontsize=14)
plt.title('Average percentage hit ratio of top 1%, 2% and 5% measurements using different methods', fontsize=14, loc = 'center')
plt.show()

