In [1]:
import os
import torch

import os
import sys

import numpy as np
import pandas as pd

import torch
import torch.nn as nn


# Extend the module search path before the project-local imports below
# (peft_modules, esm_adapter). The entries are relative paths, so they are
# resolved against the notebook's working directory.
# NOTE(review): presumably the local packages live in one of these folders — confirm.
sys.path.append('..')
sys.path.append('.')
sys.path.append('../llm_garden')

from peft_modules import peft_utils
import esm_adapter



# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Lowercase alias: the BoTorch cells further down refer to `device`.
device = DEVICE

# Floating-point precision used for every tensor handed to the GP models.
dtype = torch.double

# Location where torch.hub caches downloaded checkpoints.
# torch.hub.set_dir("./torch_hub")
torch.hub.set_dir("/home/zengs/zengs_data/torch_hub")

# Setting the SMOKE_TEST environment variable shrinks the run to a quick sanity check.
SMOKE_TEST = os.environ.get("SMOKE_TEST")
N_TRIALS, N_BATCH, MC_SAMPLES = (2, 2, 32) if SMOKE_TEST else (3, 20, 256)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Size argument passed to the example-data helper.
# NOTE(review): presumably the per-sequence length — the recorded output shows 12
# tokens per row, i.e. 10 plus a leading 0 and a trailing 2; confirm.
INPUT_SEQ_SIZE = 10


# Example input for the batch converter (format is defined by peft_utils, not shown here).
data = peft_utils.get_esm_example_data(INPUT_SEQ_SIZE)


# Load ESM-2 model
# Checkpoint names noted by the author (the smaller one is the one loaded below):
# esm2_t33_650M_UR50D
# esm2_t6_8M_UR50D
model, alphabet = esm_adapter.pretrained.esm2_t6_8M_UR50D()
batch_converter = alphabet.get_batch_converter()
# Switch the module to evaluation mode.
model.eval()

# Convert the example data into labels, raw strings and a token tensor.
batch_labels, batch_strs, batch_tokens = batch_converter(data)

# Display the token tensor together with its shape.
batch_tokens, batch_tokens.shape

(tensor([[ 0,  5, 15, 11,  7, 10, 16,  9, 10,  4, 15,  2],
         [ 0, 25,  5,  4, 11,  5, 10, 16, 16,  9,  7,  2],
         [ 0, 13, 15,  9,  7, 18, 13,  4, 12, 10, 13,  2],
         [ 0,  9, 10, 13, 21, 12,  8, 16, 11,  6, 20,  2]]),
 torch.Size([4, 12]))

In [3]:
# Look up the embedding vector of every token using the model's embedding layer.
token_embedding = model.embed_tokens(batch_tokens)


# Expected shape per the author's note: torch.Size([4, 12, 320]), i.e. (N_SEQ, N_TOKEN, N_FEAT)
token_embedding, token_embedding.shape

(tensor([[[-1.9543e-01, -2.7515e-01, -1.4941e-01,  ...,  1.2744e-01,
           -1.0431e-01, -6.4697e-02],
          [-6.6711e-02, -1.1932e-02,  8.6288e-03,  ...,  1.4633e-02,
           -4.3945e-02, -1.9348e-01],
          [-9.7595e-02,  1.5762e-02,  3.3600e-02,  ...,  3.4027e-03,
            2.2369e-02,  2.8732e-02],
          ...,
          [-8.5693e-02, -2.2476e-02,  4.2603e-02,  ...,  8.2764e-02,
            8.9539e-02,  8.9417e-02],
          [-9.7595e-02,  1.5762e-02,  3.3600e-02,  ...,  3.4027e-03,
            2.2369e-02,  2.8732e-02],
          [ 7.3120e-02, -3.0811e-01, -1.1700e-01,  ..., -2.2058e-01,
           -3.4326e-01, -1.5488e-03]],
 
         [[-1.9543e-01, -2.7515e-01, -1.4941e-01,  ...,  1.2744e-01,
           -1.0431e-01, -6.4697e-02],
          [ 2.0813e-02, -8.6548e-02, -7.0374e-02,  ..., -2.9898e-04,
           -8.3923e-02, -1.3000e-01],
          [-6.6711e-02, -1.1932e-02,  8.6288e-03,  ...,  1.4633e-02,
           -4.3945e-02, -1.9348e-01],
          ...,
    

In [None]:
from botorch.test_functions import Hartmann
from botorch.models import FixedNoiseGP, ModelListGP
from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood


# BoTorch's Hartmann test function, constructed with negate=True.
neg_hartmann6 = Hartmann(negate=True)
# Scale of the Gaussian noise added to simulated observations; its square is the
# fixed observation variance supplied to the GP models.
NOISE_SE = 0.5
train_yvar = torch.tensor(NOISE_SE**2, device=device, dtype=dtype)


def outcome_constraint(X):
    """Constraint slack: sum over the last dimension of ``X`` minus 3.

    A point is feasible when the returned value is less than or equal to zero.
    """
    coordinate_total = X.sum(dim=-1)
    return coordinate_total - 3


def weighted_obj(X):
    """Objective gated by feasibility.

    Returns ``neg_hartmann6(X)`` where ``outcome_constraint(X) <= 0`` and zero
    everywhere else.
    """
    feasible_mask = outcome_constraint(X) <= 0
    return neg_hartmann6(X) * feasible_mask.type_as(X)


def generate_initial_data(n=10):
    """Draw ``n`` random initial training points with placeholder outcomes.

    Args:
        n: Number of initial points to generate. Previously this argument was
            ignored and 10 points were always produced.

    Returns:
        A tuple ``(train_x, train_obj, train_con, best_observed_value)``:

        * ``train_x``: ``n x 6`` tensor of integer values drawn uniformly from
          ``[4, 29)``, stored with the notebook's ``dtype`` on ``device``.
        * ``train_obj``: ``n x 1`` tensor of uniform random placeholder values.
        * ``train_con``: ``n x 1`` tensor of ``outcome_constraint(train_x)``.
        * ``best_observed_value``: maximum of ``weighted_obj(train_x)`` as a float.
    """
    # Integer-valued inputs in [4, 29).
    # NOTE(review): presumably these are token ids — confirm the intended range.
    train_x = torch.randint(4, 29, size=(n, 6), device=device).to(dtype=dtype)

    # TODO: change these to the actual objective and constraint values
    train_obj = torch.rand(n, 1, device=device, dtype=dtype)
    train_con = outcome_constraint(train_x).unsqueeze(-1)  # add output dimension

    best_observed_value = weighted_obj(train_x).max().item()
    return train_x, train_obj, train_con, best_observed_value


def initialize_model(train_x, train_obj, train_con, state_dict=None):
    """Build the two-output GP (objective + constraint) and its marginal log likelihood.

    Args:
        train_x: Training inputs shared by both GPs.
        train_obj: Observed objective values.
        train_con: Observed constraint values.
        state_dict: Optional state dict loaded into the combined model.

    Returns:
        ``(mll, model)`` where ``model`` is a ``ModelListGP`` holding the objective
        GP followed by the constraint GP, and ``mll`` is its
        ``SumMarginalLogLikelihood``.
    """
    # Bring every input onto the notebook's device / dtype first.
    train_x, train_obj, train_con = (
        tensor.to(device=device, dtype=dtype)
        for tensor in (train_x, train_obj, train_con)
    )

    # One fixed-noise GP per outcome: objective first, constraint second.
    per_outcome_gps = [
        FixedNoiseGP(train_x, targets, train_yvar.expand_as(targets)).to(train_x)
        for targets in (train_obj, train_con)
    ]

    # Combine into a multi-output GP model.
    model = ModelListGP(*per_outcome_gps)
    mll = SumMarginalLogLikelihood(model.likelihood, model)

    if state_dict is not None:
        model.load_state_dict(state_dict)
    return mll, model


In [None]:
from botorch.acquisition.objective import ConstrainedMCObjective


def obj_callable(Z):
    """Select the objective outcome: index 0 along the last dimension of ``Z``."""
    objective_index = 0
    return Z[..., objective_index]


def constraint_callable(Z):
    """Select the constraint outcome: index 1 along the last dimension of ``Z``."""
    constraint_index = 1
    return Z[..., constraint_index]


# Define a feasibility-weighted objective for optimization: the objective value is
# read by obj_callable (outcome index 0) and the single constraint by
# constraint_callable (outcome index 1).
constrained_obj = ConstrainedMCObjective(
    objective=obj_callable,
    constraints=[constraint_callable],
)

In [None]:
from botorch.optim import optimize_acqf


# Search box handed to optimize_acqf: first row is all zeros, second row all ones,
# one column per input dimension (6).
# NOTE(review): generate_initial_data draws its inputs from integers in [4, 29),
# which lie outside this unit box — confirm which range is intended.
bounds = torch.tensor([[0.0] * 6, [1.0] * 6], device=device, dtype=dtype)

# Acquisition-optimization settings; smaller values are used when SMOKE_TEST is set.
BATCH_SIZE = 3 if not SMOKE_TEST else 2
NUM_RESTARTS = 10 if not SMOKE_TEST else 2
RAW_SAMPLES = 512 if not SMOKE_TEST else 32


def optimize_acqf_and_get_observation(acq_func):
    """Maximise ``acq_func`` and simulate noisy observations at the chosen points.

    Args:
        acq_func: Acquisition function passed to ``optimize_acqf``.

    Returns:
        ``(new_x, new_obj, new_con)``: the detached candidates, plus the objective
        (``neg_hartmann6``) and constraint (``outcome_constraint``) values at those
        candidates, each with a trailing output dimension and Gaussian noise of
        scale ``NOISE_SE`` added.
    """
    search_settings = dict(
        bounds=bounds,
        q=BATCH_SIZE,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options={"batch_limit": 5, "maxiter": 200},
    )
    best_candidates, _ = optimize_acqf(acq_function=acq_func, **search_settings)
    new_x = best_candidates.detach()

    def _noisy_column(values):
        # Add the output dimension, then perturb with N(0, NOISE_SE^2) noise.
        column = values.unsqueeze(-1)
        return column + NOISE_SE * torch.randn_like(column)

    # Noise is drawn for the objective first and the constraint second.
    new_obj = _noisy_column(neg_hartmann6(new_x))
    new_con = _noisy_column(outcome_constraint(new_x))
    return new_x, new_obj, new_con


def update_random_observations(best_random):
    """Advance the random-search baseline by one batch.

    Draws ``BATCH_SIZE`` uniform random points in six dimensions, evaluates the
    feasibility-weighted objective on them, and appends the running best value
    to ``best_random``. The list is modified in place and also returned.
    """
    previous_best = best_random[-1]
    batch_best = weighted_obj(torch.rand(BATCH_SIZE, 6)).max().item()
    # Keep the previous best unless this batch strictly improves on it.
    best_random.append(batch_best if batch_best > previous_best else previous_best)
    return best_random

In [None]:
from botorch import fit_gpytorch_mll
from botorch.acquisition.monte_carlo import (
    qExpectedImprovement,
    qNoisyExpectedImprovement,
)
from botorch.sampling.normal import SobolQMCNormalSampler
from botorch.exceptions import BadInitialCandidatesWarning

import time
import warnings


# Silence BoTorch's bad-initial-candidates warning and every RuntimeWarning.
warnings.filterwarnings("ignore", category=BadInitialCandidatesWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)


# Run sizes; smaller values are used when SMOKE_TEST (read from the environment in
# the first cell) is set.
N_TRIALS = 3 if not SMOKE_TEST else 2
N_BATCH = 20 if not SMOKE_TEST else 2
MC_SAMPLES = 256 if not SMOKE_TEST else 32

verbose = False

# NOTE(review): presumably meant to collect per-trial best-value histories; nothing
# in the loop below appends to them yet.
best_observed_all_ei, best_observed_all_nei, best_random_all = [], [], []


# NOTE(review): these hard overrides replace the N_TRIALS / N_BATCH values assigned
# just above, so exactly one trial with one BO iteration is run.
N_TRIALS = 1
N_BATCH = 1
# average over multiple trials
for trial in range(1, N_TRIALS + 1):

    print(f"\nTrial {trial:>2} of {N_TRIALS} ", end="")
    best_observed_ei, best_observed_nei, best_random = [], [], []

    # call helper functions to generate initial training data and initialize model
    (
        train_x_ei,
        train_obj_ei,
        train_con_ei,
        best_observed_value_ei,
    ) = generate_initial_data(n=10)

    train_x_nei, train_obj_nei, train_con_nei = train_x_ei, train_obj_ei, train_con_ei
    mll_nei, model_nei = initialize_model(train_x_nei, train_obj_nei, train_con_nei)

    # every history starts from the best value of the initial random batch
    best_observed_value_nei = best_observed_value_ei
    best_observed_ei.append(best_observed_value_ei)
    best_observed_nei.append(best_observed_value_nei)
    best_random.append(best_observed_value_ei)

    # run N_BATCH rounds of BayesOpt after the initial random batch
    for iteration in range(1, N_BATCH + 1):

        t0 = time.monotonic()

        # fit the models
        fit_gpytorch_mll(mll_nei)

        # define the qNEI acquisition module using a QMC sampler
        qmc_sampler = SobolQMCNormalSampler(sample_shape=torch.Size([MC_SAMPLES]))

        qNEI = qNoisyExpectedImprovement(
            model=model_nei,
            X_baseline=train_x_nei,
            sampler=qmc_sampler,
            objective=constrained_obj,
        )

        new_x_nei, new_obj_nei, new_con_nei = optimize_acqf_and_get_observation(qNEI)

        # Append the new batch to ALL training tensors. Previously only the inputs
        # were extended, so X_baseline grew while the GP kept its original data.
        print(train_x_nei)
        train_x_nei = torch.cat([train_x_nei, new_x_nei])
        train_obj_nei = torch.cat([train_obj_nei, new_obj_nei])
        train_con_nei = torch.cat([train_con_nei, new_con_nei])
        print(train_x_nei)

        # Rebuild the model on the enlarged data set so the next iteration fits and
        # optimizes against it; the previous state dict warm-starts the
        # hyperparameters.
        mll_nei, model_nei = initialize_model(
            train_x_nei,
            train_obj_nei,
            train_con_nei,
            model_nei.state_dict(),
        )

In [None]:
# Display the candidates proposed by the most recent BO iteration.
new_x_nei
