In [18]:
from summit import *
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA


## Functions

In [19]:
# Transform inputs
def get_ct(continuous_features, ordinal_features):
    transformers = []
    transformers.append(
        (
            "continuous",
            StandardScaler(),
            continuous_features
        )
    )
    if type(ordinal_features) == list:
        transformers.append(
            (
                "ordinal",
                OrdinalEncoder(),
                ordinal_features
            )
        )
    elif type(ordinal_features) == dict:
        categories = [cats for cats in ordinal_features.values()]
        transformers.append(
            (
                "ordinal",
                OrdinalEncoder(categories=categories),
                list(ordinal_features.keys())
            )
        )
    return ColumnTransformer(transformers)

In [14]:
def parity_plot(result: dict, ax: plt.Axes = None):
    # Train resultss
    model = result["model"]
    train_posterior = model.posterior(torch.tensor(result["train_X"]))
    train_y_pred_mean = train_posterior.mean.detach()
    train_y_pred_std = train_posterior.variance.sqrt()
    train_y_pred_mean = result["output_transform"].inverse_transform(train_y_pred_mean)
    train_y = result["output_transform"].inverse_transform(result["train_y"])

    # Plotting
    if ax is None:
        fig, ax = plt.subplots(1)
    ax.plot([0,100], [0,100], "--")
    ax.scatter(train_y, train_y_pred_mean, label="Train")
    ax.set_xlim(0,100)
    ax.set_ylim(0,100)
    
    if result.get("test_X") is not None and result.get("test_y") is not None:
        test_posterior = model.posterior(torch.tensor(result["test_X"]))
        test_y_pred_mean = test_posterior.mean.detach()
        test_y_pred_std = test_posterior.variance.sqrt()
        test_y_pred_mean = result["output_transform"].inverse_transform(test_y_pred_mean)
        test_y = result["output_transform"].inverse_transform(result["test_y"])
        
        ax.scatter(test_y, test_y_pred_mean, label="Test")

NameError: name 'plt' is not defined

## Generate Data

In [2]:
exp_pt = MIT_case1(noise_level=1)
exp_pt.domain

0,1,2,3
Name,Type,Description,Values
conc_cat,"continuous, input",catalyst concentration,"[0.000835,0.004175]"
t,"continuous, input",reaction time,"[60,600]"
cat_index,"categorical, input",Choice of catalyst,8 levels
temperature,"continuous, input",Reactor temperature in degress celsius,"[30,110]"
y,"continuous, maximize objective",yield (%),"[0,100]"


We first generate different amounts of data using latin hypercube sampling.

Now, we can run single-task and multi-task Bayesian optimization.

In [8]:
def generate_mit_case_1_data(n_points):
    exp_pt = MIT_case1(noise_level=1)
    rs = np.random.RandomState(100)
    lhs = LHS(exp_pt.domain, random_state=rs)
    conditions = lhs.suggest_experiments(n_points)
    exp_pt.run_experiments(conditions)
    pt_data = exp_pt.data
    pt_data['task', 'METADATA'] = 0
    return pt_data

n_aux = [5, 10, 50]
aux_datasets = [generate_mit_case_1_data(n) for n in n_aux]

In [11]:
def generate_mit_case_2_data(n_points):
    exp_pt = MIT_case2(noise_level=1)
    rs = np.random.RandomState(100)
    lhs = LHS(exp_pt.domain, random_state=rs)
    conditions = lhs.suggest_experiments(n_points)
    exp_pt.run_experiments(conditions)
    pt_data = exp_pt.data
    pt_data['task', 'METADATA'] = 1
    return pt_data

data = generate_mit_case_2_data(10)

## Train Single Task Model

In [9]:
def train_single_task_model(df, include_continuous=True):
    continuous_features = [
        "conc_cat", "temperature", "t"
    ]
    categorical_features = ["cat_index"]
    features = categorical_features
    if include_continuous:
        features += continuous_features
    features.append("y")
    df = df.copy()[features]

    if include_continuous:
        input_transform = get_ct(
            continuous_features=continuous_features,
            ordinal_features=categorical_features)
    else:   
        input_transform = OrdinalEncoder()
        
    train_X = input_transform.fit_transform(df)
    output_scaler = StandardScaler()
    train_y = output_scaler.fit_transform(df[["y"]])

    model = MixedSingleTaskGP(
        train_X=torch.tensor(train_X),
        train_Y=torch.tensor(train_y),
        cat_dims=
    )
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)
    
    return {
        "input_transform": input_transform,
        "output_transform": output_scaler,
        "train_X": train_X,
        "train_y": train_y,
        "model": model
    }

In [20]:
train_single_task_model(data)

ValueError: Selected columns, ['conc_cat', 'temperature', 't'], are not unique in dataframe

## Compare ICM and LCM models