In [14]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import olympus
from olympus.datasets import Dataset
from olympus.evaluators import Evaluator
from olympus.emulators import Emulator
from olympus.campaigns import Campaign
from olympus.planners import Planner
from olympus.scalarizers import Scalarizer


## CASE STUDY 1

In [16]:
cs1_datasets = ['redoxmers']#'dye_lasers']
cs1_planners = [
    #'RandomSearch', 
    #'Genetic',
    #'Hyperopt', 
    #'Gpyopt', 
    #'Gryffin', 
    #'Dragonfly', 
    'Botorch',
    #'Smac',
    #'Hebo',
]  

In [17]:
for dataset_name in cs1_datasets:
    for planner_name in cs1_planners:
        
        print(f'\nTESTING {planner_name} ON {dataset_name} ...\n')
            
        if dataset_name == 'dye_lasers':
            # fully categorical, lookup table
            dataset = Dataset(kind=dataset_name)

            planner = Planner(kind=planner_name)
            planner.set_param_space(dataset.param_space)

            campaign = Campaign()
            campaign.set_param_space(dataset.param_space)
            campaign.set_value_space(dataset.value_space)
            
            scalarizer = Scalarizer(
                kind='Chimera', 
                value_space=dataset.value_space,
                goals=['max', 'min', 'max'],
                tolerances=[0.5, 0.5, 0.5],
                absolutes=[False, False, False]
            )

            evaluator = Evaluator(
                planner=planner, 
                emulator=dataset,
                campaign=campaign,
                scalarizer=scalarizer,
            )
        
        elif dataset_name == 'redoxmers':
            # fully categorical, lookup table
            dataset = Dataset(kind=dataset_name)

            if planner_name == 'Botorch':
                from olympus.planners.planner_botorch import Botorch
                planner = Botorch(goal='minimize', use_descriptors=False)
            else:
                planner = Planner(kind=planner_name)
        
            planner.set_param_space(dataset.param_space)

            campaign = Campaign()
            campaign.set_param_space(dataset.param_space)
            campaign.set_value_space(dataset.value_space)
            
            scalarizer = Scalarizer(
                kind='Chimera', 
                value_space=dataset.value_space,
                goals=['min', 'min', 'min'],
                tolerances=[0.5, 0.5, 0.5],
                absolutes=[False, False, False]
            )

            evaluator = Evaluator(
                planner=planner, 
                emulator=dataset,
                campaign=campaign,
                scalarizer=scalarizer,
            )
            
        evaluator.optimize(num_iter=15)
        
        print('Done!')


TESTING Botorch ON redoxmers ...



torch.linalg.solve_triangular has its arguments reversed and does not return a copy of one of the inputs.
X = torch.triangular_solve(B, A).solution
should be replaced with
X = torch.linalg.solve_triangular(A, B). (Triggered internally at  /Users/distiller/project/pytorch/aten/src/ATen/native/BatchLinearAlgebra.cpp:1672.)
  Linv = torch.triangular_solve(Eye, L, upper=False).solution


False
tensor([[1., 0., 1.,  ..., 0., 0., 0.],
        [1., 0., 1.,  ..., 0., 0., 0.],
        [1., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 1., 0.,  ..., 1., 0., 0.],
        [0., 1., 0.,  ..., 0., 1., 0.],
        [0., 1., 0.,  ..., 0., 0., 1.]], dtype=torch.float64)
False
tensor([[1., 0., 1.,  ..., 0., 0., 0.],
        [1., 0., 1.,  ..., 0., 0., 0.],
        [1., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 1., 0.,  ..., 1., 0., 0.],
        [0., 1., 0.,  ..., 0., 1., 0.],
        [0., 1., 0.,  ..., 0., 0., 1.]], dtype=torch.float64)
False
tensor([[1., 0., 1.,  ..., 0., 0., 0.],
        [1., 0., 1.,  ..., 0., 0., 0.],
        [1., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 1., 0.,  ..., 1., 0., 0.],
        [0., 1., 0.,  ..., 0., 1., 0.],
        [0., 1., 0.,  ..., 0., 0., 1.]], dtype=torch.float64)
False
tensor([[1., 0., 1.,  ..., 0., 0., 0.],
        [1., 0., 1.,  ..., 0., 0., 0.],
        [1., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 1., 0

In [None]:
campaign.observations.get_params()
campaign.observations.get_values()

In [None]:
campaign.scalarized_observations.get_values()

## CASE STUDY 2 

In [5]:
#----------------
# SUZUKI MIYAURA
#----------------

#suzuki_datasets = ['suzuki', 'suzuki_i', 'suzuki_ii', 'suzuki_iii', 'suzuki_iv', 'suzuki_edbo']
suzuki_datasets = ['suzuki_i', 'suzuki_ii', 'suzuki_iii', 'suzuki_iv'] #['suzuki_edbo']

suzuki_planners = [
    #'RandomSearch', 
    'Genetic',
    #'Hyperopt', 
    #'Gpyopt', 
    #'Gryffin', 
    #'Dragonfly', 
    #'Botorch',
    #'Smac',
    #'Hebo',
]  
#suzuki_planners = ['RandomSearch']

In [6]:
for dataset_name in suzuki_datasets:
    for planner_name in suzuki_planners:
        
        print(f'\nTESTING {planner_name} ON {dataset_name} ...\n')
        
        if dataset_name == 'suzuki': 
            
            # fully continuous, emulated dataset
            emulator = Emulator(dataset=dataset_name, model='BayesNeuralNet')
            planner = Planner(kind=planner_name)
            planner.set_param_space(emulator.param_space)

            campaign = Campaign()
            campaign.set_param_space(emulator.param_space)
            campaign.set_value_space(emulator.value_space)

            evaluator = Evaluator(
                planner=planner, 
                emulator=emulator,
                campaign=campaign,
            )
            
        elif dataset_name == 'suzuki_edbo':
            
            # fully categorical, lookup table
            dataset = Dataset(kind=dataset_name)

            planner = Planner(kind=planner_name)
            planner.set_param_space(dataset.param_space)

            campaign = Campaign()
            campaign.set_param_space(dataset.param_space)
            campaign.set_value_space(dataset.value_space)

            evaluator = Evaluator(
                planner=planner, 
                emulator=dataset,
                campaign=campaign,
            )
            
        elif dataset_name in ['suzuki_i', 'suzuki_ii', 'suzuki_iii', 'suzuki_iv']:
            
            # mixed parameter, emulator, multi-objective optimization
            emulator = Emulator(dataset=dataset_name, model='BayesNeuralNet')
            planner = Planner(kind=planner_name)
            planner.set_param_space(emulator.param_space)

            campaign = Campaign()
            campaign.set_param_space(emulator.param_space)
            campaign.set_value_space(emulator.value_space)
            
            scalarizer = Scalarizer(
                kind='Chimera', 
                value_space=emulator.value_space,
                goals=['max', 'max'],
                tolerances=[0.9, 0.0],
                absolutes=[False, False]
            )

            evaluator = Evaluator(
                planner=planner, 
                emulator=emulator,
                campaign=campaign,
                scalarizer=scalarizer,
            )
        
        evaluator.optimize(num_iter=10)
        
        print('Done!')


TESTING Genetic ON suzuki_i ...

[0;37m[INFO] Loading emulator using a BayesNeuralNet model for the dataset suzuki_i...
[0m

  trainable=trainable)
  trainable=trainable)


Done!

TESTING Genetic ON suzuki_ii ...

[0;37m[INFO] Loading emulator using a BayesNeuralNet model for the dataset suzuki_ii...
[0mDone!

TESTING Genetic ON suzuki_iii ...

[0;37m[INFO] Loading emulator using a BayesNeuralNet model for the dataset suzuki_iii...
[0mDone!

TESTING Genetic ON suzuki_iv ...

[0;37m[INFO] Loading emulator using a BayesNeuralNet model for the dataset suzuki_iv...
[0mDone!


In [None]:
campaign.observations.get_values()

In [7]:
#------------------
# BUCHWALD-HARTWIG
#------------------

buchwald_datasets = ['buchwald_a','buchwald_b','buchwald_c','buchwald_d','buchwald_e']

buchwald_planners = [
    #'RandomSearch', 
    'Genetic',
    #'Hyperopt', 
    #'Gpyopt', 
    #'Gryffin', 
    #'Dragonfly', 
    #'Botorch',
    #'Smac',
    #'Hebo',
] 


In [8]:
buchwald_campaigns = []

for dataset_name in buchwald_datasets:
    for planner_name in buchwald_planners:
        
        print(f'\nTESTING {planner_name} ON {dataset_name} ...\n')
        
        dataset = Dataset(kind=dataset_name)
        planner = Planner(kind=planner_name)
        planner.set_param_space(dataset.param_space)
        
        campaign = Campaign()
        campaign.set_param_space(dataset.param_space)
        campaign.set_value_space(dataset.value_space)
        
        evaluator = Evaluator(
            planner=planner, 
            emulator=dataset,
            campaign=campaign,
        )
        
        evaluator.optimize(num_iter=15)
        
        print('Done!')
        
        buchwald_campaigns.append(campaign)


TESTING Genetic ON buchwald_a ...



TypeError: 'Individual' object cannot be interpreted as an integer

## CASE STUDY 3

In [40]:
cs3_datasets = ['dye_lasers'] #'redoxmers']
cs3_planners = [
    #'RandomSearch', 
    #'Genetic',
    #'Hyperopt', 
    #'Gpyopt', 
    #'Gryffin', 
    #'Dragonfly', 
    'Botorch',
    #'Smac',
    #'Hebo',
] 
cs3_scalarizers = ['Chimera', 'WeightedSum', 'Parego'] # 'ConstrainedAsf'

In [41]:
for dataset_name in cs3_datasets:
    for planner_name in cs3_planners:
        for scalarizer_name in cs3_scalarizers:
        
            print(f'\nTESTING {planner_name} ON {dataset_name} WITH {scalarizer_name} ...\n')

            if dataset_name == 'dye_lasers':
                # fully categorical, lookup table
                dataset = Dataset(kind=dataset_name)

                planner = Planner(kind=planner_name)
                planner.set_param_space(dataset.param_space)

                campaign = Campaign()
                campaign.set_param_space(dataset.param_space)
                campaign.set_value_space(dataset.value_space)

                if scalarizer_name == 'Chimera':
                    scalarizer = Scalarizer(
                        kind='Chimera', 
                        value_space=dataset.value_space,
                        goals=['max', 'min', 'max'],
                        tolerances=[0.5, 0.5, 0.5],
                        absolutes=[False, False, False]
                    )
                elif scalarizer_name == 'Parego':
                    scalarizer = Scalarizer(
                        kind='Parego', 
                        value_space=dataset.value_space,
                        goals=['max', 'min', 'max'],
                        rho=0.05,
                    )
                
                elif scalarizer_name == 'WeightedSum':
                    scalarizer = Scalarizer(
                        kind='WeightedSum', 
                        value_space=dataset.value_space,
                        goals=['max', 'min', 'max'],
                        weights=[0.33, 0.33, 0.33],
                    )
                
                elif scalarizer_name == 'ConstrainedAsf':
                    pass
                    # TODO: implement this! 
                    

                evaluator = Evaluator(
                    planner=planner, 
                    emulator=dataset,
                    campaign=campaign,
                    scalarizer=scalarizer,
                )

            elif dataset_name == 'redoxmers':
                # fully categorical, lookup table
                dataset = Dataset(kind=dataset_name)

                planner = Planner(kind=planner_name)
                planner.set_param_space(dataset.param_space)

                campaign = Campaign()
                campaign.set_param_space(dataset.param_space)
                campaign.set_value_space(dataset.value_space)

                if scalarizer_name == 'Chimera':
                    scalarizer = Scalarizer(
                        kind='Chimera', 
                        value_space=dataset.value_space,
                        goals=['min', 'min', 'min'],
                        tolerances=[0.5, 0.5, 0.5],
                        absolutes=[False, False, False]
                    )
                elif scalarizer_name == 'Parego':
                    scalarizer = Scalarizer(
                        kind='Parego', 
                        value_space=dataset.value_space,
                        goals=['min', 'min', 'min'],
                        rho=0.05,
                    )
                
                elif scalarizer_name == 'WeightedSum':
                    scalarizer = Scalarizer(
                        kind='WeightedSum', 
                        value_space=dataset.value_space,
                        goals=['min', 'min', 'min'],
                        weights=[0.33, 0.33, 0.33],
                    )
                
                elif scalarizer_name == 'ConstrainedAsf':
                    pass
                    # TODO: implement this! 

                evaluator = Evaluator(
                    planner=planner, 
                    emulator=dataset,
                    campaign=campaign,
                    scalarizer=scalarizer,
                )

            evaluator.optimize(num_iter=15)

            print('Done!')


TESTING Botorch ON dye_lasers WITH Chimera ...

False
tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]], dtype=torch.float64)
False
tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]], dtype=torch.float64)
False
tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]], dtype=torch.float64)
False
tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  

False
tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.]], dtype=torch.float64)
Done!


In [1]:
'CN(C)/C(N(C)C)=N\\C(C)(C)C' == 'CN(C)/C(N(C)C)=N\\C(C)(C)C'

True

In [2]:
'CC(C)C(C=C(C(C)C)C=C1C(C)C)=C1C2=CC=CC=C2P(C(C)(C)C)C(C)(C)C''CC(C)C(C=C(C(C)C)C=C1C(C)C)=C1C2=CC=CC=C2P(C(C)(C)C)C(C)(C)C' == 'CC(C)C(C=C(C(C)C)C=C1C(C)C)=C1C2=CC=CC=C2P(C(C)(C)C)C(C)(C)C'

False

In [3]:
'CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C(C)(C)C)C(C)(C)C)C(OC)=CC=C2OC'=='CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C(C)(C)C)C(C)(C)C)C(OC)=CC=C2OC'

True

In [4]:
'CN(C)/C(N(C)C)=N\\C(C)(C)C'=='CN(C)/C(N(C)C)=N\\C(C)(C)C'

True

In [5]:
'o1ccc(n1)c2ccccc2'=='o1ccc(n1)c2ccccc2'

True

In [6]:
'Ic1cccnc1'=='Ic1cccnc1'

True

In [30]:
dataset = Dataset(kind='buchwald_e')

In [31]:
df = dataset.data
print(df.shape)


df[(df['aryl_halide']=='Ic1cccnc1')&(df['additive']=='o1ccc(n1)c2ccccc2')&(df['base']=='CN(C)/C(N(C)C)=N\\C(C)(C)C')]

(792, 5)


Unnamed: 0,aryl_halide,additive,base,ligand,yield
568,Ic1cccnc1,o1ccc(n1)c2ccccc2,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)...,44.330847
577,Ic1cccnc1,o1ccc(n1)c2ccccc2,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C)C(C=C(C(C)C)C=C1C(C)C)=C1C2=CC=CC=C2P(C(C...,88.16754
594,Ic1cccnc1,o1ccc(n1)c2ccccc2,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C1=C(C2=C(OC)C=CC(OC)=C2P(C34CC5CC(C4)CC(C5...,84.556615
791,Ic1cccnc1,o1ccc(n1)c2ccccc2,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C(C)(C)C)...,69.795902


In [32]:
df[
    (df['aryl_halide']=='Ic1cccnc1')&
    (df['additive']=='o1ccc(n1)c2ccccc2')&
    (df['base']=='CN(C)/C(N(C)C)=N\C(C)(C)C')&
    (df['ligand']=='CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C(C)(C)C)C(C)(C)C)C(OC)=CC=C2OC')
]

Unnamed: 0,aryl_halide,additive,base,ligand,yield
791,Ic1cccnc1,o1ccc(n1)c2ccccc2,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C(C)(C)C)...,69.795902


In [33]:
df[df['base']=='CN(C)/C(N(C)C)=N\\C(C)(C)C']

Unnamed: 0,aryl_halide,additive,base,ligand,yield
1,Clc1cccnc1,o1nccc1c2ccccc2,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)...,4.169293
4,Clc1cccnc1,CCOC(=O)c1onc(C)c1,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)...,2.014672
7,Clc1cccnc1,CCOC(=O)c1cc(C)on1,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C)C1=CC(C(C)C)=CC(C(C)C)=C1C2=C(P(C3CCCCC3)...,16.173827
10,Clc1cccnc1,o1nccc1c2ccccc2,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C)C(C=C(C(C)C)C=C1C(C)C)=C1C2=CC=CC=C2P(C(C...,15.194521
13,Clc1cccnc1,CCOC(=O)c1onc(C)c1,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C)C(C=C(C(C)C)C=C1C(C)C)=C1C2=CC=CC=C2P(C(C...,3.149733
...,...,...,...,...,...
780,Ic1cccnc1,Fc1cccc(F)c1c2oncc2,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C1=C(C2=C(OC)C=CC(OC)=C2P(C34CC5CC(C4)CC(C5...,56.573875
783,Ic1cccnc1,C(N(Cc1ccccc1)c2oncc2)c3ccccc3,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C1=C(C2=C(OC)C=CC(OC)=C2P(C34CC5CC(C4)CC(C5...,59.162165
786,Ic1cccnc1,Cc1onc(c1)n2cccc2,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C1=C(C2=C(OC)C=CC(OC)=C2P(C34CC5CC(C4)CC(C5...,62.559565
789,Ic1cccnc1,COC(=O)c1cc(on1)c2sccc2,CN(C)/C(N(C)C)=N\C(C)(C)C,CC(C1=C(C2=C(OC)C=CC(OC)=C2P(C34CC5CC(C4)CC(C5...,55.264663
