In [1]:
import importlib
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
import swifter  # noqa: F401

from stk_search.geom3d import train_models
from stk_search.ObjectiveFunction import LookUpTable
from stk_search.Representation import (
    Representation_from_fragment,
)
from stk_search.Search_algorithm import (
    BayesianOptimisation,
    Ea_surrogate,
    Search_algorithm,
)

# Make sure the folder for the search-experiment database exists.
# NOTE(review): later cells write their JSON records to
# "output/search_experiment/search_exp_database" (with an "output/" prefix) and
# create that folder themselves; confirm this un-prefixed directory is still
# needed.
Path("search_experiment/search_exp_database").mkdir(
    parents=True, exist_ok=True
)


In [2]:
# Load the pickled search space, then print its syntax, its condition list and
# its size before previewing the precursor table.

SearchSpace_loc = "SearchSpace/SearchSpace_test.pkl"

# pickle.load can execute arbitrary code stored in the file: only open search
# spaces that come from a trusted source.
with Path(SearchSpace_loc).open("rb") as f:
    sp = pickle.load(f)  # noqa: S301
# presumably refreshes state derived from the unpickled object - TODO confirm
# against the search-space class.
sp.update()
print(sp.syntax)
print(sp.conditions_list)
print(f"size of the search space is : {sp.get_space_size():.2e}")
# Last expression of the cell: shows the first rows of the precursor dataframe.
sp.df_precursors.head()

[0, 1, 2, 3, 4, 5]
[[], [], [], [], [], []]
size of the search space is : 0.00e+00


Unnamed: 0,electron affinity (eV),homo lumo_gap (eV),ionisation potential (eV),total energy (au),HOMO-LUMO GAP (eV),ES1,fosc1,InChIKey,mol_opt,Atom_num
0,3.0685,0.095166,9.114,-56.555859,1.672081,3.787,0.0003,GVYASUPVXDSGHN-UQCOIBPSSA-N,<rdkit.Chem.rdchem.Mol object at 0x799d5d961040>,22
1,2.8466,0.038435,9.3325,-56.53667,2.258122,3.514,0.0016,HNRHIGPDSSXEJA-FARCUNLSSA-N,<rdkit.Chem.rdchem.Mol object at 0x799d5d960f90>,22
2,2.9365,0.072094,9.2252,-56.533763,2.009171,3.414,0.0003,SNMJIXVGAOBXCS-UHFFFAOYSA-N,<rdkit.Chem.rdchem.Mol object at 0x799d5d961090>,22
3,2.5901,0.064465,8.9842,-48.098467,2.153852,3.364,0.0008,LWMBLJLODRWDDF-NYYWCZLTSA-N,<rdkit.Chem.rdchem.Mol object at 0x799d5d9610e0>,22
4,2.5596,0.076792,8.8405,-48.091982,1.967541,3.342,0.0004,BJFIUKSBVAJFOJ-WTKPLQERSA-N,<rdkit.Chem.rdchem.Mol object at 0x799d5d961130>,22


In [3]:
# Benchmark settings used by this cell and by the search cells further down.
df_total_path = "data_example/Molecule_database/30K_benchmark_150524.csv"
oligomer_size = 6
target_name = "target"
aim = "maximise"

# Load the benchmark table and build the combined score:
#   target = -|ES1 - 3| - |ionisation potential - 5.5| + log10(fosc1)
df_total = pd.read_csv(df_total_path)
es1_deviation = np.abs(df_total["ES1"] - 3)
ip_deviation = np.abs(df_total["ionisation potential (eV)"] - 5.5)
log_fosc1 = np.log10(df_total["fosc1"])
df_total["target"] = -es1_deviation - ip_deviation + log_fosc1

# The table (now including the "target" column) is written back over the
# file it was read from.
df_total.to_csv(df_total_path, index=False)

# define the evaluation function
ObjectiveFunction = LookUpTable(
    df_total,
    oligomer_size,
    target_name=target_name,
    aim=aim,
)

In [4]:
# test evaluation function
molecule_id = 0
# The list indexer keeps the selection as a one-row DataFrame rather than a
# Series.
molecule_properties = sp.check_df_for_element_from_sp(df_total).iloc[
    [molecule_id]
]
print(molecule_properties["InChIKey"])
# Columns holding the InChIKey of each fragment position of the oligomer.
fragment_columns = [f"InChIKey_{x}" for x in range(oligomer_size)]
# Last expression of the cell, so its return value is displayed; the saved
# output shows a (value, InChIKey) tuple.
ObjectiveFunction.evaluate_element(molecule_properties[fragment_columns])

0    CLUOOVKTWZALSH-UHFFFAOYSA-N
Name: InChIKey, dtype: object


(-2.5070288713583553, 'CLUOOVKTWZALSH-UHFFFAOYSA-N')

In [5]:
# initialise search algorithm

# Both settings are passed to the two Bayesian optimisers below;
# which_acquisition is also recorded in the JSON written for every run.
which_acquisition = "EI"
lim_counter = 10
# BO_learned and BO_Mord are constructed with identical arguments here; later
# cells attach a different representation to each of them.
BO_learned = BayesianOptimisation.BayesianOptimisation(
    which_acquisition=which_acquisition, lim_counter=lim_counter
)
EA = Search_algorithm.evolution_algorithm()
# NOTE(review): the next cell builds a new Ea_surrogate and rebinds SUEA, so
# this instance is replaced before it is used.
SUEA = Ea_surrogate.Ea_surrogate()
BO_Mord = BayesianOptimisation.BayesianOptimisation(
    which_acquisition=which_acquisition, lim_counter=lim_counter
)

In [6]:
# load the Representation and the model

config_dir = "data_example/representation_learning/splitrand-nummol20000/"
# NOTE(review): config and min_val_loss are not read by any other visible
# cell; only config_dir is passed on to the algorithms below.
config, min_val_loss = train_models.get_best_embedding_model(config_dir)

# Rebinds SUEA to a fresh instance, replacing the one from the previous cell.
SUEA = Ea_surrogate.Ea_surrogate()
SUEA.config_dir = config_dir
SUEA.load_representation_model()
BO_learned.config_dir = config_dir
# Last expression of the cell: its return value is what the notebook displays
# (the saved output shows a representation object and the loaded model).
BO_learned.load_representation_model()


Model loaded:  SchNet
Model loaded:  SchNet


(<stk_search.Representation.Representation_poly_3d.RepresentationPoly3d at 0x799d522da520>,
 Pymodel_new(
   (molecule_3D_repr): SchNet(hidden_channels=128, num_filters=128, num_interactions=8, num_gaussians=51, cutoff=6)
   (graph_pred_linear): Linear(in_features=128, out_features=1, bias=True)
   (transform_to_opt): Linear(in_features=128, out_features=128, bias=True)
 ))

In [7]:
# Initialise the representation of the molecules used.
# Here we use the precursor dataframe that holds the PCA of the descriptors
# calculated with mordred.

df_representation_path = "data_example/precursor/df_mordred_24072024.pkl"
df_representation = pd.read_pickle(df_representation_path)  # noqa: S301
# Drop every column that contains a missing value.
df_representation = df_representation.dropna(axis=1)

# Check that the representation dataframe contains all the elements of the
# search space.
has_representation = sp.df_precursors["InChIKey"].isin(
    df_representation["InChIKey"]
)
num_missing = sp.df_precursors.shape[0] - has_representation.sum()
print("number of element not in the Representation dataframe ", num_missing)
if num_missing > 0:
    # Keep only the precursors that have a representation and overwrite the
    # pickled search space with the reduced version.
    sp.df_precursors = sp.df_precursors[has_representation]
    with Path(SearchSpace_loc).open("wb") as f:
        pickle.dump(sp, f)

# Every numeric column of the representation table is used as a fragment
# property.
frag_properties = df_representation.select_dtypes(include=[np.number]).columns
print(frag_properties)
BO_Mord.Representation = (
    Representation_from_fragment.RepresentationFromFragment(
        df_representation, frag_properties
    )
)


number of element not in the Representation dataframe  0
Index(['PCA_0', 'PCA_1', 'PCA_2', 'PCA_3', 'PCA_4', 'PCA_5', 'PCA_6', 'PCA_7',
       'PCA_8', 'PCA_9'],
      dtype='object')


In [8]:
# test representation

molecule_id = 0
molecule_properties = sp.check_df_for_element_from_sp(df_total).iloc[
    [molecule_id]
]
print(molecule_properties["InChIKey"])
# Columns holding the InChIKey of each fragment position of the oligomer.
fragment_columns = [f"InChIKey_{x}" for x in range(oligomer_size)]
# Build and print the representation of the same molecule with each of the
# two algorithms that loaded the learned representation model.
for algorithm in (SUEA, BO_learned):
    X_rpr = algorithm.Representation.generate_repr(
        molecule_properties[fragment_columns]
    )
    print(X_rpr)

0    CLUOOVKTWZALSH-UHFFFAOYSA-N
Name: InChIKey, dtype: object
tensor([[ 0.1417,  0.3532,  0.6085,  0.0272,  0.1343,  0.3805, -0.0399, -0.3059,
          0.2316,  0.0622, -0.2203, -0.3029, -0.1769,  0.1601, -0.3236,  0.1478,
         -0.2510, -0.0188, -0.4512, -0.1417,  0.0479,  0.2816,  0.3486,  0.1556,
          0.1332,  0.2660, -0.3931, -0.4436,  0.2244, -0.3033,  0.1446, -0.4518,
          0.0987,  0.2211, -0.1881,  0.2777,  0.1778,  0.1911, -0.1662,  0.1010,
          0.3406, -0.0163,  0.4243, -0.0293,  0.5388, -0.2400, -0.0238, -0.1496,
          0.0362,  0.3610, -0.1622, -0.0590, -0.0347, -0.0454, -0.0376, -0.1943,
         -0.3544, -0.2063, -0.4570,  0.5313, -0.3119,  0.4948, -0.3478,  0.0188,
          0.0007,  0.4584, -0.1681,  0.0902, -0.2440,  0.3692,  0.4220, -0.0227,
         -0.0476,  0.2752, -0.2138, -0.2509,  0.0231, -0.1412, -0.0031,  0.1457,
         -0.0611, -0.0174, -0.6084,  0.1283,  0.3202,  0.0045,  0.2109,  0.0135,
          0.2852,  0.2423, -0.0271,  0.2797, -

In [9]:
from stk_search import SearchExp

importlib.reload(SearchExp)


# set experiment conditions
def define_and_run_search(
    search_algorithm=EA,
    num_elem_initialisation=50,
    number_of_iterations=100,
    verbose=True,
    case_name="test",
):
    """Define and run a search experiment.

    Builds a ``SearchExp`` from the notebook-level search space (``sp``) and
    ``ObjectiveFunction``, writes a JSON record of the run inputs to
    ``output/search_experiment/search_exp_database`` and then runs the search.

    Args:
    ----
        search_algorithm (Search_algorithm): The search algorithm to use.
        num_elem_initialisation (int): The number of elements to initialise.
        number_of_iterations (int): The number of iterations to run the search for.
        verbose (bool): Whether to print the search progress.
        case_name (str): The name of the search experiment.

    Returns:
    -------
        int: The maximum id acquired.

    """
    # Function-scope imports: neither module is part of the notebook's first
    # import cell.
    import json
    import subprocess

    def current_git_version():
        """Return the HEAD commit hash, or "unknown" when it cannot be read."""
        try:
            return (
                subprocess.check_output(["git", "rev-parse", "HEAD"])
                .strip()
                .decode("utf-8")
            )
        except (subprocess.CalledProcessError, OSError):
            # Not inside a git checkout, or git is not installed. The commit
            # hash is provenance metadata only, so its absence must not stop
            # the experiment from running.
            return "unknown"

    def save_run_search_inputs(
        inputs, save_path="run_search_new_inputs.json"
    ) -> int:
        """Save the inputs to a file.

        Args:
        ----
            inputs (dict): The inputs to save; a "git_version" entry is added
                to this dict in place.
            save_path (str): The path to save the inputs to.

        Returns:
        -------
            int: Always 0.

        """
        # Add the git version to the inputs
        inputs["git_version"] = current_git_version()

        # Save the inputs to a file
        with Path(save_path).open("w") as f:
            json.dump(inputs, f)

        print("Inputs saved.")
        return 0

    s_exp = SearchExp.SearchExp(
        sp,
        search_algorithm,
        ObjectiveFunction,
        number_of_iterations,
        verbose=verbose,
    )
    benchmark = True
    s_exp.output_folder = (
        f"output/search_experiment/{oligomer_size}_frag/" + case_name
    )
    s_exp.num_elem_initialisation = num_elem_initialisation
    s_exp.benchmark = benchmark
    s_exp.df_total = df_total

    # Only algorithms that carry a (non-None) Representation instance
    # attribute report its name; every other algorithm records None.
    representation = vars(search_algorithm).get("Representation")
    representation_name = (
        representation.name if representation is not None else None
    )

    # Save search inputs. The key order is kept stable so that the JSON files
    # of different runs stay comparable.
    input_json = {
        "SearchSpace_loc": SearchSpace_loc,
        "search_algorithm": search_algorithm.name,
        "ObjectiveFunction": str(ObjectiveFunction),
        "number_of_iterations": number_of_iterations,
        "verbose": verbose,
        "num_elem_initialisation": num_elem_initialisation,
        "benchmark": benchmark,
        "df_total": df_total_path,
        "df_representation": df_representation_path,
        "representation": representation_name,
        "frag_properties": list(frag_properties),
        "which_acquisition": which_acquisition,
        "run_search_name": s_exp.search_exp_name,
        "search_output_folder": s_exp.output_folder,
        "date": s_exp.date,
        "oligomer_size": oligomer_size,
        "config_dir": config_dir,
        "case_name": case_name,
    }
    save_path = f"output/search_experiment/search_exp_database/{s_exp.search_exp_name}.json"
    Path("output/search_experiment/search_exp_database").mkdir(
        parents=True, exist_ok=True
    )

    save_run_search_inputs(input_json, save_path)
    # "run_seach" (sic) is the method name called by the original code.
    s_exp.run_seach()
    return max(s_exp.ids_acquired)

In [10]:
# Run the search ten times with the evolutionary algorithm; each call builds a
# new SearchExp and writes its own JSON input record.
# NOTE: define_and_run_search returns max(s_exp.ids_acquired), so despite its
# name `s_exp` holds that value here, not a SearchExp object.
for _ in range(10):
    s_exp = define_and_run_search(search_algorithm=EA, case_name="EA")


Inputs saved.
Inputs saved.
Inputs saved.
Inputs saved.
Inputs saved.
Inputs saved.
Inputs saved.
Inputs saved.
Inputs saved.
Inputs saved.


In [11]:
# Run the search ten times with the surrogate-assisted evolutionary algorithm.
# NOTE(review): the saved output of this cell stops after five "Inputs saved."
# lines with "IndexError: positional indexers are out-of-bounds".
for _ in range(10):
    define_and_run_search(search_algorithm=SUEA, case_name="SUEA")

Inputs saved.
Inputs saved.
Inputs saved.
Inputs saved.
Inputs saved.


IndexError: positional indexers are out-of-bounds

In [None]:
# Run the search five times with the Bayesian optimiser that uses the learned
# representation model.
# NOTE(review): the saved output of this cell ends with a KeyboardInterrupt
# after a single "Inputs saved." line.
for _ in range(5):
    define_and_run_search(search_algorithm=BO_learned, case_name="BO_learned")

Inputs saved.


Pandas Apply:   0%|          | 0/50 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/28 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/69 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/104 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/120 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/5 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/122 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/126 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/132 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/5 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/144 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/12 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/150 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/150 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/156 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/207 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/5 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/51 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/81 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/8 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/133 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/136 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/137 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/144 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/144 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/147 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/147 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/156 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/6 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/165 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/171 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/172 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/177 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/279 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/288 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/292 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/298 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/302 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/303 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/52 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/142 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/13 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/156 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/6 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/183 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/15 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/195 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/7 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/203 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/211 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/211 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/218 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/228 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/241 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/256 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/275 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/9 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/280 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/286 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/293 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/298 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/312 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/10 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/322 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/8 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/329 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/340 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/345 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/53 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/90 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/8 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/123 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/137 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/142 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/160 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/162 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/170 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/180 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/182 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/191 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/194 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/202 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/210 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/54 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/77 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/118 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/13 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/121 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/127 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/134 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/138 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/142 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/158 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/161 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/162 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/188 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/194 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/220 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/8 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/226 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/230 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/235 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/309 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/340 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/346 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/5 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/353 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/355 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/55 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/77 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/86 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/97 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/115 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/141 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/5 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/143 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/148 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/152 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/166 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/179 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/187 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/199 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/223 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/14 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/228 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/241 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/7 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/56 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/74 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/86 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/94 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/5 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/102 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/108 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/114 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/144 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/6 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/147 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/161 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/163 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/310 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/57 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/76 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/9 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/82 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/84 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/152 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/167 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/10 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/199 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/11 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/199 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/203 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/208 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/217 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/224 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/58 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/67 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/137 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/147 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/153 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/177 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/180 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/185 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/190 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/204 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/205 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/221 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/235 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/239 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/261 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/262 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/271 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/273 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/277 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/285 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/289 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/305 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/315 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/318 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/319 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/59 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/77 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/83 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/93 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/133 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/135 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/138 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/151 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/168 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/177 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/7 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/177 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/186 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/2 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/205 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/212 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/219 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/225 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/3 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/228 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/228 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/232 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/1 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/60 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/64 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/74 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/78 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/98 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/109 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/116 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/121 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/121 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/140 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/166 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/175 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/247 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/4 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Run the search five times with the Bayesian optimiser whose Representation
# was built from the mordred fragment dataframe in an earlier cell.
for _ in range(5):
    define_and_run_search(search_algorithm=BO_Mord, case_name="BO_Mord")

# load results and plot some metrics

In [None]:
import json
import pickle


def load_search_list(df):
    """Unpickle the file named in each row's "search_exp_file" column.

    The loaded objects are returned as a list, in row order. pickle.load can
    execute arbitrary code, so the listed files must come from a trusted
    source.
    """

    def _unpickle(file_path):
        with Path(file_path).open("rb") as handle:
            return pickle.load(handle)

    return [_unpickle(record["search_exp_file"]) for _, record in df.iterrows()]


# Gather the JSON input record written for every search run into a single
# dataframe, one row per run.
save_path = "output/search_experiment/search_exp_database"
json_files = list(Path(f"{save_path}/").glob("*.json"))
list_json = []
for json_file in json_files:
    try:
        # The with-block closes the file; no explicit close() is needed.
        with json_file.open("r") as f:
            list_json.append(json.load(f))
    except (OSError, ValueError):
        # OSError: the file vanished or cannot be read. ValueError covers
        # json.JSONDecodeError (truncated or malformed record) and text
        # decoding errors. Such records are reported and skipped so that one
        # bad file does not abort the whole load.
        print("error oppening", json_file)
df = pd.DataFrame(list_json)
# Path of the pickled results of each run:
#   <search_output_folder>/<date>/results_<run_search_name>.pkl
df["search_exp_file"] = (
    df["search_output_folder"]
    + "/"
    + df["date"]
    + "/results_"
    + df["run_search_name"]
    + ".pkl"
)
print(df.shape)
df_all = df.copy()
df_all.head()


In [None]:
# NOTE(review): num_iteration and num_elem_initialisation are only referenced
# by the commented-out filters below.
num_iteration = 20
num_elem_initialisation = 50
# Rebinds the notebook-level oligomer_size that define_and_run_search also
# reads.
oligomer_size = 6
# df_filtered = df_all[df_all['num_iteration']==num_iteration]
# df_filtered = df_filtered[df_filtered['num_elem_initialisation']==num_elem_initialisation]
# Keep only the runs recorded with the selected oligomer size.
df_filtered = df_all[df_all["oligomer_size"] == oligomer_size]
print(df_filtered.shape)
# Despite its name, df_list is a pandas GroupBy object: one group per
# (acquisition, config_dir, algorithm, case_name) combination.
df_list = df_filtered.groupby(
    ["which_acquisition", "config_dir", "search_algorithm", "case_name"]
)
# Number of groups.
print(len(df_list))
df_list.describe()

In [None]:
import pandas as pd


def join_name(x):
    """Join the strings in ``x`` into a single underscore-separated key."""
    separator = "_"
    return separator.join(x)


def load_search_list(df, min_num_iteration):
    """Load the results of every run that reached ``min_num_iteration``.

    A row is kept when its "max_num_eval" value is at least
    ``min_num_iteration``; the file named in its "search_exp_file" column is
    then read with ``pd.read_pickle``. The loaded objects are returned as a
    list, in row order.

    Note: this definition shadows the one-argument ``load_search_list``
    defined in an earlier cell.
    """
    return [
        pd.read_pickle(record["search_exp_file"])
        for _, record in df.iterrows()
        if record["max_num_eval"] >= min_num_iteration
    ]


def get_results_length(x):
    """Return the number of entries under "fitness_acquired" in pickle ``x``.

    Best effort: any failure while reading the file or looking up the key
    (missing file, unreadable pickle, absent key, ...) yields 0 instead of
    raising.
    """
    try:
        results = pd.read_pickle(x)
        num_evaluations = len(results["fitness_acquired"])
    except Exception:
        return 0
    return num_evaluations


results_dict = {}
num_searches_with_min, df_plot_dict = {}, {}
min_num_iteration = 70
for name, df in df_list:
    if len(df) == 0:
        continue
    # Number of evaluations stored in each run's results file (0 when the
    # file cannot be read).
    df["max_num_eval"] = df["search_exp_file"].apply(get_results_length)
    key = join_name(name)
    df["key"] = key
    # Only groups with at least one run that reached the minimum number of
    # evaluations are loaded and summarised.
    if df["max_num_eval"].max() >= min_num_iteration:
        results_dict[key] = load_search_list(df, min_num_iteration)
        num_searches_with_min[key] = (
            len(results_dict[key]),
            df["df_total"].values[0],
        )

# One summary row per group key.
df_summary_1 = pd.DataFrame.from_dict(
    num_searches_with_min,
    orient="index",
    columns=["number of explorations with min iterations", "df_path"],
)
df_summary_1["key"] = df_summary_1.index
# Case name: the last two underscore-separated tokens of the key.
df_summary_1["case_name"] = df_summary_1["key"].apply(
    lambda x: "_".join((x.split("_")[-2], x.split("_")[-1]))
)
# Whatever follows the last "nummol" in the key.
df_summary_1["num_mol_training"] = df_summary_1["key"].apply(
    lambda x: x.split("nummol")[-1]
)
# Boolean flags telling whether the key contains "_wide", "_narrow" or
# "_total".
for flag in ("wide", "narrow", "total"):
    df_summary_1[flag] = df_summary_1["key"].apply(
        lambda x, marker=f"_{flag}": marker in x
    )
df_summary_1 = df_summary_1.reset_index()
df_summary_1

In [None]:
import datetime
import importlib
import pickle

from stk_search.utils import Search_results_plot, plot_results_all

# Re-import the plotting modules; presumably so that edits made to them while
# the kernel is running are picked up - TODO confirm this is still wanted.
importlib.reload(Search_results_plot)
importlib.reload(plot_results_all)
# Figure folder suffixed with today's date ("_YYYY_MM_DD").
# NOTE(review): save_folder is not used by any other visible cell.
save_folder = "data/figures/" + datetime.datetime.now().strftime("_%Y_%m_%d")
# Plot functions handed to plot_results_all.plot_metric in the next cell.
plot_function_list_single = [
    Search_results_plot.plot_simple_regret,
    Search_results_plot.plot_inst_regret,
    Search_results_plot.plot_cumulative_regret,
    Search_results_plot.plot_number_of_molecule_discovered,
    Search_results_plot.plot_rate_of_discovery,
]
# NOTE(review): this second list is not used by any visible cell; the names
# suggest batch/aggregate variants of the plots above - confirm.
plot_function_list_multi = [
    Search_results_plot.plot_simple_regret_batch,
    Search_results_plot.plot_number_of_molecule_discovered_sum,
    Search_results_plot.plot_total_rate_of_discovery,
]

In [None]:
%matplotlib inline
df_plot = df_summary_1.copy()
# Plot at most the first four experiment groups. df_summary_1 has a fresh
# 0..n-1 index (it was reset in the summary cell), so head(4) selects the same
# rows as the labels [0, 1, 2, 3] when they exist, without raising a KeyError
# when fewer than four groups reached the minimum number of iterations.
df_plot = df_plot.head(4)
fig, axes, metric_dict_res = plot_results_all.plot_metric(
    df_plot,
    plot_function_list_single,
    results_dict,
    df_list,
    nb_iterations=min_num_iteration,
)
fig.tight_layout()
