## Customize your reaction prediction module

This tutorial provides a step-by-step introduction to customizing your own reaction prediction module in Seq-Stack-Reaction. A reaction prediction model takes a list of reactant sets, each represented as a SMILES string, as input and outputs a list of products, also represented as SMILES strings. We use a dummy module to demonstrate the usage and implementation; you should use your own module instead.

### 1. Initialize parameters

In [1]:
import json
import pathlib
from datetime import date
from ssr import SMC
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# Read the base experiment settings and pin the sampler variant used in this tutorial.
with open("./setting.json") as settings_file:
    variables = json.load(settings_file)
variables["model_name"] = "SMC-RECUR_GTM"
main_path = variables["main_path"]

# Every path below is built by plain string concatenation, so main_path must
# already end with a path separator.
variables_addition = dict(
    initial_pool_path=main_path + "data/pool.csv",
    gtm_path=main_path + "model/GTM/enamine_gtm",
    reactor_model_path=main_path + "model/molecular_transformer.pt",
    # Base column names; per-step and per-target columns are appended later.
    sample_column=[
        "reactant", "product", "reactant_index",
        "ll", "surrogate_ll",
        "c_label", "freq",
        "target", "surrogate_target",
        "is_new",
    ],
    # One QSPR model path per property, keyed by property name.
    surrogate_forward_model_path={
        prop: main_path + "model/QSPR/reactant_" + prop for prop in ("qed", "logp")
    },
    forward_model_path={
        prop: main_path + "model/QSPR/product_" + prop for prop in ("qed", "logp")
    },
    unique_col="reactant_index",
    # NOTE(review): this is the string "False", not the boolean -- confirm how it is consumed.
    black="False",
)
variables.update(variables_addition)


# Stamp the run with today's date (ISO format, same text as str(date.today())).
variables["experiment_date"] = date.today().isoformat()
# NOTE(review): n_r from the settings file is bumped by one here -- reason not visible in this file.
variables["n_r"] += 1

# Per-step columns: r<k>, r<k>_id, r<k>_gen for k = 1..n_r, plus p<k-1> from the second step on.
sample_columns = variables["sample_column"]
for step in range(1, variables["n_r"] + 1):
    sample_columns.extend(
        ["r" + str(step), "r" + str(step) + "_id", "r" + str(step) + "_gen"]
    )
    if step > 1:
        sample_columns.append("p" + str(step - 1))

# Per-target columns: the target name itself and its "surrogate_" counterpart.
for target_name in variables["y_list"]:
    sample_columns.extend([target_name, "surrogate_" + target_name])

# Evaluated left to right: (-1 / threshold) * 2.
threshold = variables["generation_threshold"]
variables["generation_temperature"] = (-1 / threshold) * 2

# Translate the chosen model name into the feature switches stored in `variables`.
# NOTE(review): the exact meaning of each switch is defined by the SMC
# implementation, which is not visible in this file.
model_name = variables["model_name"]

# "SMC_<N>" names carry an explicit n_r value after the prefix. Matching on the
# prefix plus a decimal suffix accepts multi-digit values such as "SMC_10"; the
# former `[:-1] == "SMC_"` test only matched names with exactly one trailing
# character. Malformed names such as "SMC_x" now reach the "wrong model name"
# error below instead of failing inside int().
smc_prefix = "SMC_"
smc_depth = model_name[len(smc_prefix):] if model_name.startswith(smc_prefix) else ""

if model_name == "SMC-RECUR_GTM":
    variables.update(
        SMC_reactor=True,
        surrogate=False,
        SMC_enrich=True,
        reactor_enrich=False,
        optimization=True,
    )
elif model_name == "SMC-RECUR_GTM_SR_PL":
    variables.update(
        SMC_reactor=False,
        surrogate=True,
        SMC_enrich=False,
        reactor_enrich=True,
        optimization=True,
    )
elif smc_depth.isdecimal():
    # NOTE(review): n_r is overwritten here after sample_column was already
    # expanded from the earlier n_r value -- confirm this is intended.
    variables.update(
        SMC_reactor=True,
        surrogate=False,
        SMC_enrich=False,
        reactor_enrich=False,
        n_r=int(smc_depth),
        optimization=True,
    )
elif model_name == "Random":
    variables.update(
        SMC_reactor=True,
        surrogate=False,
        SMC_enrich=False,
        reactor_enrich=False,
        p_exploitation=0,
        optimization=False,
    )
elif model_name == "Random_RECUR":
    variables.update(
        SMC_reactor=True,
        surrogate=False,
        SMC_enrich=True,
        reactor_enrich=False,
        p_exploitation=0,
        optimization=False,
    )
else:
    # ValueError is a subclass of Exception, so handlers written for the
    # previous bare Exception still catch it.
    raise ValueError("wrong model name: {}".format(model_name))

# Results for this run live under <main_path>results/<experiment_date>/<model_name>.
model_result_path = "".join(
    [main_path, "results/", variables["experiment_date"], "/", variables["model_name"]]
)
variables["model_df_path"] = model_result_path + "/dfs/"

# The warehouse and pool folders are only prepared when reactor enrichment is on.
if variables["reactor_enrich"]:
    warehouse_dir = model_result_path + "/warehouse"
    variables["warehouse"] = warehouse_dir
    variables["warehouse_key"] = warehouse_dir + "/key"
    pathlib.Path(warehouse_dir).mkdir(parents=True, exist_ok=True)
    # Append mode creates the key file if it is missing and leaves existing content alone.
    # NOTE(review): presumably used as a lock/marker file -- confirm against the SMC code.
    with open(variables["warehouse_key"], "a"):
        pass

    pool_dir = model_result_path + "/pool_folder"
    variables.update(
        pool_folder=pool_dir,
        pool_folder_key=pool_dir + "/key",
        pool_updated_marker=pool_dir + "/updated_marker",
        pool_updated_path=pool_dir + "/updated_pool.csv",
    )
    for directory in (pool_dir, pool_dir + "/addition"):
        pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
    with open(variables["pool_folder_key"], "a"):
        pass


### 2. Read reactant pool

In [2]:
# Load the starting reactant pool from the CSV configured in the settings,
# then report how many rows it contains.
initial_reactant_pool = pd.read_csv(variables["initial_pool_path"])
print("SMC: initial pool size: {}".format(len(initial_reactant_pool)))

SMC: initial pool size: 150549


### 3. Define the reaction prediction module

In [4]:
import random
class dummy_MolecularTransformer:
    """Placeholder reaction predictor used to demonstrate the reactor interface.

    It does no chemistry: each "product" is simply one of the input
    components, chosen at random.
    """

    def react(self, reactant_list):
        """Return one pseudo-product per reactant set.

        Args:
            reactant_list: iterable of strings, each holding the components
                of one reactant set joined by ".".

        Returns:
            A list with one entry per input string: a randomly chosen
            "."-separated component of that string.
        """
        product_list = []
        for reactant_set in reactant_list:
            components = reactant_set.split(".")
            product_list.append(random.choice(components))
        return product_list

### 4. Plug the reaction prediction module into SMC

In [5]:
# Build the reaction predictor; swap in your own object here.
# NOTE(review): SMC presumably only needs the reactor to expose `react(reactant_list)` -- confirm.
reactor = dummy_MolecularTransformer()

# Hand the settings, the reactant pool and the reactor to SMC.
SMC_instance = SMC(
    variables=variables,
    pool=initial_reactant_pool,
    reactor=reactor,
)
# The instance is callable; calling it presumably starts the sampling run.
SMC_instance()