In [1]:
import os
import re
import json
import tempfile
import argparse

In [None]:
# --------- user settings: replace every "{...}" placeholder before running
reinvent_dir, reinvent_env = "{path to location}", "{path to environment file}"
output_dir = "{output location path}"      # folder that receives the log, results and JSON

# model files handed to the "reinforcement_learning" block further down
prior = "{path to prior model}"
first_agent, second_agent = (
    "{path to first agent model}",
    "{path to second agent model}",
)

# bounds passed as "low" / "high" to the scoring component's transformation
# (adjust to your use case)
low, high = 0.5, 0.9

In [None]:
# Make sure the results folder exists. Unlike os.mkdir, os.makedirs also creates
# missing parent folders; exist_ok=True keeps re-running this cell harmless when
# the folder is already there (a regular file at that path still raises).
os.makedirs(output_dir, exist_ok=True)

In [None]:
# start the configuration dictionary with its two top-level entries;
# the remaining blocks are attached in the cells below
configuration = dict(
    version=2,                             # REINVENT configuration version
    run_type="reinforcement_learning",     # other run types: "sampling", "validation",
                                           #                  "transfer_learning",
                                           #                  "scoring" and "create_model"
)

In [None]:
# logging block: where progress and results are written and how the job is labelled
configuration["logging"] = dict(
    recipient="local",                     # log locally (alternative: a remote REST interface)
    logging_frequency=50,                  # log every 50th step
    logging_path=os.path.join(output_dir, "progress.log"),  # point tensorboard at this path
    resultdir=output_dir,                  # will hold the compounds (SMILES) and summaries
    job_name="{job_name}",                 # placeholder: an arbitrary name identifying the job
    job_id=1,                              # numeric job identifier
    sender="",
)

In [None]:
# (re)create the empty "parameters" block; note that re-running this cell
# discards any sub-blocks that later cells have already attached to it
configuration["parameters"] = {}

# attach all "reinforcement learning"-specific run parameters
configuration["parameters"].update(
    reinforcement_learning=dict(
        prior=prior,                       # path to the prior model
        agent=first_agent,                 # path to the first agent model
        second_agent=second_agent,         # path to the second agent model
        n_steps=1000,                      # number of epochs (steps) to perform; often 1000
        sigma=128,                         # used to calculate the "augmented likelihood", see publication
        learning_rate=0.0001,              # how strongly the agent is influenced by each epoch
        batch_size=128,                    # how many molecules are generated per epoch
        reset=0,                           # if not 0, reset the agent once the threshold is reached
                                           # to obtain more diverse solutions
        reset_score_cutoff=0.5,            # threshold used when resetting is enabled
        margin_threshold=50,               # the (positive) margin between agent and prior
    )
)

In [None]:
# scoring function definition: a single Tanimoto-similarity component; the
# finished dictionary is attached to the configuration at the end of this cell
scoring_function = dict(
    name="custom_sum",                     # score aggregation scheme (upstream REINVENT also
                                           # offers "custom_product" - confirm for your version)
    parallel=False,                        # whether components are executed in parallel; Python's
                                           # False / True are written as JSON false / true
    # the "parameters" list holds the individual scoring components
    parameters=[
        # component: Tanimoto similarity with a "step" transformation
        dict(
            component_type="tanimoto_similarity",
            name="EXP 4 - Tanimoto",
            weight=1,
            model_path=None,
            specific_parameters=dict(
                transformation=True,
                transformation_type="step",
                high=high,                 # bounds come from the settings cell
                low=low,
            ),
        ),
    ],
)

configuration["parameters"]["scoring_function"] = scoring_function

In [None]:
# inception block (not used in this example, hence the empty "smiles" list)
configuration["parameters"]["inception"] = dict(
    smiles=[],                             # optional list of SMILES to use (may stay empty)
    memory_size=100,                       # how many molecules are remembered
    sample_size=10,                        # how many are sampled from the memory each epoch
)

In [None]:
# diversity filter block
configuration["parameters"]["diversity_filter"] = dict(
    name="NoFilter",                       # "NoFilter" disables the feature; other options are
                                           # "IdenticalTopologicalScaffold",
                                           # "IdenticalMurckoScaffold" and "ScaffoldSimilarity"
    nbmax=25,                              # bin size; penalization starts once this is exceeded
    minscore=0.4,                          # minimum total score to be considered for binning
    minsimilarity=0.4,                     # minimum similarity to be placed into the same bin
)

In [None]:
# Write the assembled configuration to disk as JSON inside the output folder.
# Keys are sorted and the file is indented so that it is stable and easy to diff.
# The file name is a plain string (it has no placeholders, so no f-string is
# needed) and the encoding is given explicitly instead of relying on the
# platform default.
configuration_JSON_path = os.path.join(output_dir, "RL_config_.json")
with open(configuration_JSON_path, "w", encoding="utf-8") as f:
    json.dump(configuration, f, indent=4, sort_keys=True)