In [None]:
import os
import re
import json
import tempfile
import argparse

In [None]:
# User settings: replace every "{...}" placeholder below before running the notebook.

prior = "{prior location}"          # path to the pre-trained prior model
agent = "{agent location}"          # path to the pre-trained agent model
output_dir = "{results location}"   # folder that receives the config file, log and results
low = "{reward min}" # change this to match property and reward threshold. 

# Locations of the REINVENT checkout and its environment.
# NOTE(review): neither variable is used in the cells visible here — presumably
# consumed by a later "run REINVENT" cell; confirm.
reinvent_dir = os.path.expanduser("{reinvent file location}/Reinvent")
reinvent_env = os.path.expanduser("{reinvent file location}/reinvent.v3.0")

# Create the results folder, including any missing parent folders. An existing
# folder is reused; an existing *file* at that path raises FileExistsError instead
# of being silently accepted.
os.makedirs(output_dir, exist_ok=True)

: 

In [None]:
# Root of the run configuration; the following cells attach their own sections to it.
configuration = dict(
    version=3,                             # configuration version (REINVENT 3)
    run_type="reinforcement_learning",     # other run types: "sampling", "validation",
                                           # "transfer_learning", "scoring", "create_model"
)

In [None]:
# "logging" section: where progress and results are written, and how often.
configuration.update(
    logging=dict(
        recipient="local",                                    # log locally (a remote REST interface is the alternative)
        logging_frequency=50,                                 # log every 50th step
        logging_path=os.path.join(output_dir, "progress.log"),  # point tensorboard at this location
        result_folder=output_dir,                             # receives the compounds (SMILES) and summaries
        job_name="REI Exp 1 (1).",                            # free-form label identifying the job
    )
)

In [None]:
# Start with an empty "parameters" section; the cells below fill it in.
configuration["parameters"] = dict()

In [None]:
# "diversity_filter" section of the parameters.
configuration["parameters"].update(
    diversity_filter=dict(
        name="NoFilter",        # "NoFilter" disables the feature; other options:
                                # "IdenticalTopologicalScaffold", "IdenticalMurckoScaffold",
                                # "ScaffoldSimilarity"
        nbmax=25,               # bin size; penalization starts once it is exceeded
        minscore=0.4,           # minimum total score to be considered for binning
        minsimilarity=0.4,      # minimum similarity to land in the same bin
    )
)

In [None]:
# "inception" section; unused in this example, hence the empty SMILES list.
configuration["parameters"].update(
    inception=dict(
        smiles=list(),          # optional list of SMILES to seed the memory with
        memory_size=100,        # how many molecules are remembered
        sample_size=10,         # how many are sampled from the memory each epoch
    )
)


In [None]:
# Run parameters specific to "reinforcement learning".
configuration["parameters"].update(
    reinforcement_learning=dict(
        prior=prior,                # path to the pre-trained prior model
        agent=agent,                # path to the pre-trained agent model
        n_steps=1000,               # number of epochs (steps) to perform; often 1000
        sigma=128,                  # enters the "augmented likelihood", see publication
        learning_rate=0.0001,       # how strongly each epoch influences the agent
        batch_size=128,             # molecules generated per epoch
        reset=0,                    # if not 0, reset the agent once the threshold is
                                    # reached, to obtain more diverse solutions
        reset_score_cutoff=0.5,     # threshold used when resetting is enabled
        margin_threshold=50,        # (positive) margin between agent and prior
    )
)

In [None]:
# Scoring function definition; it is attached to the configuration at the end of the cell.
scoring_function = dict(
    name="custom_sum",          # how component scores are combined
                                # NOTE(review): the alternative is presumably
                                # "custom_product" — confirm against REINVENT docs
    parallel=False,             # whether components run in parallel; Python's
                                # False / True become JSON false / true

    # "parameters" lists the individual components. Change to alter the reward function.
    parameters=[
        dict(
            component_type="num_hbd_lipinski",
            name="HB-donors (Lipinski)",
            weight=1,
            specific_parameters=dict(
                # step transformation whose "low" bound comes from the settings cell
                transformation=dict(transformation_type="step", low=low),
            ),
        ),
    ],
)
configuration["parameters"].update(scoring_function=scoring_function)

# Serialise the assembled configuration to "RL_config.json" inside the results folder.
configuration_JSON_path = os.path.join(output_dir, "RL_config.json")
with open(configuration_JSON_path, mode="w") as config_file:
    json.dump(
        configuration,
        config_file,
        indent=4,
        sort_keys=True,
    )