In [1]:
import sys
# Make the local "reinvent/" folder importable (path is relative to the current
# working directory). The membership guard keeps the cell idempotent:
# re-running it does not append the same entry to sys.path a second time.
if "reinvent/" not in sys.path:
    sys.path.append("reinvent/")

In [2]:
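# Debugging aid (disabled by default): uncomment the two lines below to set
# CUDA_LAUNCH_BLOCKING=1, which makes CUDA kernel launches synchronous so that
# errors are reported at the call that triggered them.
#import os
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"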

In [3]:
import pandas as pd
import numpy as np 
import scipy.stats as sps
from tqdm.auto import tqdm, trange
from models import dataset
from models.model import Model
from utils.smiles import standardize_smiles



In [4]:
# load dependencies
import os
import re
import json
import tempfile

# --------- change these path variables as required
# Both placeholders must be replaced with the real project root before running.
reinvent_dir = os.path.expanduser(
    "***CHANGE ME PROJECT PATH ROOT FOLDER***/reinvent"
)
output_dir = os.path.expanduser(
    "***CHANGE ME PROJECT PATH ROOT FOLDER***/output"
)

# --------- do not change
# Keep an already-defined `ipynb_path`; otherwise fall back to the current
# working directory as the notebook's root path.
try:
    ipynb_path
except NameError:
    ipynb_path = os.getcwd()

# Create the folder that stores the results if it does not exist yet.
os.makedirs(output_dir, exist_ok=True)


# Top-level REINVENT run configuration, with the logging settings nested under
# the "logging" key.
configuration = {
    # we are going to use REINVENT's newest release
    "version": 2,
    # run types mentioned for this setting: "scoring", "validation",
    # "transfer_learning", "reinforcement_learning" and "create_model"
    "run_type": "transfer_learning",
    "seed": 1234,
    "logging": {
        # "sender" and "job_id" are only relevant if "recipient" is set to "remote"
        "sender": "http://127.0.0.1",
        # either log locally ("local") or use a remote REST interface
        "recipient": "local",
        # where the run's output is stored
        "logging_path": os.path.join(output_dir, "progress.log"),
        # arbitrary job name, used for identification
        "job_name": "Transfer Learning Promiscuous Compounds",
        "job_id": "na",
    },
}
# Learning-rate behaviour settings; this dict is later passed to REINVENT under
# the "adaptive_lr_config" key of the "parameters" block.
adaptive_lr_config = dict(
    # modes listed in the original notes: "exponential", "adaptive", "constant"
    mode="constant",
    gamma=0.8,
    step=1,
    start=1E-4,
    min=1E-5,
    threshold=1E-4,
    average_steps=4,
    patience=8,
    restart_value=1E-5,
    sample_size=1000,
    restart_times=0,
)
# The final focused agent will be named "focused.agent".
# The intermediate steps will be named "focused.agent.1", "focused.agent.2",
# "focused.agent.3", etc.
# (No trailing backslash: a line continuation here would silently glue the
# assignment to whatever line follows it.)
output_model_path = os.path.join(output_dir, "focused.agent")

# Add the "parameters" block that describes the transfer-learning run.
configuration["parameters"] = {
    # path to the prior or an already trained agent
    "input_model_path": os.path.join(reinvent_dir, "data", "augmented.prior"),
    # location to store the focused agent
    "output_model_path": output_model_path,
    # SMILES file used as training input
    "input_smiles_path": os.path.join(
        ipynb_path, "data", "pubchem_assay_compounds_processed_training.smi"
    ),
    # how often to save the focused agent; here it is stored after each epoch
    "save_every_n_epochs": 1,
    # batch size for the input data
    "batch_size": 100,
    # number of epochs to focus the agent for
    "num_epochs": 200,
    # the input may contain SMILES strings that are invalid according to the
    # agent; this attempts to clean up the input dataset
    "standardize": True,
    # triggers data augmentation, which is quite important for small datasets
    "randomize": True,
    # learning-rate behaviour
    "adaptive_lr_config": adaptive_lr_config,
    # NOTE(review): very large value -- presumably meant to effectively switch
    # off periodic statistics collection; confirm against REINVENT
    "collect_stats_frequency": 99999999,
}
# Serialise the configuration as JSON (4-space indent, keys sorted) into the
# output folder; the resulting file is what gets handed to REINVENT.
configuration_JSON_path = os.path.join(
    output_dir, "transfer_learning_config.json"
)
with open(configuration_JSON_path, mode="w") as f:
    f.write(json.dumps(configuration, indent=4, sort_keys=True))

In [5]:
%%capture captured_err_stream --no-stderr

# Execute REINVENT from the command line with the JSON configuration file.
# Both interpolated paths are wrapped in double quotes so that directories
# containing spaces or glob characters reach the script as single arguments
# instead of being split or expanded by the shell.
!python "{reinvent_dir}/input.py" "{configuration_JSON_path}"

In [6]:
# Keep the captured console output of the REINVENT run on disk for documentation.
# Note: what gets written is the captured *stdout* text, even though the
# target file is called "run.err".
run_log_path = os.path.join(output_dir, "run.err")
with open(run_log_path, mode="w") as file:
    file.write(captured_err_stream.stdout)