# Saturn Run Configuration Generator

This `Jupyter` notebook introduces each component of the run configuration for `Saturn`. Running the notebook writes the configuration `JSON` to `config.json`, which can then be run directly via:

**python saturn.py `JSON`**

**Note**: The syntax of the configuration `JSON` is similar to `REINVENT 3.2`: https://github.com/MolecularAI/Reinvent


In [1]:
# Imports
import os
import json

In [3]:
# Paths denoted "FILL" should be modified accordingly

# The run configuration is assembled section by section below and combined into
# the final `config` dictionary at the end. Key order matches the emitted JSON.

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
_logging_section = {
    "logging_path": "FILL",  # Where to save the output log file
    "model_checkpoints_dir": "FILL",  # Directory to save the model checkpoints
}

# ---------------------------------------------------------------------------
# Oracle
# ---------------------------------------------------------------------------
# Each dictionary below is one oracle component to optimize for. There is no
# limit to the number of simultaneous objectives, but too many will make
# optimization difficult due to reward sparsity. In a situation like this,
# curriculum learning could be applied:
# https://www.nature.com/articles/s42256-022-00494-4
# For a list of supported oracle components, see oracles/utils.py
#
# Key parameters:
#   "weight": weight of the oracle component - higher makes the reward
#       contribution from the component more important.
#   "preliminary_check": whether to run this specific oracle component first
#       and, if the reward does not pass a threshold, discard the SMILES. This
#       is useful for components that are computationally inexpensive as a way
#       to "pre-screen" the batch and not waste oracle calls.
#   "reward_shaping_function_parameters": reward shaping function to apply to
#       the component. The syntax is exactly the same as REINVENT 3.2. See the
#       following notebook for function visualizations:
#       https://github.com/MolecularAI/ReinventCommunity/blob/master/notebooks/Score_Transformations.ipynb
_tpsa_component = {
    "name": "tpsa",
    "weight": 1,
    "preliminary_check": False,
    "specific_parameters": {},
    "reward_shaping_function_parameters": {
        "transformation_function": "sigmoid",
        "parameters": {"low": 75, "high": 350, "k": 0.15},
    },
}

_mw_component = {
    "name": "mw",
    "weight": 1,
    "preliminary_check": False,
    "specific_parameters": {},
    "reward_shaping_function_parameters": {
        "transformation_function": "double_sigmoid",
        "parameters": {
            "low": 0,
            "high": 350,
            "coef_div": 500,
            "coef_si": 250,
            "coef_se": 250,
        },
    },
}

_num_rings_component = {
    "name": "num_rings",
    "weight": 1,
    "preliminary_check": False,
    "specific_parameters": {},
    "reward_shaping_function_parameters": {
        "transformation_function": "step",
        "parameters": {"low": 2, "high": 5},
    },
}

_oracle_section = {
    # Number of *unique* oracle calls permitted
    "budget": 1000,
    # Whether to allow oracle repeats - if this is True, using Mamba will
    # result in lots of repeated oracle calls
    "allow_oracle_repeats": False,
    # Reward aggregator - relevant with > 1 oracle components.
    # See oracles/reward_aggregator/reward_aggregator.py for more details
    "aggregator": "product",
    "components": [_tpsa_component, _mw_component, _num_rings_component],
}

# ---------------------------------------------------------------------------
# Goal-directed generation
# ---------------------------------------------------------------------------
_reinforcement_learning = {
    "prior": "FILL",  # Path to pre-trained model
    "agent": "FILL",  # Usually same as prior
    # The parameters below are the optimal found in the paper, but can be adjusted.
    "batch_size": 16,
    "learning_rate": 0.0001,
    "sigma": 128.0,
    "augmented_memory": True,
    "augmentation_rounds": 10,
    # *Highly* recommended that this is kept True - otherwise detrimental mode
    # collapse rapidly occurs, especially with augmentation rounds > 2
    "selective_memory_purge": True,
}

_experience_replay = {
    "memory_size": 100,  # Maximum size of the Replay Buffer
    # Number of SMILES to sample for experience replay - this is independent of
    # Augmented Memory, which will always sample the full memory
    "sample_size": 10,
    # Optionally seed the Replay Buffer with known positive examples - this is
    # known as "Inception" in REINVENT
    "smiles": [],
}

# Based on REINVENT
_diversity_filter = {
    # This is not used at the moment - by default, scaffolds are Bemis-Murcko
    # (considers heavy atoms)
    "name": "IdenticalMurckoScaffold",
    # Maximum number of times a Bemis-Murcko scaffold can be sampled before
    # reward is truncated to 0
    "bucket_size": 10,
}

# Controls whether to activate the genetic algorithm on the Replay Buffer
_hallucinated_memory = {
    "execute_hallucinated_memory": False,  # True will execute the genetic algorithm
    # Technically, "sequence" and "ga" are supported, but "sequence" is unused
    # at the moment
    "hallucination_method": "ga",
    "num_hallucinations": 100,  # Number of SMILES to hallucinate
    "num_selected": 5,  # Number of SMILES to select from the total hallucinations
    # How to select the hallucinations - "random" selects randomly while
    # "tanimoto_distance" selects based on max *dissimilarity* to the Replay Buffer
    "selection_criterion": "random",
}

# Controls whether to run Beam Enumeration
# NOTE: This is currently only implemented for the "rnn" model architecture
# See https://arxiv.org/abs/2309.13957 for details on the parameters
_beam_enumeration = {
    "execute_beam_enumeration": False,  # True will execute Beam Enumeration
    "beam_k": 2,
    "beam_steps": 18,
    "substructure_type": "structure",
    "structure_min_size": 15,
    "pool_size": 4,
    "pool_saving_frequency": 1000,
    "patience": 5,
    "token_sampling_method": "topk",
    "filter_patience_limit": 100000,
}

_goal_directed_generation_section = {
    "reinforcement_learning": _reinforcement_learning,
    "experience_replay": _experience_replay,
    "diversity_filter": _diversity_filter,
    "hallucinated_memory": _hallucinated_memory,
    "beam_enumeration": _beam_enumeration,
}

# ---------------------------------------------------------------------------
# Distribution learning
# ---------------------------------------------------------------------------
_distribution_learning_section = {
    "parameters": {
        # Only used if transfer_learning is True - use case is transfer
        # learning on an already trained model
        "agent": "FILL",
        "training_steps": 20,
        "batch_size": 512,
        "learning_rate": 0.0001,
        "training_dataset_path": "FILL",  # Path to the training dataset of SMILES
        # If True, the training batch is randomized at every training step
        "train_with_randomization": True,
        "transfer_learning": False,
    },
}

# ---------------------------------------------------------------------------
# Full run configuration
# ---------------------------------------------------------------------------
config = {
    "logging": _logging_section,
    "oracle": _oracle_section,
    "goal_directed_generation": _goal_directed_generation_section,
    "distribution_learning": _distribution_learning_section,
    # Choose between "distribution_learning" or "goal_directed_generation"
    "running_mode": "goal_directed_generation",
    # Choose between "rnn", "decoder", "mamba"
    "model_architecture": "mamba",
    # All models can be run on CPU, in which case, change to "cpu".
    # NOTE: Mamba runs will be notably slower on CPU
    "device": "cuda",
    "seed": 0,
}

# Save the configuration file as "config.json" (relative to the current working directory).
# The encoding is pinned to UTF-8 so the written file does not depend on the
# platform's default locale encoding.
with open("config.json", "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2)