Notebook for setting up and running a modified version of the genetic algorithm developed by Jensen (Jensen, J. H. (2019). A graph-based genetic algorithm and generative model/Monte Carlo tree search for the exploration of chemical space. Chemical Science, 10(12), 3567-3572).
The scoring function selected below is FPSearch (which tries to find molecules that are similar to aspirin), but it can be switched for LogPSearch or DockSearch if desired. Just remember to switch the scoring inputs as well.

In [1]:
from dataclasses import dataclass, field
from typing import Dict

from setup import save_config
from main import main

In [2]:
def _default_scoring_inputs() -> Dict[str, str]:
    # Default input for the scoring function: the query SMILES used by the
    # FPSearch mode (aspirin, per the notebook description).
    return {"smiles": "CC(=O)OC1=CC=CC=C1C(=O)O"}


def _default_filtering_inputs() -> Dict[str, float]:
    # Default min/max bounds for molecule filtering. A fresh dict is built on
    # every call so that instances never share the same mutable object.
    bounds = {
        "minlogP": -4,
        "maxlogP": 4.2,
        "minMw": 0,
        "maxMw": 460,
        "minHBA": 0,
        "maxHBA": 9,
        "minHBD": 0,
        "maxHBD": 5,
        "minRings": 0,
        "maxRings": 4,
        "minRotB": 0,
        "maxRotB": 10,
    }
    return bounds


@dataclass
class datasettings:
    """Settings for one genetic-algorithm run.

    An instance is handed to ``save_config`` and its ``config`` path is then
    passed on to ``main``. Every field has a default, so ``datasettings()``
    gives a complete configuration; override individual fields by keyword.
    """

    # --- Input / output ---
    O: bool = True  # Overwrite
    o: str = "/path/to/output"  # Directory to store output files
    i: str = "data/ZINC.smi"  # .smi file with molecules for the start population
    config: str = "json_files/config.json"  # Name of the configuration file that will be written

    # --- Genetic algorithm parameters ---
    p_size: int = 100  # Population size
    children: int = 100  # Number of children per generation
    crossover_rate: float = 0.1  # Crossover rate
    mutation_rate: float = 1.0  # Mutation rate
    iterations: int = 20  # Number of iterations
    sim_cutoff: float = 0.35  # Similarity cutoff applied after each iteration (1.00: no filtering)

    # --- Hardware ---
    cpu: int = 64  # Number of CPUs to use
    gpu: int = 8  # Number of GPUs to use

    # --- Scoring and filtering ---
    mode: str = "FPSearch"  # One of: FPSearch, LogPSearch, DockSearch
    # Inputs required for the scoring function (must match ``mode``)
    scoring_inputs: Dict[str, str] = field(default_factory=_default_scoring_inputs)
    # Inputs for molecule filtering
    filtering_inputs: Dict[str, float] = field(default_factory=_default_filtering_inputs)

In [3]:
# Build the run settings using the defaults declared on `datasettings`.
settings = datasettings()
# Hand the settings to `save_config` — presumably this writes them to the
# file named by `settings.config`; confirm against setup.py.
save_config(settings)
# Invoke `main` with the "GA" argument and the path of the configuration file.
# NOTE(review): the meaning of the return value `out` is not visible here.
out = main("GA", settings.config)