Notebook for setting up and running SpaceGA. The scoring function selected below is LogPSearch (trying to find molecules with a high logP), but it can be switched for FPSearch or DockSearch if desired. Just remember to switch the scoring inputs as well. To run SpaceGA, you need to have [SpaceLight](https://www.biosolveit.de/spacelight-a-spotlight-on-the-analog-hunter-for-chemical-spaces/) installed with a license. Further, to use [Lilly Medchem Rules](https://github.com/IanAWatson/Lilly-Medchem-Rules) as an additional filter, you need to have that installed as well.

In [1]:
from dataclasses import dataclass, field
from typing import Dict

from setup import save_config
from main import main

In [14]:
def _default_scoring_inputs() -> Dict[str, str]:
    """Return a fresh, empty mapping of scoring-function inputs.

    The default is empty; fill it in with whatever the selected ``mode``
    requires when switching scoring functions.
    """
    return {}


def _default_filtering_inputs() -> Dict[str, float]:
    """Return a fresh copy of the default molecule-filter settings.

    Keys prefixed with ``min``/``max`` are lower/upper bounds for the named
    property; ``BRENK`` and ``PAINS`` are boolean switches. The commented-out
    entries are optional filters that can be enabled by uncommenting them.
    """
    return {
        "minlogP": -4,
        "maxlogP": 100,
        "minMw": 0,
        "maxMw": 460,
        "minHBA": 0,
        "maxHBA": 9,
        "minHBD": 0,
        "maxHBD": 5,
        "minRings": 0,
        "maxRings": 4,
        "minRotB": 0,
        "maxRotB": 10,
        "BRENK": True,
        # "Lilly": "/path/to/Lilly_Medchem_Rules.rb",
        "PAINS": True,
        # "Substructure": "[NX3][CX3](=[OX1])[#6]"
    }


@dataclass
class datasettings:
    """Settings for one SpaceGA run; every field has a default value.

    Paths below are machine-specific and must be adapted before running.
    """

    # --- Files and directories ---
    O: bool = True  # Overwrite existing output
    o: str = "/home/laust/scratch/spacega"  # Directory in which output files are stored
    i: str = "data/ZINC.smi"  # .smi file holding the molecules of the start population
    config: str = "json_files/spacega.json"  # Name of the configuration file that will be written

    # --- Genetic-algorithm parameters ---
    p_size: int = 10  # Population size
    # Number of children per generation / population size.
    # NOTE(review): presumably this means children per population member — confirm.
    children: int = 100
    crossover_rate: float = 0.1  # Crossover rate
    iterations: int = 10  # Number of iterations
    sim_cutoff: float = 1.00  # Similarity cutoff applied after each iteration (1.00: no filtering)

    # --- Hardware ---
    cpu: int = 10  # Number of CPUs to use
    gpu: int = 1  # Number of GPUs to use

    # --- BioSolveIT chemical space and SpaceLight ---
    space: str = "/home/laust/Downloads/REALSpace_70bn_2024-09.space"  # Path to BioSolveIT space
    spacelight: str = "/home/laust/code/biosolveit/spacelight-1.5.0-Linux-x64/spacelight"  # Path to spacelight executable
    f_comp: int = 100  # Find the top f_comp*children most similar molecules to compensate for filtering

    # --- Scoring and filtering ---
    mode: str = "LogPSearch"  # One of: FPSearch, LogPSearch, DockSearch
    # Inputs required for the scoring function
    scoring_inputs: Dict[str, str] = field(default_factory=_default_scoring_inputs)
    # Inputs required for molecule filtering
    filtering_inputs: Dict[str, float] = field(default_factory=_default_filtering_inputs)

In [15]:
# Instantiate the run settings with their default values.
settings = datasettings()
# Persist the settings; presumably this writes the configuration file named by
# settings.config — verify against setup.save_config.
save_config(settings)
# Start the "SpaceGA" run from that configuration file. Must come after
# save_config so the file exists. NOTE(review): the contents of `out` are not
# visible from this notebook — check main.main for the return value.
out = main("SpaceGA", settings.config)

Overwriting output
00:00:00: Generation 1 - 1000 molecules have been scored (mean: 5.47, best: 6.12)
00:00:43: Generation 2 - 1984 molecules have been scored (mean: 6.97, best: 7.28)
00:01:27: Generation 3 - 2935 molecules have been scored (mean: 7.71, best: 7.99)
00:02:08: Generation 4 - 3873 molecules have been scored (mean: 7.96, best: 8.03)
00:02:49: Generation 5 - 4772 molecules have been scored (mean: 8.19, best: 8.61)
00:03:33: Generation 6 - 5712 molecules have been scored (mean: 8.67, best: 8.97)
00:04:16: Generation 7 - 6702 molecules have been scored (mean: 8.83, best: 8.97)
00:04:59: Generation 8 - 7686 molecules have been scored (mean: 8.98, best: 9.08)
00:05:40: Generation 9 - 8671 molecules have been scored (mean: 9.05, best: 9.20)
00:06:21: Generation 10 - 9643 molecules have been scored (mean: 9.07, best: 9.20)
