Notebook for setting up and running SpaceGA. The scoring function selected below is LogPSearch (which tries to find molecules with a high logP), but it can be switched for another scoring function, such as DockSearch, if desired. Just remember to switch the scoring inputs as well. To run SpaceGA, you need to have [SpaceLight](https://www.biosolveit.de/spacelight-a-spotlight-on-the-analog-hunter-for-chemical-spaces/) installed with a license. Further, to use the [Lilly Medchem Rules](https://github.com/IanAWatson/Lilly-Medchem-Rules) as an additional filter, you need to have them installed as well.

In [None]:
from dataclasses import dataclass, field
from typing import Dict
import numpy as np

from spacega import SpaceGA

In [None]:
def _default_filtering_inputs():
    """Return a new dict holding the default filtering inputs."""
    return {
        "minlogP": -4,
        "maxlogP": 100,
        "minMw": 0,
        "maxMw": 460,
        "minHBA": 0,
        "maxHBA": 9,
        "minHBD": 0,
        "maxHBD": 5,
        "minRings": 0,
        "maxRings": 4,
        "minRotB": 0,
        "maxRotB": 10,
        "BRENK": True,
        # Uncomment and point at the Lilly Medchem Rules script to enable it:
        # "Lilly": "/path/to/Lilly_Medchem_Rules.rb",
        "PAINS": True,
        "Substructure": "CCCO",
    }


@dataclass
class datasettings:
    """Settings bundle for a SpaceGA run.

    Every field has a default, so ``datasettings()`` can be created without
    arguments; edit the placeholder paths below before running.
    """

    # --- Required inputs ---
    # Overwrite
    O: bool = True
    # Directory to store output files
    o: str = '/path/to/output'
    # .smi file with molecules for the start population
    i: str = "/path/to/input.smi"
    # Path to the BiosolveIT space
    space: str = "data/desired_space.space"
    # Path to the SpaceLight executable
    spacelight: str = "/path/to/spacelight"

    # --- Optional inputs ---
    # Population size
    p_size: int = 100
    # Number of children per generation
    children: int = 100
    # Crossover rate
    crossover_rate: float = 0.1
    # Number of iterations
    iterations: int = 10
    # Similarity cutoff applied after each iteration (1.00: no filtering)
    sim_cutoff: float = 1.00
    # Number of CPUs to use
    cpu: int = 64
    # Number of CPUs to use when running SpaceLight
    sl_cpu: int = 64
    # Find the top f_comp*children most similar molecules to compensate
    # for filtering
    f_comp: int = 100
    # Fingerprint type
    fp_type: str = "ECFP4"

    # --- Optional inputs for "al" ---
    # NOTE(review): "al" presumably stands for active learning, with
    # model_name/patience configuring it -- confirm against SpaceGA.
    al: bool = True
    model_name: str = "NN1"
    patience: int = 5

    # --- Optional inputs for scoring ---
    # Inputs required for the scoring function (empty by default)
    scoring_inputs: Dict[str, str] = field(default_factory=dict)

    # --- Optional inputs for filtering ---
    # NOTE(review): annotated as Dict[str, float], but the defaults also
    # contain bool ("BRENK", "PAINS") and str ("Substructure") values.
    filtering_inputs: Dict[str, float] = field(
        default_factory=_default_filtering_inputs
    )

In [None]:
class MyScoringTool:
    """Placeholder scoring tool that assigns a random score to each molecule."""

    def __init__(self, arguments):
        """Accept an arguments mapping; this placeholder does not use it."""

    def score(self, smi_lst, name_lst):
        """Return one random score per entry of ``smi_lst``.

        Args:
            smi_lst: Sequence of molecules to score; only its length is used.
            name_lst: Accepted for interface compatibility; not used here.

        Returns:
            A NumPy array of ``len(smi_lst)`` floats drawn by
            ``np.random.random``.
        """
        n_molecules = len(smi_lst)
        return np.random.random(n_molecules)

In [None]:
# Run a single round of SpaceGA by hand: reproduce, score, update population.
# Leave generation at 0 for a fresh run; to restart, set it to the generation
# number of the last completed run.
generation = 0
settings = datasettings()
# Every settings field is forwarded to SpaceGA as a keyword argument.
spacega = SpaceGA(generation=generation, **vars(settings))
# The scoring tool receives the SpaceGA instance's attribute dict as arguments.
scoringtool = MyScoringTool(vars(spacega))
spacega.gen += 1
offspring = spacega.reproduce()
# Score the offspring from their SMILES and names, stored in a "scores" column.
offspring_smiles = offspring.smi.to_list()
offspring_names = offspring.name.to_list()
offspring["scores"] = scoringtool.score(offspring_smiles, offspring_names)
spacega.population = spacega.update_pop(offspring)