Notebook for setting up and running active learning (AL). The scoring function selected below is DockSearch (which tries to optimize AutoDock-GPU docking scores), but it can be switched to LogPSearch or FPSearch if desired. Just remember to switch the scoring inputs as well. Before running, please ensure that the fingerprint.npz and SMILES.parquet files are available. The script data/convert.py is provided to demonstrate the conversion of space-separated .smi files. Please also ensure that AutoDock-GPU and the GPU implementation of torch are installed and available.

In [None]:
from dataclasses import dataclass, field
from typing import Dict, List

from setup import save_config
from main import main

In [None]:
def _default_init_split() -> List[float]:
    """Return a fresh train/val/test split list for each instance."""
    return [0.8, 0.1, 0.1]


def _default_scoring_inputs() -> Dict[str, str]:
    """Return a fresh dict of placeholder inputs for the scoring function."""
    return {
        "fld_file": "/path/to/fld_file.fld",
        # Path to AutoDock-GPU executable
        "autodock": "/path/to/autodock_gpu",
        # Can be any scratch location
        "workdir": "/path/to/output",
        # Path to obabel executable
        "obabel": "/path/to/obabel",
    }


@dataclass
class datasettings:
    """Settings for a run launched from this notebook.

    Every field has a default; the path-like defaults are placeholders that
    should be edited before running. The list and dict defaults are produced
    by factory functions, so each instance gets its own copy.
    """

    # Overwrite
    O: bool = True
    # Directory to store output files
    o: str = "/path/to/output"
    # Location of the directories 'smis' and 'fps'
    i: str = "data"
    # Name of configuration file that will be written
    config: str = "json_files/config.json"
    # Population size
    p_size: int = 10000
    # Number of iterations
    iterations: int = 10
    # Name of model class in ml/models.py
    model_name: str = "NN1"
    # Number of CPUs to use
    cpu: int = 64
    # Number of GPUs to use
    gpu: int = 8
    # Max number of models to run on each GPU for prediction
    maxmodels: int = 1
    # Batch size when training the model
    bsize: int = 512
    # How to split data into train/val/test set after first iteration
    init_split: List[float] = field(default_factory=_default_init_split)
    # One of: FPSearch, LogPSearch, DockSearch
    mode: str = "DockSearch"
    # Inputs required for the scoring function
    scoring_inputs: Dict[str, str] = field(default_factory=_default_scoring_inputs)

In [None]:
# Build the settings object using the defaults declared on `datasettings`.
settings = datasettings()
# Hand the settings to save_config; presumably this writes them to the file
# named by settings.config — confirm in setup.py.
save_config(settings)
# Call main with "AL" (presumably selecting active learning — confirm in
# main.py) and the config file path; whatever main returns is kept in `out`.
out = main("AL", settings.config)