## Manual RUAL
This notebook shows how RUAL can be run using a custom scoring function and the built-in docking pipeline utilizing SMINA.

In [None]:
# Load RUAL
from rual.al.rual import RUAL

In [None]:
# Define input parameters
from dataclasses import dataclass
@dataclass
class datasettings:
    """Input parameters for a RUAL run.

    The path defaults are placeholders. The notebook turns an instance of
    this class into a plain dict and hands that dict to ``RUAL``.
    """

    # Overwrite
    O: bool = True
    # Directory to store output files
    o: str = "/path/to/output"
    # Location of the directories 'smis' and 'fps'. Can be generated using
    # /path/to/rual/rual/database/dbbuilder.py
    database: str = "/path/to/rual_db"
    # Name of configuration file that will be written
    config: str = "/path/to/config.json"
    # Number of molecules tested per iteration
    batch_size: int = 10000
    # Number of molecules to test in the final iteration
    final_sample: int = 20000
    # Number of iterations
    iterations: int = 10
    # Number of bundles evaluated in first iteration
    base_bundles: int = 1
    # Number of molecules in largest bundle
    max_bundle_size: int = 1_000_000
    # Name of model class in ml/models.py
    model_name: str = None
    # Fraction of data saved as test set after round 1
    test_fraction: float = 0
    # Number of CPUs to use
    cpus: int = 10
    # Round to start from; a value larger than 1 means restarting
    restart: int = 1

In [None]:
# Define scoring function
from abc import ABC, abstractmethod
from rdkit import Chem
from rdkit.Chem.Descriptors import MolLogP

class Scorer(ABC):
    """Base interface for scoring functions.

    Subclasses must provide their own ``__init__`` (it is abstract here, so
    ``Scorer`` itself cannot be instantiated) and normally override
    ``score``.
    """

    @abstractmethod
    def __init__(self, arguments):
        """Configure the scorer.

        Args:
            arguments: Run settings for the scorer. Presumably the same
                settings dict that is passed to RUAL -- verify against the
                caller.
        """

    def score(self, df, workdir):
        """Write a constant score of 0 for every entry and return the table.

        Args:
            df: Table to score; a ``"score"`` entry is assigned on it in
                place.
            workdir: Not used by this default implementation.

        Returns:
            The same ``df`` object, now carrying the ``"score"`` entry.
        """
        df["score"] = 0
        # Hand the table back so callers can use the return value directly.
        return df

class LogP():
    """Scoring function that uses RDKit's ``MolLogP`` value as the score."""

    def __init__(self, arguments):
        """Accept the run settings; this scorer does not use them.

        Args:
            arguments: Ignored.
        """

    def smi2logp(self, smi):
        """Return the ``MolLogP`` value of the molecule given as SMILES.

        Args:
            smi: SMILES string of the molecule.

        Returns:
            The value computed by ``MolLogP`` for the parsed molecule.

        Raises:
            ValueError: If RDKit cannot parse ``smi``.
        """
        mol = Chem.MolFromSmiles(smi)
        # MolFromSmiles signals a parse failure by returning None; fail with
        # a message that names the offending input instead of letting
        # MolLogP choke on None.
        if mol is None:
            raise ValueError(f"Could not parse SMILES: {smi!r}")
        return MolLogP(mol)

    def score(self, df, workdir):
        """Add a ``"score"`` column holding the logP of every molecule.

        Args:
            df: DataFrame with a ``"smi"`` column of SMILES strings; it is
                modified in place.
            workdir: Not used by this scorer.

        Returns:
            The same ``df`` object with the ``"score"`` column set.
        """
        # Apply over the single column: no per-row Series is built, and an
        # empty frame still yields a Series that can be assigned.
        df["score"] = df["smi"].apply(self.smi2logp)
        return df

In [None]:
# Define surrogate model
import numpy as np
from sklearn.ensemble import RandomForestRegressor

class RF1std:
    """Surrogate model wrapping a scikit-learn ``RandomForestRegressor``."""

    def __init__(self):
        """Build the forest and fit it once on random data.

        The fit uses 1000 random rows of 2048 values drawn from {0, 1} and
        integer targets drawn from 0..9.
        NOTE(review): presumably this only puts the estimator into a fitted
        state before real training data exists, and 2048 presumably matches
        the fingerprint length -- confirm.
        """
        n_rows, n_cols = 1000, 2048
        # Draw the features before the targets so the global NumPy RNG is
        # consumed in a fixed order.
        features = np.random.randint(2, size=[n_rows, n_cols])
        targets = np.random.randint(10, size=n_rows)

        forest = RandomForestRegressor(n_jobs=-1)
        forest.fit(features, targets)
        self.model = forest

    def predict(self, X):
        """Return the wrapped forest's predictions for ``X``."""
        predictions = self.model.predict(X)
        return predictions

In [None]:
# Setup and run RUAL
# RUAL is constructed from a plain dict, so expose the dataclass fields as one.
settings = datasettings()
setting_dict = vars(settings)

# Create the RUAL instance, then plug in the custom scorer and surrogate
# model defined in the cells above.
rual_instance = RUAL(setting_dict)
rual_instance.scorer = LogP(setting_dict)
rual_instance.model = RF1std()

# Keep starting new rounds until the instance reports that it is final.
while not rual_instance.final:
    rual_instance.new_round()