In [1]:
# Make the repository root importable when the notebook is run locally.
import sys

repo_root = "../"
# Prepend only once so re-running this cell does not stack duplicate entries.
if sys.path.count(repo_root) == 0:
    sys.path.insert(0, repo_root)

In [2]:
from typing import Optional
from guacamol.goal_directed_generator import GoalDirectedGenerator
from guacamol.scoring_function import ScoringFunction
from guacamol.assess_goal_directed_generation import assess_goal_directed_generation
from rdkit import Chem

from reward import MolReward
from utils import conf_from_yaml, generator_from_conf

class GuacaMolReward(MolReward):
    """Reward that exposes a GuacaMol ``ScoringFunction`` as a single objective.

    The wrapped scoring function is kept on ``self.scoring_function`` and is
    looked up each time a molecule is scored.
    """

    def __init__(self, scoring_function: ScoringFunction):
        # Only the scoring function is stored; no other state is set up here.
        self.scoring_function = scoring_function

    # implement
    def mol_objective_functions(self):
        """Return the list of objective callables (a single one, ``raw_score``)."""

        def raw_score(mol):
            # The GuacaMol scorer is called with a SMILES string, so the
            # molecule object is serialised with RDKit first.
            return self.scoring_function.score(Chem.MolToSmiles(mol))

        objectives = [raw_score]
        return objectives

    # implement
    def reward_from_objective_values(self, objective_values):
        """Return the first (and only) objective value unchanged as the reward."""
        return objective_values[0]

class V3DeNovoGenerator(GoalDirectedGenerator):
    """GuacaMol goal-directed generator backed by a generator built from ``conf``."""

    def __init__(self, conf):
        self.conf = conf
        self.generator = generator_from_conf(conf)

    # implement
    def generate_optimized_molecules(
        self,
        scoring_function: ScoringFunction,
        number_molecules: int,
        starting_population: Optional[list[str]] = None,
    ) -> list[str]:
        """Run the generator against ``scoring_function`` and return its generated keys.

        ``number_molecules`` and ``starting_population`` are accepted for
        interface compatibility but are not used by this implementation: every
        key reported by the generator is returned.
        NOTE(review): presumably the benchmark itself selects the top-scoring
        molecules from the returned list -- confirm against the guacamol version in use.
        """
        generator = self.generator
        # Swap in a reward that wraps the benchmark's scoring function.
        generator.reward = GuacaMolReward(scoring_function=scoring_function)

        # Both limits come from the configuration; missing keys yield None.
        generation_limit = self.conf.get("max_generations")
        time_limit = self.conf.get("time_limit")
        generator.generate(max_generations=generation_limit, time_limit=time_limit)

        return generator.generated_keys()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from guacamol.assess_goal_directed_generation import _evaluate_goal_directed_benchmarks
from guacamol.benchmark_suites import goal_directed_benchmark_suite
from guacamol.goal_directed_benchmark import GoalDirectedBenchmarkResult

def assess(goal_directed_molecule_generator: GoalDirectedGenerator, benchmark_version='v1') -> list[GoalDirectedBenchmarkResult]:
    """Evaluate a generator on a GuacaMol goal-directed benchmark suite.

    Args:
        goal_directed_molecule_generator: The generator to evaluate.
        benchmark_version: Name of the suite passed to
            ``goal_directed_benchmark_suite`` (defaults to ``'v1'``).

    Returns:
        The value returned by ``_evaluate_goal_directed_benchmarks`` for the
        selected suite.
    """
    suite = goal_directed_benchmark_suite(version_name=benchmark_version)
    return _evaluate_goal_directed_benchmarks(
        goal_directed_molecule_generator=goal_directed_molecule_generator,
        benchmarks=suite,
    )

In [None]:
import optuna
from rdkit import RDLogger
from utils import generator_from_conf, conf_from_yaml
# Silence RDKit's rdApp.* log output for the duration of the search.
RDLogger.DisableLog('rdApp.*')

yaml_path = "config/optuna_guacamol.yaml"

# Labels attached to the benchmark scores by position in `objective`;
# the order of this list is therefore significant.
v1_benchmark_names = [
    'C11H24',
    'C7H8N2O2',
    'C9H10N2O2PF2Cl',
    'Cobimetinib MPO',
    'Osimertinib MPO',
    'Fexofenadine MPO',
    'Physchem MPO',
    'Ranolazine MPO',
    'Celecoxib rediscovery',
    'Troglitazone rediscovery',
    'Thiothixene rediscovery',
    'Aripiprazole similarity',
    'Albuterol similarity',
    'Mestranol similarity',
    'logP (target: -1.0)',
    'logP (target: 8.0)',
    'TPSA (target: 150.0)',
    'CNS MPO',
    'QED',
    'Median molecules 1',
]

# Study-level settings are read once from the YAML configuration.
conf = conf_from_yaml(yaml_path, repo_root)
name = conf.get("study_name")
n_trials = conf.get("n_trials")

def objective(trial: optuna.Trial):
    """Optuna objective: sample generator hyperparameters and score them on GuacaMol.

    A fresh configuration is loaded from ``yaml_path`` for every trial, overridden
    with the values suggested by ``trial``, and used to build a
    ``V3DeNovoGenerator`` that is evaluated with ``assess``.

    Args:
        trial: The Optuna trial used to sample parameters and to record the
            per-benchmark scores as user attributes.

    Returns:
        The sum of all benchmark scores (also stored under the ``sum_score``
        user attribute).
    """
    # Reload the configuration so overrides from one trial never leak into the next.
    conf = conf_from_yaml(yaml_path, repo_root)

    conf["max_generations"] = trial.suggest_categorical("max_generations", [25000, 20000, 10000, 8192, 5000])

    conf.setdefault("transition_args", {})
    conf["transition_args"]["sharpness"] = trial.suggest_float("sharpness", 0.8, 1.1)
    conf["transition_args"]["top_p"] = trial.suggest_float("top_p", 0.993, 0.999)

    conf.setdefault("policy_args", {})
    conf["policy_args"]["c"] = trial.suggest_float("c", 0.05, 0.4)
    conf["policy_args"]["best_rate"] = trial.suggest_float("best_rate", 0, 1)
    conf["policy_args"]["prior"] = trial.suggest_float("prior", 0.5, 1.4)
    conf["policy_args"]["prior_weight"] = trial.suggest_int("prior_weight", 0, 2)
    conf["policy_args"]["max_prior"] = trial.suggest_float("max_prior", 0.2, 0.9)

    conf.setdefault("generator_args", {})
    conf["generator_args"]["eval_width"] = trial.suggest_int("eval_width", 1, 40)
    conf["generator_args"]["n_evals"] = trial.suggest_int("n_evals", 1, 10)
    conf["generator_args"]["n_tries"] = trial.suggest_int("n_tries", 1, 3)

    gdg = V3DeNovoGenerator(conf)
    gdg.generator.logger.info(f"params={trial.params}")

    results = assess(gdg)
    scores = [result.score for result in results]

    for i, (result, score) in enumerate(zip(results, scores)):
        if i < len(v1_benchmark_names):
            # Positional label, kept so attribute keys stay stable within a study.
            label = v1_benchmark_names[i]
        else:
            # The suite returned more results than the hand-maintained list covers.
            # Fall back to the name carried by the result (or a generic label)
            # instead of raising IndexError after the whole evaluation has run.
            label = getattr(result, "benchmark_name", f"benchmark_{i}")
        trial.set_user_attr(label, score)
    sum_score = sum(scores)
    trial.set_user_attr("sum_score", sum_score)

    print_trial(trial)

    return sum_score
    
def print_trial(trial: optuna.Trial):
    """Print a one-line summary of a trial: its number, user attributes and parameters."""
    summary = f"Trial {trial.number}: attrs={trial.user_attrs}, params={trial.params}"
    print(summary)
    
def print_best_trials(study: optuna.Study):
    """Print the five completed trials of ``study`` with the highest objective value."""
    print("Optuna trials completed.")
    print("------ Best trials -----")

    # Only completed trials are ranked.
    completed = []
    for candidate in study.trials:
        if candidate.state == optuna.trial.TrialState.COMPLETE:
            completed.append(candidate)

    # Highest value first; keep the top five.
    completed.sort(key=lambda candidate: candidate.value, reverse=True)
    for best in completed[:5]:
        print_trial(best)

In [None]:
# start search
# The study is persisted in a SQLite file named after the configured study name.
storage = "sqlite:///optuna/" + name + ".db"
sampler = optuna.samplers.TPESampler(multivariate=True, group=True)
# sampler = optuna.samplers.GPSampler(deterministic_objective=False) # better if not using pruner?
# NOTE(review): `objective` never reports intermediate values, so this pruner
# presumably has no effect -- confirm before relying on it.
pruner = optuna.pruners.MedianPruner(n_startup_trials=3, n_warmup_steps=0, interval_steps=1)
study = optuna.create_study(direction="maximize", study_name=name, storage=storage, sampler=sampler, pruner=pruner)
# Seed the search with a hand-picked parameter set ("max_generations" is not fixed here).
study.enqueue_trial({'sharpness': 0.98, 'top_p': 0.998, 'c': 0.17, 'best_rate': 0.95, 'prior': 1.0, 'prior_weight': 2, 'max_prior': 0.3, 'eval_width': 16, 'n_evals': 5, 'n_tries': 3})
study.optimize(objective, n_trials=n_trials)
print_best_trials(study)

In [None]:
# continue search
# Re-open the stored study and run additional trials on top of the existing ones.
# No sampler or pruner is passed to load_study here.
study = optuna.study.load_study(
    study_name="d_score_200000",
    storage="sqlite:///optuna/d_score_200000.db",
)
study.optimize(objective, n_trials=300)
print_best_trials(study)

In [None]:
# add parameters
# Copy every trial of an existing study into a new study, extending each trial
# with an additional "cut_failed_child" parameter fixed to False.
# The storage URL uses a single "sqlite:///" scheme prefix, matching the
# "continue search" cell; a doubled prefix would point at a non-existent path.
study = optuna.study.load_study(study_name="d_score_200000", storage="sqlite:///optuna/d_score_200000.db")

sampler = optuna.samplers.TPESampler(multivariate=True, group=True)
pruner = optuna.pruners.MedianPruner(n_startup_trials=3, n_warmup_steps=0, interval_steps=1)
new_storage="sqlite:///optuna/d_score_200000_new.db"
study_with_new_param = optuna.create_study(direction="maximize", study_name=name, storage=new_storage, sampler=sampler, pruner=pruner)

for trial in study.trials:
    params = trial.params
    dists = trial.distributions

    # Record the value the old trials implicitly ran with, together with the
    # distribution the new parameter will be sampled from in future trials.
    params["cut_failed_child"] = False
    dists["cut_failed_child"] = optuna.distributions.CategoricalDistribution([True, False])

    trial.params = params
    trial.distributions = dists

    study_with_new_param.add_trial(trial)