# Basic workflow (without YAML)

A more practical workflow (de novo molecular generation, chained to lead optimization) is covered in `tutorial_user_2.ipynb`.

For YAML workflow, refer to `generation.ipynb`, `chain_generation.ipynb` and `config/generation/mcts_example.yaml`.

In [None]:
# Path setup: put the repository root at the front of the import search path.

import sys

# Change this if running the notebook from a different directory.
repo_root = "../"

# Insert only once, so re-running this cell does not add duplicate entries.
if sys.path.count(repo_root) == 0:
    sys.path.insert(0, repo_root)

In [None]:
# Imports (may take some time on the first run)

from filter import ValidityFilter, RadicalFilter
from generator import MCTS
from node import SMILESStringNode
from policy import PUCT
from transition import JensenTransition
from reward import LogPReward

In [None]:
# Set up a generator (without YAML)

# Root node of the search, built from the SMILES string of benzene.
benzene = SMILESStringNode.node_from_key("c1ccccc1")
reward = LogPReward()

# ValidityFilter checks whether the molecule is valid. Since other filters and rewards
# typically assume validity and do not recheck it, this filter should usually be applied
# first in molecular generation.
filters = [ValidityFilter(), RadicalFilter()]

# Hover over the class name (e.g., "PUCT") to see its available arguments, types,
# default values, and descriptions (may not be supported in all IDEs).
uct = PUCT(c=0.1, best_rate=0.9)

generator = MCTS(
    root=benzene,
    transition=JensenTransition(),
    reward=reward,
    filters=filters,
    filter_reward=[-1, 0],
    policy=uct,
    avoid_duplicates=True,
    cut_failed_child=True,
    info_interval=100,
    output_dir="generation_result/tutorial_1",
)

In [None]:
# Start generation

# Stops generation when either the number of generated nodes reaches 1000 or 60 seconds have passed.
# Each generated molecule is logged to a CSV file in the output directory as soon as it is generated.
generator.generate(
    max_generations=1000,
    time_limit=60,
)

In [None]:
# Analyze and plot results

generator.analyze()

# Plot the objective values and final reward for the generated molecules.
# The plots will also be saved to the output directory.
generator.plot(
    moving_average_window=0.05,
    reward_top_ps=[0.1, 0.5],
)

# Save and load

In [None]:
# Continue generation using the existing generator

# Calls generate() again on the same generator object, with new limits for this run.
generator.generate(
    max_generations=200,
    time_limit=60,
)

In [None]:
# Save the generator and its current progress to a file

import os

# Generator's output directory can be fetched using output_dir().
# os.path.join adds a path separator only when one is missing, so the save file ends up
# inside the output directory whether or not output_dir() ends with a separator
# (plain string concatenation would otherwise produce e.g. ".../tutorial_1save.gtr").
save_path = os.path.join(generator.output_dir(), "save.gtr")
generator.save(save_path)

In [None]:
# Load generator

# Since some transitions rely on heavy models, they are separated from the generator's
# saved state, so a transition has to be passed in again when loading.
generator = MCTS.load(
    save_path,
    transition=JensenTransition(),
)

In [None]:
# Continue generation using the loaded generator

# Same call as before saving, now made on the generator restored by MCTS.load().
generator.generate(
    max_generations=200,
    time_limit=60,
)

# Analysis

In [None]:
# Generator methods for statistics.
# Each line prints a label followed by the value returned by one generator method.

print("Number of generated nodes: ", generator.n_generated_nodes())
print("Generated node per second: ", generator.node_per_sec())
# NOTE(review): the label below says "Proportion of unfiltered generated nodes", but the call is
# node_per_sec() again (identical to the previous line), so this prints the generation speed.
# Looks like a copy-paste slip. TODO: confirm the intended Generator method and replace the call.
print("Proportion of unfiltered generated nodes: ", generator.node_per_sec())
print("Average reward of top_p generated nodes: ", generator.average_reward(top_p=0.5))
print("AUC of the average of top_k rewards within 1000 generation: ", generator.auc(top_k=5, max_oracle_calls=1000)) # Assumes the reward is in the range [0, 1].

In [None]:
# Check transition

from utils import draw_mol

# Iterate over the child nodes of the root node, printing the probability and action
# recorded on each child and drawing its molecule.
for child in generator.root.children:
    print(f"Probability: {child.last_prob:.3f} Action: {child.last_action}")
    # All MolNode subclasses have a mol() method.
    draw_mol(
        child.mol(),
        width=70,
        height=70,
    )