## Tutorial (user): Define molecular reward
- Define a reward for molecular generation
- Generation with custom reward

In [None]:
# Imports

import sys
# Location of the repository root, relative to the directory the notebook runs from.
repo_root = "../../" # Change this if running the notebook from a different directory
# Put the root at the front of the module search path, but only once, so that
# re-running this cell does not pile up duplicate entries.
if sys.path.count(repo_root) == 0:
    sys.path.insert(0, repo_root)
    
import os
from filter import ValidityFilter
from generator import MCTS
from language import Language
from node import MolSentenceNode
from policy import UCT
from transition import RNNTransition

# Define a reward for molecular generation
A reward class maps nodes to real values, representing the objective to be optimized. We also support multi-objective settings: although the objectives must ultimately be folded into a single scalar value for optimization, our method allows tracking of individual objective values throughout the process.

For molecular generation, we recommend implementing a reward by inheriting from `MolReward`. For non-molecular nodes, see `dev_1_make_components.ipynb`.

In [None]:
# Define a molecular reward

import numpy as np
from rdkit.Chem import Descriptors
from reward import MolReward

def sigmoid(x, a):
    """Logistic function with steepness ``a``: ``1 / (1 + exp(-a * x))``.

    Evaluated as ``exp(-log(1 + exp(-a * x)))`` through ``np.logaddexp`` so that
    a large negative ``a * x`` does not overflow ``np.exp`` (the naive form
    emits a RuntimeWarning there, or raises under ``np.errstate(over="raise")``).

    Args:
        x: Scalar or NumPy array of inputs.
        a: Steepness factor that ``x`` is multiplied by.

    Returns:
        Value(s) in [0, 1]; 0.5 where ``a * x == 0``.
    """
    # log(1 + exp(-a*x)) == logaddexp(0, -a*x), which NumPy computes without overflow.
    return np.exp(-np.logaddexp(0.0, -a * x))

class CustomReward(MolReward):
    """Reward based on (LogP value - max ring size)."""

    def __init__(self, a):
        """Store ``a``, the factor passed to ``sigmoid`` when the reward is computed."""
        self.a = a

    def mol_objective_functions(self):
        """Return the list of objective functions; each takes a molecule and returns one objective value."""

        def log_p(mol):
            # LogP value as computed by RDKit's Descriptors.MolLogP.
            return Descriptors.MolLogP(mol)

        def max_ring_size(mol):
            # Number of atoms in the largest ring; 0 when the molecule has no rings.
            atom_rings = mol.GetRingInfo().AtomRings()
            return max(map(len, atom_rings), default=0)

        # reward_from_objective_values() reads the values by position, presumably
        # in this same order (log_p first, max_ring_size second).
        return [log_p, max_ring_size]

    def reward_from_objective_values(self, objective_values):
        """Fold the objective values into the final reward: sigmoid of (LogP - max ring size)."""
        log_p_value = objective_values[0]
        largest_ring = objective_values[1]
        # It is recommended to scale the reward to the range [0, 1].
        return sigmoid(x=log_p_value - largest_ring, a=self.a)

# Generation with custom reward
Note: For the YAML workflow, define the reward in a `***.py` file and place it in the `reward` directory. For details on the code below, refer to `dev_3_rnn_and_chain.ipynb`.

In [None]:
# Generate molecules with CustomReward

# Load the language and build the RNN transition from the model directory.
device = "cpu"
model_dir = os.path.join(repo_root, "model/smiles/drugs_zinc/gru")
smiles_lang = Language.load(os.path.join(model_dir, "smiles_zinc.lang"))
rnn_transition = RNNTransition(
    lang=smiles_lang,
    model_dir=model_dir,
    device=device,
    top_p=0.995,
)

# The node class reads its language and device from class attributes,
# so set them before creating the root node from the empty key.
MolSentenceNode.lang = smiles_lang
MolSentenceNode.device = device
root = MolSentenceNode.node_from_key(key="")

reward = CustomReward(a=0.5)
# Apply ValidityFilter first (unless molecular validity is already guaranteed),
# since other MolFilter and MolReward do not check validity by default.
filters = [ValidityFilter()]
policy = UCT(c=0.1, best_rate=0.5)

generator = MCTS(
    root=root,
    transition=rnn_transition,
    reward=reward,
    filters=filters,
    filter_reward=0,
    policy=policy,
    avoid_duplicates=False,
    cut_failed_child=False,
)

# Run the search (time_limit is presumably in seconds - confirm), then plot the results.
generator.generate(max_generations=1000, time_limit=100)
generator.plot()