# Tutorial - Step 2: MTMR fine-tuning with reinforcement learning

## 1. Import requirements

In [1]:
import os
import pandas as pd
import torch
import time
import matplotlib.pyplot as plt

In [2]:
from MTMR.dataset import TrainingSmilesDataset, ValidationSmilesDataset
from MTMR.vae import SmilesAutoencoder, RewardFunctionLogP
from MTMR.properties import plogp, similarity

## 2. Configure GPU (if available)

In [3]:
# Use the first CUDA device when PyTorch can see one; otherwise run on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


## 3. Specify a target property

In [4]:
# Identifier of the task; it is also used below as the sub-directory name
# for the input data, the pretrained checkpoint and the outputs.
PROPERTY_NAME = "logp06"

# Scoring callables that are handed to the reward function.
SCORING_TANIMOTO_FT = similarity
SCORING_PROPERTY_FT = plogp()

# Thresholds passed to the reward function for the target property.
threshold_similarity = 0.3
threshold_property = 0.0

## 4. Set directories (for inputs and outputs)

In [5]:
# Checkpoint folder named after tutorial step 1 (pretraining), per property.
input_ckpt_dir = os.path.join("outputs_Tutorial_1_MTMR_pretraining", PROPERTY_NAME)
# Folder holding the training / validation files for the selected property.
input_data_dir = os.path.join("DATA", PROPERTY_NAME)

In [6]:
# Root folder shared by every property fine-tuned with this tutorial.
_output_dir = "outputs_Tutorial_2_MTMR_finetuning"

# Per-property folder that every output path of this notebook is built from.
output_dir = os.path.join(_output_dir, PROPERTY_NAME)

# A single makedirs call creates the root and the per-property folder.
# exist_ok=True makes the cell safe to re-run and avoids the race between
# an explicit existence check and the directory creation.
os.makedirs(output_dir, exist_ok=True)

## 5. Set file names

In [7]:
# Input files read from the property's data folder.
filepath_valid = os.path.join(input_data_dir, "rdkit_valid.txt")
filepath_train = os.path.join(input_data_dir, "rdkit_train_triplet.txt")

In [8]:
# Artifacts read from the pretraining checkpoint folder:
# model weights, model configuration and the character-to-index table.
filepath_pretrain_ckpt = os.path.join(input_ckpt_dir, "checkpoints.pt")
filepath_pretrain_configs = os.path.join(input_ckpt_dir, "configs.csv")
filepath_pretrain_char2idx = os.path.join(input_ckpt_dir, "char2idx.csv")

In [9]:
# Files written by this notebook, all inside the per-property output folder.
filepath_char2idx = os.path.join(output_dir, "char2idx.csv")
filepath_configs = os.path.join(output_dir, "configs.csv")
filepath_checkpoint = os.path.join(output_dir, "checkpoints.pt")

# Training and validation histories returned by the fine-tuning run.
filepath_history = os.path.join(output_dir, "history.csv")
filepath_history_valid = os.path.join(output_dir, "history_valid.csv")

## 6. Load datasets (for training and validation)

In [10]:
# Build the training set with the char2idx file produced during pretraining.
dataset = TrainingSmilesDataset(
    filepath_train,
    filepath_char2idx=filepath_pretrain_char2idx,
    device=device,
)

# Save the table next to the fine-tuning outputs. This must happen before the
# validation set is built, because that call is given the path written here.
dataset.save_char2idx(filepath_char2idx)

dataset_valid = ValidationSmilesDataset(
    filepath_valid,
    filepath_char2idx,
    device=device,
)

## 7. Load a pretrained generator of MTMR

In [11]:
## Model configuration
# The architecture fields are passed as None; presumably SmilesAutoencoder
# fills them in from `filepath_config` -- TODO confirm against MTMR.vae.
model_configs = dict.fromkeys(
    ("hidden_size", "latent_size", "num_layers", "vocab_size",
     "sos_idx", "eos_idx", "pad_idx")
)
model_configs.update(device=device, filepath_config=filepath_pretrain_configs)

## Model initialization
generator = SmilesAutoencoder(**model_configs)

## Load the pretrained model
generator.load_model(filepath_pretrain_ckpt)

## Save the configuration alongside the fine-tuning outputs
generator.save_config(filepath_configs)

## 8. Define a reward function

In [12]:
# Reward built from the similarity and property scorers and the two
# thresholds configured in section 3.
reward_ft = RewardFunctionLogP(
    similarity_ft=SCORING_TANIMOTO_FT,
    scoring_ft=SCORING_PROPERTY_FT,
    threshold_property=threshold_property,
    threshold_similarity=threshold_similarity,
)

## 9. Train

In [None]:
# Fine-tune the generator; the call returns one history frame for training
# and one for validation.
df_history, df_history_valid = generator.policy_gradient(
    dataset,
    reward_ft,
    validation_dataset=dataset_valid,
    checkpoint_filepath=filepath_checkpoint,
)

# Write both histories to the output folder as CSV, without the index column.
df_history_valid.to_csv(filepath_history_valid, index=False)
df_history.to_csv(filepath_history, index=False)

[000000/002000]  loss: 0.092  reward: 0.808  similarity: 0.525  property[0]: 0.808  valid_ratio(va): 0.950  similarity(va): 0.384  property(va): 0.841
[000010/002000]  loss: 0.091  reward: 0.911  similarity: 0.539  property[0]: 0.911  valid_ratio(va): 0.990  similarity(va): 0.381  property(va): 1.206
[000020/002000]  loss: 0.137  reward: 1.335  similarity: 0.525  property[0]: 1.335  valid_ratio(va): 0.995  similarity(va): 0.378  property(va): 1.517
[000030/002000]  loss: 0.133  reward: 1.313  similarity: 0.508  property[0]: 1.313  valid_ratio(va): 1.000  similarity(va): 0.359  property(va): 1.820
[000040/002000]  loss: 0.161  reward: 1.626  similarity: 0.497  property[0]: 1.626  valid_ratio(va): 1.000  similarity(va): 0.344  property(va): 2.085
[000050/002000]  loss: 0.150  reward: 1.600  similarity: 0.500  property[0]: 1.600  valid_ratio(va): 1.000  similarity(va): 0.341  property(va): 2.246
[000060/002000]  loss: 0.169  reward: 1.804  similarity: 0.505  property[0]: 1.804  valid_rati

[000550/002000]  loss: 0.305  reward: 4.560  similarity: 0.447  property[0]: 4.560  valid_ratio(va): 1.000  similarity(va): 0.310  property(va): 4.532
[000560/002000]  loss: 0.310  reward: 4.226  similarity: 0.448  property[0]: 4.226  valid_ratio(va): 1.000  similarity(va): 0.309  property(va): 4.725
[000570/002000]  loss: 0.306  reward: 4.474  similarity: 0.450  property[0]: 4.474  valid_ratio(va): 1.000  similarity(va): 0.307  property(va): 4.712
[000580/002000]  loss: 0.307  reward: 4.515  similarity: 0.438  property[0]: 4.515  valid_ratio(va): 1.000  similarity(va): 0.315  property(va): 4.544
[000590/002000]  loss: 0.336  reward: 4.647  similarity: 0.450  property[0]: 4.647  valid_ratio(va): 1.000  similarity(va): 0.311  property(va): 4.484
[000600/002000]  loss: 0.317  reward: 4.831  similarity: 0.446  property[0]: 4.831  valid_ratio(va): 1.000  similarity(va): 0.307  property(va): 4.592
[000610/002000]  loss: 0.325  reward: 4.902  similarity: 0.435  property[0]: 4.902  valid_rati

## 10. Visualize the reward and loss histories

In [None]:
fig, axes = plt.subplots(4, 1, figsize=(8, 8))

# (history column, legend label) for each panel, from top to bottom.
training_panels = (
    ("LOSS", "Loss"),
    ("REWARD", "Reward"),
    ("SIMILARITY", "Tanimoto Coeff."),
    ("PROPERTY", f"Property ({PROPERTY_NAME})"),
)
for ax, (column, label) in zip(axes, training_panels):
    ax.plot(df_history.loc[:, column], label=label)
    ax.legend(loc='best')

# Only the bottom panel carries the x-axis label.
axes[3].set_xlabel("Iteration")

plt.tight_layout()
plt.show()

In [None]:
fig, axes = plt.subplots(3, 1, figsize=(8, 4))

# (validation-history column, legend label) for each panel, from top to bottom.
validation_panels = (
    ("VALID_RATIO", "Validity"),
    ("AVERAGE_SIMILARITY", "Tanimoto coeff."),
    ("AVERAGE_PROPERTY", f"Property ({PROPERTY_NAME})"),
)
for ax, (column, label) in zip(axes, validation_panels):
    ax.plot(df_history_valid.loc[:, column], label=label)
    ax.legend(loc='best')

# Fix the y-range of the validity panel to the top of the scale.
axes[0].set_ylim(0.95, 1.005)

# Only the bottom panel carries the x-axis label.
axes[2].set_xlabel("Iteration")

plt.tight_layout()
plt.show()