#### Arguments

In [None]:
# Notebook-wide configuration, grouped by the component that consumes each key.
args = dict(
    # General
    seed=42,
    device='cpu',
    root_dir='/Users/svlg/MasterThesis/v02',

    # FlowMol
    model='qm9_ctmc',
    n_molecules=2,
    n_timesteps=50,

    # Reward model
    reward_model='PAMNet_s',
    n_layer=6,
    dim=128,
    target=7,
    cutoff_l=5.0,
    cutoff_g=5.0,

    # Data / Dataset
    dataset='QM9',
    data_path='/data',  # appended to root_dir when the dataset directory is resolved
    batch_size=2,
)

In [None]:
import torch
import numpy as np
import random

def set_seed(seed):
    """Seed every random number generator used here with ``seed``.

    Covers Python's ``random`` module, NumPy's global generator and PyTorch
    (the default generator plus all CUDA devices). The cuDNN flags are also
    set: ``deterministic`` on, ``benchmark`` off.
    """
    # Global generators: Python, NumPy, then PyTorch (CPU and CUDA).
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN flags.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed all generators once, using the seed configured in ``args``.
set_seed(args['seed'])

#### Data

In [None]:
import os
import os.path as osp
from torch_geometric.loader import DataLoader
from dataset.QM9 import QM9

In [None]:
# Resolve the dataset directory under the project root. The leading slash of
# ``data_path`` is stripped so ``osp.join`` receives two real components;
# an absolute second component would make it discard ``root_dir``.
data_root = osp.join(args['root_dir'], args['data_path'].lstrip('/'))


def _load_split(file_name, shuffle):
    """Load one QM9 split file from ``data_root`` and wrap it in a DataLoader.

    Args:
        file_name: Name of the ``.pt`` file inside ``data_root``.
        shuffle: Forwarded to the DataLoader.

    Returns:
        A ``(dataset, loader)`` pair.
    """
    dataset = QM9(data_root)
    dataset.load(osp.join(data_root, file_name))
    loader = DataLoader(dataset, batch_size=args['batch_size'], shuffle=shuffle)
    return dataset, loader


# Only the training loader shuffles; validation and test keep file order.
train_dataset, train_loader = _load_split('qm9_train_data.pt', shuffle=True)
val_dataset, val_loader = _load_split('qm9_val_data.pt', shuffle=False)
test_dataset, test_loader = _load_split('qm9_test_data.pt', shuffle=False)

In [None]:
# Sanity check: print the first training batch only (nothing if the loader is empty).
_first_batch = next(iter(train_loader), None)
if _first_batch is not None:
    data = _first_batch
    print(data)

In [None]:
from rdkit import Chem
from rdkit.Geometry.rdGeometry import Point3D
# Lookup table from bond-type index to RDKit bond type.
_BondType = Chem.rdchem.BondType
bond_type_map = [
    _BondType.SINGLE,
    _BondType.DOUBLE,
    _BondType.TRIPLE,
    _BondType.AROMATIC,
    None,  # last slot is for masked bonds, which have no RDKit bond type
]

In [None]:
def pyg_to_rdmol(positions, atom_types, bond_src_idxs, bond_dst_idxs, bond_types):
    """Build an RDKit molecule with one 3D conformer from atom and bond arrays.

    Args:
        positions: Per-atom coordinates; ``positions[i]`` must unpack into
            three values convertible to ``float``.
        atom_types: Per-atom values, converted with ``int()`` and passed to
            ``Chem.Atom``.
        bond_src_idxs: Source atom index of each bond.
        bond_dst_idxs: Destination atom index of each bond.
        bond_types: Per-bond index into ``bond_type_map``.

    Returns:
        The RDKit molecule, or ``None`` if ``Chem.KekulizeException`` is
        raised while converting the editable molecule.
    """
    # Create an editable molecule and add the atoms in input order, so atom
    # indices match the bond indices and the rows of ``positions``.
    mol = Chem.RWMol()
    for atom_type in atom_types:
        mol.AddAtom(Chem.Atom(int(atom_type)))

    # Add each bond once, even if the edge list contains both directions.
    seen_pairs = set()
    for bond_type, src_idx, dst_idx in zip(bond_types, bond_src_idxs, bond_dst_idxs):
        rd_bond_type = bond_type_map[int(bond_type)]
        if rd_bond_type is None:
            # Masked bond: the map has no RDKit bond type for it, so there is
            # nothing to add (passing None to AddBond would fail).
            continue
        src_idx, dst_idx = int(src_idx), int(dst_idx)
        # Order-independent key: (a, b) and (b, a) are the same bond.
        pair = (src_idx, dst_idx) if src_idx <= dst_idx else (dst_idx, src_idx)
        if pair in seen_pairs:
            continue
        seen_pairs.add(pair)
        mol.AddBond(src_idx, dst_idx, rd_bond_type)

    try:
        mol = mol.GetMol()
    except Chem.KekulizeException:
        return None

    # Attach the coordinates as a single conformer.
    conf = Chem.Conformer(mol.GetNumAtoms())
    for i in range(mol.GetNumAtoms()):
        x, y, z = positions[i]
        conf.SetAtomPosition(i, Point3D(float(x), float(y), float(z)))
    mol.AddConformer(conf)

    return mol

#### Sampling

In [None]:
import flowmol

In [None]:
# Load the pretrained FlowMol model named in ``args`` and move it to the configured device.
model = flowmol.load_pretrained(args['model']).to(args['device'])
# Put the model in evaluation mode.
model.eval()

In [None]:
# Sample molecules from the FlowMol model with the configured count,
# number of integration timesteps and device.
print(f"Sampling {args['n_molecules']} molecules...")
sampled_molecules = model.sample_random_sizes(
    n_molecules=args['n_molecules'],
    n_timesteps=args['n_timesteps'],
    device=args['device'],
)

In [None]:
# Split the sampled molecules into parallel lists, one per representation.
# Every list follows the order of ``sampled_molecules``.
smiles_molecules = [sample.smiles for sample in sampled_molecules]
pgy_molecules = [sample.pyg_mol for sample in sampled_molecules]
rdkit_molecules = [sample.rdkit_mol for sample in sampled_molecules]
dgl_molecules = [sample.g for sample in sampled_molecules]

In [None]:
# Print one SMILES string per line.
for smiles in smiles_molecules:
    print(smiles)

#### SA Score

In [None]:
from molscore.scoring_functions.SA_Score import sascorer
from rdkit import Chem

In [None]:
def _sa_score(rd_mol):
    """Run ring perception and sanitization on ``rd_mol`` in place, then return its SA score."""
    Chem.GetSSSR(rd_mol)
    Chem.SanitizeMol(rd_mol)
    return sascorer.calculateScore(rd_mol)


# Score the sampled RDKit molecules in order, one SA score per molecule.
sa_scores = []
for rd_mol in rdkit_molecules:
    sa_scores.append(_sa_score(rd_mol))

print(sa_scores)

#### AiZynthFinder

In [None]:
from molscore.scoring_functions.aizynthfinder import AiZynthFinder

In [None]:
# Instantiate the MolScore AiZynthFinder scoring function with no filter policy.
aizynthfinder = AiZynthFinder(filter_policy=None)

In [None]:
# Run AiZynthFinder on the sampled SMILES, passing 'ai_finder_results' as its
# directory argument (presumably where its output files go — confirm).
results = aizynthfinder(smiles_molecules, directory='ai_finder_results')
# Bare expression: shown as the cell output.
results

In [None]:
import json
import pandas as pd

# Read the AiZynthFinder output JSON. The path is built from the configured
# project root instead of repeating it as a literal.
aizynth_json_path = osp.join(args['root_dir'], 'ai_finder_results', 'aizynth_out.json')
with open(aizynth_json_path, 'r', encoding='utf-8') as file:
    f = json.load(file)

# The file holds a column schema and the row records.
schema = f["schema"]
data = f["data"]

# Create a DataFrame from the records.
df = pd.DataFrame(data)

# Schema type name -> pandas dtype. Types not listed keep the inferred dtype.
type_mapping = {
    "integer": "int64",
    "number": "float64",
    "string": "object",
    "boolean": "bool"
}

for field in schema["fields"]:
    column_name = field["name"]
    column_type = field["type"]
    # Skip fields that have no mapped dtype or do not appear in the records,
    # so a schema/data mismatch does not raise KeyError.
    if column_type in type_mapping and column_name in df.columns:
        df[column_name] = df[column_name].astype(type_mapping[column_type])

# Index by the schema's primary key when one is declared.
primary_key = schema.get("primaryKey")
if primary_key:
    df.set_index(primary_key, inplace=True)
df

#### FS-Score

In [None]:
from fsscore.score import Scorer
from fsscore.models.ranknet import LitRankNet

In [None]:
# Pretrained FSscore checkpoint, located under the configured project root.
PRETRAIN_MODEL_PATH = osp.join(
    args['root_dir'],
    'pretrained_models',
    'FSscore',
    'pretrain_graph_GGLGGL_ep242_best_valloss.ckpt',
)
# Use a dedicated name for the ranking network: rebinding ``model`` here would
# replace the FlowMol model that the sampling cell calls.
fs_model = LitRankNet.load_from_checkpoint(PRETRAIN_MODEL_PATH)
fs_model.to(args['device'])
fs_model.eval()
scorer = Scorer(model=fs_model, device=args['device'])

In [None]:
# Score the sampled SMILES with the FSscore scorer.
scores = scorer.score(smiles_molecules)
# Bare expression: shown as the cell output.
scores

#### RA-Score

In [None]:
from molscore.scoring_functions.rascore_xgb import RAScore_XGB
# import requests

In [None]:
# Instantiate the MolScore RAScore (XGBoost) scoring function with the "GDB" model option.
ra_scorer = RAScore_XGB(model = "GDB")

In [None]:
# Send the sampled SMILES to the RAScore server and keep the returned scores.
# NOTE(review): presumably requires the scoring server to be running — confirm.
ra_scores = ra_scorer.send_smiles_to_server(smiles_molecules)
# Bare expression: shown as the cell output.
ra_scores

#### Full MolScore

In [None]:
from molscore import MolScore

In [None]:
# Score the sampled SMILES with MolScore, configured by the RAscorer task
# file; the result is kept as the constraint scores.
constraints_mol_scores = MolScore(model_name='RAscore', task_config="/Users/svlg/MasterThesis/v02/RAscorer.json").score(smiles_molecules)

#### PoseBusters

In [None]:
from posebusters import PoseBusters

In [None]:
# Run PoseBusters with its "mol" configuration on the sampled RDKit molecules
# and request the full report. The two ``None`` arguments leave the other
# molecule inputs unset (presumably reference and conditioning molecules —
# confirm against the PoseBusters API).
buster = PoseBusters(config="mol")
# NOTE: this rebinds ``df``, which previously held the AiZynthFinder table.
df = buster.bust(rdkit_molecules, None, None, full_report=True)
df

In [None]:
# List the columns of the PoseBusters report.
df.columns

In [None]:
# Show the first row of the PoseBusters report.
df.iloc[0]

#### XTB Simulation

In [None]:
from true_reward import xtb_simulation

In [None]:
# Compute the xTB "homolumo" quantity for every sampled DGL molecule.
true_rewards = []
for mol in dgl_molecules:
    quantity_value = xtb_simulation.compute_true_reward(mol, "dgl", "homolumo")
    # The result unpacks into the gap, the LUMO energy and the HOMO energy.
    homolumo_gap, lumo, homo = quantity_value
    # Record the value so ``true_rewards`` is filled; it was created but never
    # appended to before.
    # NOTE(review): assumes the HOMO-LUMO gap is the reward of interest — confirm.
    true_rewards.append(homolumo_gap)
    print(f"HOMO-LUMO gap: {homolumo_gap:.6f} eV")
    print(f"LUMO: {lumo} eV\nHOMO: {homo} eV")

#### Differentiable Reward

In [None]:
from PAMNet.models import PAMNet_s, Config

In [None]:
# Build the PAMNet reward model from the notebook configuration.
config = Config(dataset=args['dataset'], dim=args['dim'], n_layer=args['n_layer'], cutoff_l=args['cutoff_l'], cutoff_g=args['cutoff_g'])
# NOTE(review): no checkpoint is loaded in this cell, so the model runs with
# the weights its constructor produces — confirm this is intended.
reward_model = PAMNet_s(config).to(args['device'])
reward_model.eval()

targets = []    # model output per sampled molecule
pos_grads = []  # gradient of that output w.r.t. the atom positions, per molecule
for data in sampled_molecules:
    pyg_mol = data.pyg_mol
    pyg_mol.pos.requires_grad_()
    # backward() accumulates into ``.grad``; clear it so re-running this cell
    # does not add the new gradient onto a previous one.
    pyg_mol.pos.grad = None
    tmp = reward_model(pyg_mol)
    targets.append(tmp)
    tmp.backward()
    # Keep every molecule's gradient; ``pos_grad`` alone only holds the last one.
    pos_grad = pyg_mol.pos.grad
    pos_grads.append(pos_grad)
print(len(targets))

#### Visualize the Molecules

In [None]:
import py3Dmol

In [None]:
# Serialise each RDKit molecule to a MOL block, the format passed to
# ``addModel`` below (the list keeps its historical ``pdb_blocks`` name).
pdb_blocks = [Chem.MolToMolBlock(rd_mol) for rd_mol in rdkit_molecules]

# Show one panel per molecule in a single row, capped at this cell's
# two-column layout. Sizing the grid from the data avoids the IndexError that
# fixed indices raised when fewer molecules were sampled.
max_panels = 2
n_panels = min(len(pdb_blocks), max_panels)
if n_panels:
    viewer = py3Dmol.view(width=1250, height=250, viewergrid=(1, n_panels))
    for col, block in enumerate(pdb_blocks[:n_panels]):
        viewer.addModel(block, "mol", viewer=(0, col))
    viewer.setStyle({"stick": {}, "sphere": {"scale": 0.3}})
    viewer.zoomTo()
    viewer.show()

In [None]:
# smiles_molecules

In [None]:
# try:
#     response = requests.post(server_url + "/", json=payload)
# except requests.exceptions.ConnectionError as e:
#     f"{e}: "
#     f"\n\tAre sure the server was running at {server_url}?"
#     f"\n\tAre you sure the right environment engine was used (I'm using)?"
#     raise e

In [None]:
# response

In [None]:
# response = requests.post(server_url + "/", json=payload)
# response.status_code

#### Full MolScore

In [None]:
from molscore import MolScore

In [None]:
# Score the sampled SMILES with MolScore, configured by the RAscorer task
# file; the result is kept as the constraint scores.
constraints_mol_scores = MolScore(model_name='RAscore', task_config="/Users/svlg/MasterThesis/v02/RAscorer.json").score(smiles_molecules)

#### PoseBusters

In [None]:
from posebusters import PoseBusters

In [None]:
# Run PoseBusters with its "mol" configuration on the sampled RDKit molecules
# and request the full report. The two ``None`` arguments leave the other
# molecule inputs unset (presumably reference and conditioning molecules —
# confirm against the PoseBusters API).
buster = PoseBusters(config="mol")
# NOTE: this rebinds ``df``, which previously held the AiZynthFinder table.
df = buster.bust(rdkit_molecules, None, None, full_report=True)
# Print the report's (rows, columns) shape before displaying it.
print(df.shape)
df

In [None]:
# List the columns of the PoseBusters report.
df.columns

In [None]:
# Show the first row of the PoseBusters report.
df.iloc[0]

#### XTB Simulation

In [None]:
from true_reward import xtb_simulation

In [None]:
# Compute the xTB "homolumo" quantity for every sampled DGL molecule.
true_rewards = []
for mol in dgl_molecules:
    quantity_value = xtb_simulation.compute_true_reward(mol, "dgl", "homolumo")
    # The result unpacks into the gap, the LUMO energy and the HOMO energy.
    homolumo_gap, lumo, homo = quantity_value
    # Record the value so ``true_rewards`` is filled; it was created but never
    # appended to before.
    # NOTE(review): assumes the HOMO-LUMO gap is the reward of interest — confirm.
    true_rewards.append(homolumo_gap)
    print(f"HOMO-LUMO gap: {homolumo_gap:.6f} eV")
    print(f"LUMO: {lumo} eV\nHOMO: {homo} eV")

#### Differentiable Reward

In [None]:
from PAMNet.models import PAMNet_s, Config

In [None]:
# Build the PAMNet reward model from the notebook configuration.
config = Config(dataset=args['dataset'], dim=args['dim'], n_layer=args['n_layer'], cutoff_l=args['cutoff_l'], cutoff_g=args['cutoff_g'])
# NOTE(review): no checkpoint is loaded in this cell, so the model runs with
# the weights its constructor produces — confirm this is intended.
reward_model = PAMNet_s(config).to(args['device'])
reward_model.eval()

targets = []    # model output per sampled molecule
pos_grads = []  # gradient of that output w.r.t. the atom positions, per molecule
for data in sampled_molecules:
    pyg_mol = data.pyg_mol
    pyg_mol.pos.requires_grad_()
    # backward() accumulates into ``.grad``; clear it so re-running this cell
    # does not add the new gradient onto a previous one.
    pyg_mol.pos.grad = None
    tmp = reward_model(pyg_mol)
    targets.append(tmp)
    tmp.backward()
    # Keep every molecule's gradient; ``pos_grad`` alone only holds the last one.
    pos_grad = pyg_mol.pos.grad
    pos_grads.append(pos_grad)
print(len(targets))

#### Visualize the Molecules

In [None]:
import py3Dmol

In [None]:
# Serialise each RDKit molecule to a MOL block, the format passed to
# ``addModel`` below (the list keeps its historical ``pdb_blocks`` name).
pdb_blocks = [Chem.MolToMolBlock(rd_mol) for rd_mol in rdkit_molecules]

# Show one panel per molecule in a single row, capped at this cell's
# five-column layout. Sizing the grid from the data avoids the IndexError that
# fixed indices 0-4 raised when fewer than five molecules were sampled.
max_panels = 5
n_panels = min(len(pdb_blocks), max_panels)
if n_panels:
    viewer = py3Dmol.view(width=1250, height=250, viewergrid=(1, n_panels))
    for col, block in enumerate(pdb_blocks[:n_panels]):
        viewer.addModel(block, "mol", viewer=(0, col))
    viewer.setStyle({"stick": {}, "sphere": {"scale": 0.3}})
    viewer.zoomTo()
    viewer.show()