In [None]:
# Local runs only: put the repository root (one directory up) at the front of
# the import search path, unless that exact entry is already present.
import sys

repo_root = "../"
if sys.path.count(repo_root) == 0:
    sys.path.insert(0, repo_root)

In [None]:
%load_ext autoreload
%autoreload 2
from rdkit import Chem
from utils import HELMConverter, draw_mol, get_main_mol, remove_isotopes, is_same_mol

# Monomer library files handed to the HELM converter, in this order.
lib_files = [
    "chembl_35_monomer_library.xml",
    "chembl_35_monomer_library_diff.xml",
    "HELMCoreLibrary.json",
    "monomerLib2.0.json",
]
# Each file name is resolved under data/helm/library/ below the repository root;
# `converter` is bound to whatever load() returns.
converter = HELMConverter().load(
    *(repo_root + f"data/helm/library/{name}" for name in lib_files)
)

In [None]:
# Example input: a HELM string containing a single RNA polymer (RNA1) and no connections.
helm = "RNA1{R(C)P.R(T)P.R(C)P.R(C)P.R(A)P.R(A)P.R(C)P.R(A)P.R(T)P.R(C)P.R(A)P.R(A)P.R(G)P.R(G)P}$$$$"

# Calls the converter's underscore-prefixed _convert directly with verbose=True
# (presumably to get diagnostic output during conversion — TODO confirm).
mol = converter._convert(helm, verbose=True)
# Draw the result; 200 and 300 are passed positionally, in the same slots where
# another cell of this notebook passes (width, height).
draw_mol(mol, 200, 300)

In [None]:
# Cross-check one HELM string (CHEM, PEPTIDE and RNA polymers joined by two
# connections) against a hand-written reference SMILES.
helm = "CHEM1{[SMCC]}|PEPTIDE1{L.M}|RNA1{R(C)P.R(A)P}$RNA1,PEPTIDE1,6:R2-1:R1|PEPTIDE1,CHEM1,2:R2-1:R1$$$"
smiles = "CC(C)C[C@@H](C(=O)N[C@@H](CCSC)C(=O)C(=O)C1CCC(CC1)CN2C(=O)CC([H])C2=O)NP(=O)(O)O[C@@H]3[C@@H](COP(=O)(O)O[C@@H]4[C@@H](CO[H])O[C@H]([C@@H]4O)N5C=CC(=NC5=O)N)O[C@H]([C@@H]3O)N6C=NC7=C6N=CN=C7N"

# Molecule obtained from the HELM string, then passed through RDKit's RemoveHs.
mol_helm = converter.convert(helm)
mol_helm = Chem.RemoveHs(mol_helm)
# Reference molecule parsed directly from the SMILES; no standardize_mol step is applied.
mol_smiles = Chem.MolFromSmiles(smiles)

# Drawing size shared by both pictures.
width = height = 500

# Verdict first, then both canonical SMILES and the raw input for eyeballing.
print(is_same_mol(mol_helm, mol_smiles))
print(f"{Chem.MolToSmiles(mol_helm)} ...helm -> smiles")
print(f"{Chem.MolToSmiles(mol_smiles)} ...smiles -> smiles")
print(f"{smiles} ...raw")

draw_mol(mol_helm, width, height)
draw_mol(mol_smiles, width, height)

In [None]:
# Ask the converter's check_monomer for "C" / "PEPTIDE"
# (presumably monomer symbol and polymer type — TODO confirm against HELMConverter).
mol = converter.check_monomer("C", "PEPTIDE")
# The result is handed to RDKit's MolToSmiles, so it is expected to be an RDKit molecule.
print(Chem.MolToSmiles(mol))
# Bare trailing expression: the notebook shows the object's rich representation.
mol

In [None]:
#test converter

import cProfile
import csv

# Rows of the reference CSV, one tuple per data row. test() in this cell unpacks
# every row into exactly three fields (helm, id, smiles).
test_data: list[tuple] = []
file_path = repo_root + "data/helm/chembl34_protein_helm.csv"
# (alternative dataset used previously: "data/helm/pa_test.csv")

with open(file_path, newline='', encoding="utf-8") as f:
    reader = csv.reader(f, delimiter=',')
    # Skip the header row; the None default keeps an empty file from raising StopIteration.
    next(reader, None)
    # csv.reader yields an empty list for a blank line; drop those so a stray
    # blank line cannot break the three-field unpacking done later.
    test_data.extend(tuple(row) for row in reader if row)

def test():
    """Convert every HELM string in ``test_data`` and compare it with its reference SMILES.

    Each row of the notebook-level ``test_data`` is unpacked as ``(helm, id, smiles)``.
    A row is counted as one of:

    * none    -- ``converter.convert(helm)`` returned ``None``;
    * success -- ``is_same_mol`` accepts the converted molecule and the reference
      molecule (parsed by RDKit, then passed through ``remove_isotopes`` and
      ``get_main_mol``);
    * unmatch -- everything else, including a reference SMILES that RDKit
      cannot parse.

    The HELM/SMILES pair is printed for every none/unmatch row, followed by a
    single summary line. Nothing is returned.
    """
    success_count = 0
    unmatch_count = 0
    none_count = 0
    # The middle column is not used here; `_id` avoids shadowing the builtin `id`.
    for helm, _id, smiles in test_data:
        mol1 = converter.convert(helm)
        if mol1 is None:
            # Conversion failed: no point building the reference molecule.
            none_count += 1
            print(f"None - helm: {helm}, smiles: {smiles}")
            continue

        # Chem.MolFromSmiles returns None for an unparseable SMILES. Only run the
        # clean-up helpers on a real molecule so one bad row cannot abort the run.
        mol2 = Chem.MolFromSmiles(smiles)
        if mol2 is not None:
            mol2 = get_main_mol(remove_isotopes(mol2))

        if mol2 is not None and is_same_mol(mol1, mol2):
            success_count += 1
        else:
            unmatch_count += 1
            print(f"Unmatch - helm: {helm}, smiles: {smiles}")
    print(f"success_count: {success_count}, none_count: {none_count} unmatch_count: {unmatch_count}")

# Optional profiling run of the same function, left disabled.
#cProfile.run("test()")
# Run the comparison over every row currently held in test_data.
test()