In [1]:
cluster_type="RMSD"
logger=None
threshold=1.0


In [2]:
import glob
import os
from collections import defaultdict
from rdkit import Chem
from rdkit.Chem import rdMolAlign,AllChem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.ML.Cluster import Butina
from rdkit.Chem.Draw import rdDepictor, rdMolDraw2D

import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

In [3]:
import py3Dmol
def drawit(m, cids=[-1], p=None, removeHs=True,
           colors=('cyanCarbon','redCarbon','blueCarbon','magentaCarbon','whiteCarbon','purpleCarbon')):
        if removeHs:
            m = Chem.RemoveHs(m)
        if p is None:
            p = py3Dmol.view(width=400, height=400)
        p.removeAllModels()
        for i,cid in enumerate(cids):
            IPythonConsole.addMolToView(m,p,confId=cid)
        for i,cid in enumerate(cids):
            p.setStyle({'model':i,},
                            {'stick':{'colorscheme':colors[i%len(colors)]}})
        p.zoomTo()
        return p.show()

In [4]:
    # Type of clustering
    clusters_type_list = ["RMSD", "TFD"]
    if cluster_type.upper() not in clusters_type_list:
        m = '\n\t\t ERROR: Type of clustering "{}" is not allowed.\n'
        m += '\t\t ERROR: Allowing values are RMSD or TFD.\n'
        print(m) if logger is None else logger.error(m)
    cluster_type = cluster_type.upper()

In [5]:
    # Load pdb or mol2
    id_to_file_dict = defaultdict()
    extension = "pdb"
    list_files = sorted(glob.glob(os.path.join("./01-PDBs_OUT", "*.{}".format(extension))))
    nfiles = len(list_files)
    if extension == "mol2":
        molrdkit = Chem.MolFromMol2File(list_files[0], removeHs=False)
    elif extension == "pdb":
        molrdkit = Chem.MolFromPDBFile(list_files[0], removeHs=False)
    else:
        m = '\n\t\t ERROR: "{}" files are not still supported in clusterize rdkit.\n'.format(extension)
        print(m) if logger is None else logger.error(m)
        exit()
    molrdkit.RemoveAllConformers()

In [6]:
    # Assing conformers
    nconf = 0
    for iconf in range(0, nfiles):

        if extension == "mol2":
            mtmp = Chem.MolFromMol2File(list_files[iconf], removeHs=False)
        elif extension == "pdb":
            mtmp = Chem.MolFromPDBFile(list_files[iconf], removeHs=False)
        else:
            m = "\n\t\t ERROR: {} files are not still soported\n".format(extension)
            print(m) if logger is None else logger.error(m)
            exit()
        conf = Chem.Conformer(mtmp.GetConformer(0))
        molrdkit.AddConformer(conf, assignId=True)
        id_to_file_dict[nconf] = list_files[iconf]
        nconf += 1


In [7]:
     # Remove Hydrogens ==================================================================
     m = "\t\t Number of initial conformers: {}\n".format(molrdkit.GetNumConformers())
     m += "\t\t Total Number of atoms: {}\n".format(molrdkit.GetNumAtoms())
     molrdkit_no_h = Chem.RemoveHs(molrdkit)
     m += "\t\t Remove Hydrogens to perform the clustering\n"
     m += "\t\t Heavy Number of atoms: {}\n".format(molrdkit_no_h.GetNumAtoms())
     print(m) if logger is None else logger.info(m)

		 Number of initial conformers: 5
		 Total Number of atoms: 32
		 Remove Hydrogens to perform the clustering
		 Heavy Number of atoms: 12



In [8]:
def draw_molecule(mol, outfile):
    d2d = rdMolDraw2D.MolDraw2DCairo(300, 200)
    d2d.DrawMolecule(mol)
    d2d.FinishDrawing()
    d2d.WriteDrawingText(outfile)

large_mol = AllChem.MolFromPDBFile("./01-PDBs_OUT/Conf_0001_opt.pdb")
a = AllChem.Compute2DCoords(large_mol)
a
draw_molecule(large_mol, 'large_mol.png')

In [9]:
    # Allign conformers using VMD algorithm
    rmsd_vmd = []
    for cid in range(1, nconf):
        rmsd_vmd.append(AllChem.GetConformerRMS(molrdkit, 0, cid, prealigned=False))
    print("VMD       ", rmsd_vmd)
    # Allign conformers using AlignMol algorithm
    rmsd_AlignMol = []
    for i in range(1, molrdkit.GetNumConformers()):
        d = rdMolAlign.AlignMol(molrdkit, molrdkit, prbCid=i, refCid=0)
        rmsd_AlignMol.append(d)    
    print("AlignMol  ",rmsd_AlignMol)
    # Allign conformers using GetBestRMS algorithm
    rmsd_GetBestRMS = []
    for i in range(1, molrdkit.GetNumConformers()):
        d = rdMolAlign.GetBestRMS(molrdkit, molrdkit, prbId=i, refId=0)
        rmsd_GetBestRMS.append(d)
    print("GetBestRMS", rmsd_GetBestRMS)

VMD        [1.8680383125082918, 1.3780028086884284, 1.4662306676359846, 1.7875143752659424]
AlignMol   [2.2057437491014182, 1.6430912889341587, 2.1610522351431, 2.2964094068351915]
GetBestRMS [1.5543315169697416, 0.26181457558142607, 1.3228042510772937, 1.7873428803475213]


In [10]:
    # TEST allingment only with the first conformer
    # Allign conformers using VMD algorithm
    rmsd_noh_vmd = []
    for cid in range(1, nconf):
        rmsd_noh_vmd.append(AllChem.GetConformerRMS(molrdkit_no_h, 0, cid, prealigned=False))
    print("VMD       ", rmsd_noh_vmd)
    drawit(molrdkit_no_h, [0,2])
    # Allign conformers using AlignMol algorithm
    # rmsd_noh_AlignMol = []
    # for i in range(1, molrdkit_no_h.GetNumConformers()):
    #     d = rdMolAlign.AlignMol(molrdkit_no_h, molrdkit_no_h, prbCid=i, refCid=0)
    #     rmsd_noh_AlignMol.append(d)    
    # print("AlignMol  ",rmsd_noh_AlignMol)
    # Allign conformers using GetBestRMS algorithm
    rmsd_noh_GetBestRMS = []
    for i in range(1, molrdkit_no_h.GetNumConformers()):
        d = rdMolAlign.GetBestRMS(molrdkit_no_h, molrdkit_no_h, prbId=i, refId=0)
        rmsd_noh_GetBestRMS.append(d)
    print("GetBestRMS", rmsd_noh_GetBestRMS)
    drawit(molrdkit_no_h, [0,2])
    drawit(molrdkit_no_h, [0,4])
    drawit(molrdkit_no_h, [1,4])


VMD        [0.8769190681038447, 1.3062257489437616, 0.8020211360203033, 1.4077571872946704]


GetBestRMS [0.8769190681039506, 0.117953604190158, 0.8020211360203624, 1.407757187275025]


In [11]:
m_rmsd_noh_getbest=rdMolAlign.GetAllConformerBestRMS(molrdkit_no_h)
print(m_rmsd_noh_getbest)

(0.8769190681036912, 0.11795360418983672, 0.8749136860277505, 0.8020211360203624, 0.6929821435741474, 0.7988247500374526, 1.4077571872750787, 1.6548904503795034, 1.361270487680972, 1.2815068876909022)


In [12]:
fig, ax = plt.subplots()
ax.set_xlabel('RMSD')
ax.set_ylabel('count')
ax.hist(m_rmsd_noh_getbest)
plt.show()

In [13]:
    clusts = Butina.ClusterData(m_rmsd_noh_getbest, nconf, threshold, isDistData=True, reordering=True)


In [14]:
print(clusts)


((3, 0, 1, 2), (4,))


In [15]:
from stmol import showmol
import py3Dmol
# 1A2C
# Structure of thrombin inhibited by AERUGINOSIN298-A from a BLUE-GREEN ALGA
xyzview = py3Dmol.view(query='pdb:1A2C') 
xyzview.setStyle({'cartoon':{'color':'spectrum'}})
showmol(xyzview, height = 500,width=800)

2024-05-14 15:39:29.474 
  command:

    streamlit run /home/jramos/Programacion/sandboxes/sandbox_Castep-Parser/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]
