In [8]:
%config Completer.use_jedi = False
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import IPythonConsole

from ds_generation.label import assign_mol_label
import numpy as np
import pandas as pd

In [2]:
# Load one example ligand and the pharmacophore file with the matching index.
# The dataset root is named once so both paths stay in sync when it moves.
DATA_DIR = '/data/hookbill/hadfield/syntheticVS/data/DUDE_SRC_50ops_ac0025_t4_processed'
SDF_DIR = DATA_DIR + '/actives/sdf'

mol = Chem.MolFromMolFile(SDF_DIR + '/ligands/lig1.sdf')
# NOTE: despite its name, `protein` is read from the pharmacophores directory.
protein = Chem.MolFromMolFile(SDF_DIR + '/pharmacophores/pharm1.sdf')

# MolFromMolFile hands back None when the file cannot be parsed; fail here
# rather than letting a None propagate into later cells.
for _label, _loaded in (('ligand', mol), ('pharmacophore', protein)):
    if _loaded is None:
        raise ValueError('RDKit could not parse the example %s file' % _label)

In [3]:
# Run the labelling routine on the example ligand/pharmacophore pair with
# threshold=4 (presumably a distance cutoff -- confirm in ds_generation.label).
# The cell output is a list of 3-element coordinate arrays.
assign_mol_label(mol, protein, threshold=4)

[array([ 0.0613, -4.9189, -1.2088]), array([ 1.8475, -4.1312,  1.6027])]

In [6]:
def rdmol_to_dataframe(mol):
    """Tabulate the heavy atoms of an RDKit molecule as a DataFrame.

    Parameters
    ----------
    mol : rdkit.Chem.Mol or None
        Molecule to convert. ``mol.GetConformer()`` is called, so a molecule
        with heavy atoms is expected to carry a conformer.

    Returns
    -------
    pandas.DataFrame
        One row per heavy atom (atomic number > 1), in atom-index order, with
        columns ``x``, ``y``, ``z`` (conformer coordinates) and ``type``
        (atomic number). ``None`` or a molecule without heavy atoms gives an
        empty frame with the same columns. For ``Chem.RWMol`` inputs ``type``
        is remapped through ``{8: 0, 7: 1, 6: 2}``; any other element maps
        to NaN.
    """
    columns = ['x', 'y', 'z', 'type']
    if mol is None or mol.GetNumHeavyAtoms() < 1:
        return pd.DataFrame({name: [] for name in columns})

    # Pick heavy atoms by element instead of assuming they occupy indices
    # 0..n_heavy-1: with explicit hydrogens (or dummy atoms) in between, the
    # leading indices are not the heavy atoms.
    heavy_atoms = [atom for atom in mol.GetAtoms() if atom.GetAtomicNum() > 1]

    conf = mol.GetConformer()
    # reshape(-1, 3) keeps the array two-dimensional for a single atom too.
    positions = np.array(
        [np.array(conf.GetAtomPosition(atom.GetIdx())) for atom in heavy_atoms]
    ).reshape(-1, 3)
    atom_types = [atom.GetAtomicNum() for atom in heavy_atoms]

    df = pd.DataFrame({
        'x': positions[:, 0],
        'y': positions[:, 1],
        'z': positions[:, 2],
        'type': atom_types
    })
    if isinstance(mol, Chem.RWMol):
        # Editable molecules get compact class ids: O -> 0, N -> 1, C -> 2.
        df['type'] = df['type'].map({8: 0, 7: 1, 6: 2})
    return df


In [13]:
# Dump the loaded pharmacophore as MOL-block text so its points (coordinates
# and element symbols) can be read directly.
print(Chem.MolToMolBlock(protein))


     RDKit          3D

  7  0  0  0  0  0  0  0  0  0999 V2000
    2.4375    1.2930   18.8438 O   0  0  0  0  0  0  0  0  0  0  0  0
    5.2663  -14.3242    0.6218 O   0  0  0  0  0  0  0  0  0  0  0  0
    1.3018  -11.9533   -0.2721 N   0  0  0  0  0  0  0  0  0  0  0  0
    0.6101  -14.1458   -7.2059 N   0  0  0  0  0  0  0  0  0  0  0  0
    3.7134  -14.8579   -6.8942 O   0  0  0  0  0  0  0  0  0  0  0  0
   -2.0869   -8.7732    9.3217 O   0  0  0  0  0  0  0  0  0  0  0  0
    1.8475   -4.1312    1.6027 N   0  0  0  0  0  0  0  0  0  0  0  0
M  END



In [14]:
def vector_distance(x, y):
    """Return the Euclidean (L2) norm of the element-wise difference ``x - y``."""
    return np.linalg.norm(np.subtract(x, y))


def _flag_atoms_near(atom_df, points, tolerance):
    """Return a 0/1 integer array marking rows of ``atom_df`` within ``tolerance`` of any point."""
    coords = atom_df[['x', 'y', 'z']].to_numpy(dtype=float)
    targets = np.asarray(points, dtype=float).reshape(-1, 3)
    # Pairwise distances, shape (n_atoms, n_points); an empty `points` gives
    # zero columns, so `any` is False for every row.
    dists = np.linalg.norm(coords[:, None, :] - targets[None, :, :], axis=2)
    return (dists < tolerance).any(axis=1).astype(int)


def create_gt_df(ligand, pharm, threshold, tolerance=0.05):
    """Build per-atom ground-truth tables for a ligand/pharmacophore pair.

    ``assign_mol_label`` supplies the coordinates of the positively labelled
    atoms; every atom of either molecule lying within ``tolerance`` of one of
    those coordinates gets ``binding = 1``, all others ``binding = 0``.

    Parameters
    ----------
    ligand, pharm
        Molecules accepted by ``assign_mol_label`` and ``rdmol_to_dataframe``.
    threshold
        Forwarded unchanged to ``assign_mol_label``.
    tolerance : float, optional
        Maximum distance, in coordinate units, at which an atom is considered
        to coincide with a labelled coordinate. The default of 0.05 reproduces
        the previously hard-coded value.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The ligand and pharmacophore tables from ``rdmol_to_dataframe``, each
        with an added integer ``binding`` column.
    """
    positive_coords = assign_mol_label(ligand, pharm, threshold=threshold)

    lig_df = rdmol_to_dataframe(ligand)
    pharm_df = rdmol_to_dataframe(pharm)

    # Coordinates are matched by proximity rather than equality because they
    # have gone through separate float conversions.
    lig_df['binding'] = _flag_atoms_near(lig_df, positive_coords, tolerance)
    pharm_df['binding'] = _flag_atoms_near(pharm_df, positive_coords, tolerance)

    return lig_df, pharm_df
    
    
    
    
    

In [15]:
# Build the ligand and pharmacophore ground-truth tables for the example pair,
# passing 4 as the labelling threshold.
test_lig, test_prot = create_gt_df(mol, protein, 4)

In [17]:
# Display the pharmacophore table; rows with binding == 1 are the points that
# matched a coordinate returned by assign_mol_label.
test_prot

Unnamed: 0,x,y,z,type,binding
0,2.4375,1.293,18.8438,8,0
1,5.2663,-14.3242,0.6218,8,0
2,1.3018,-11.9533,-0.2721,7,0
3,0.6101,-14.1458,-7.2059,7,0
4,3.7134,-14.8579,-6.8942,8,0
5,-2.0869,-8.7732,9.3217,8,0
6,1.8475,-4.1312,1.6027,7,1
