In [2]:
import tempfile
import pytraj as pt

def combineMOL2PDB(mol2file, pdbfile, outfile):
  """
  Merge a ligand (MOL2) and a protein (PDB) into a single complex PDB file.

  The ligand is converted to PDB records with RDKit, its residue name is
  rewritten from "UNL" to "LIG", and its HETATM records are appended after
  the ATOM/HETATM records of the protein. The merged text is round-tripped
  through pytraj (load, then save) to produce `outfile`.

  Parameters:
    mol2file: path to the ligand MOL2 file
    pdbfile: path to the protein PDB file
    outfile: path of the complex PDB file to write (overwritten if present)

  Returns:
    `outfile`, unchanged

  Raises:
    ValueError: if RDKit cannot parse `mol2file`
  """
  from rdkit import Chem

  lig = Chem.MolFromMol2File(mol2file)
  if lig is None:
    # RDKit reports parse failures by returning None instead of raising
    raise ValueError(f"RDKit could not parse the ligand file {mol2file}")
  ligpdb = Chem.MolToPDBBlock(lig)

  # Match on the record name at the start of the line; a substring test would
  # also pick up unrelated records that merely mention ATOM/HETATM.
  atomlines = [
    line.replace("UNL", "LIG")
    for line in ligpdb.split("\n")
    if line.startswith("HETATM")
  ]
  with open(pdbfile, "r") as file1:
    linesorig = [
      line
      for line in file1.read().split("\n")
      if line.startswith(("ATOM", "HETATM"))
    ]
  finalstr = "\n".join(linesorig + atomlines)

  # The context manager removes the temporary file even if loading fails.
  with tempfile.NamedTemporaryFile(suffix=".pdb") as temp:
    temp.write(finalstr.encode("utf-8"))
    # pytraj re-opens the file by name, so buffered data must be on disk first
    temp.flush()
    traj = pt.load(temp.name)

  pt.save(outfile, traj, overwrite=True)
  return outfile


# Example run on PDBbind entry 1ctu: a = ligand MOL2 (input), b = protein PDB (input),
# c = complex PDB (output path). The paths are absolute and machine-specific.
a="/home/miemie/Dropbox/PhD/project_MD_ML/PDBbind_v2020_refined/1ctu/1ctu_ligand.mol2"
b="/home/miemie/Dropbox/PhD/project_MD_ML/PDBbind_v2020_refined/1ctu/1ctu_protein.pdb"
c="/home/miemie/Dropbox/PhD/project_MD_ML/PDBbind_v2020_refined/1ctu/1ctu_complex.pdb"
# Last expression of the cell: the returned output path is what the cell displays.
combineMOL2PDB(a,b,c)


'/home/miemie/Dropbox/PhD/project_MD_ML/PDBbind_v2020_refined/1ctu/1ctu_complex.pdb'

In [30]:
from scipy.spatial import distance_matrix
import numpy as np 

# Van der Waals radii keyed by atomic number (as a string), shared by
# ContactNumber and ClashNumber.
VDWRADII = {'1': 1.1, '2': 1.4, '3': 1.82, '4': 1.53, '5': 1.92, '6': 1.7, '7': 1.55, '8': 1.52, 
  '9': 1.47, '10': 1.54, '11': 2.27, '12': 1.73, '13': 1.84, '14': 2.1, '15': 1.8, 
  '16': 1.8, '17': 1.75, '18': 1.88, '19': 2.75, '20': 2.31, '28': 1.63, '29': 1.4, 
  '30': 1.39, '31': 1.87, '32': 2.11, '34': 1.9, '35': 1.85, '46': 1.63, '47': 1.72, 
  '48': 1.58, '50': 2.17, '51': 2.06, '53': 1.98, '54': 2.16, '55': 3.43, '56': 2.68, 
  '78': 1.75, '79': 1.66, '82': 2.02, '83': 2.07
}


def _vdw_radius(atomic_number):
  """Look up the tabulated radius for an element, failing with a readable message."""
  key = str(atomic_number)
  if key not in VDWRADII:
    raise KeyError(f"No van der Waals radius tabulated for atomic number {atomic_number}")
  return VDWRADII[key]


def _count_close_pairs(pdbfile, target, offset):
  """
  Count non-bonded atom pairs inside `target` whose distance is below
  (radius_1 + radius_2 + offset), using the first frame of `pdbfile`.
  """
  traj = pt.load(pdbfile)
  # A reference frame is set before the mask is evaluated
  traj.top.set_reference(traj[0])
  target_atoms = np.asarray(traj.top.select(target))

  # Everything below is loop-invariant, so it is built once up front
  all_atoms = [*traj.top.atoms]
  coords = traj.xyz[0][target_atoms]
  radii = np.array([_vdw_radius(all_atoms[k].atomic_number) for k in target_atoms])

  close = 0
  for pos, atom_index in enumerate(target_atoms):
    atom = all_atoms[atom_index]
    # Skip the atom itself and every atom it is bonded to
    excluded = [atom.index, *atom.bonded_indices()]
    others = ~np.isin(target_atoms, excluded)
    if not others.any():
      continue
    dists = distance_matrix([coords[pos]], coords[others])[0]
    close += np.count_nonzero(dists < radii[others] + radii[pos] + offset)
  # Each pair was seen once from either end
  return close / 2


def ContactNumber(pdbfile, target, tolerance=0.4):
  """
  Count atom-atom contacts among the atoms selected by `target`.

  Two atoms that are not bonded to each other are in contact when their
  distance is smaller than the sum of their van der Waals radii plus
  `tolerance`.

  Parameters:
    pdbfile: structure file readable by pytraj; only the first frame is used
    target: pytraj atom mask selecting the atoms to analyse
    tolerance: slack added to the radii sum (default 0.4, the original cutoff)

  Returns:
    The number of contacts (float). The count is also printed.

  Raises:
    KeyError: if a selected atom's element has no entry in VDWRADII
  """
  ccontact = _count_close_pairs(pdbfile, target, tolerance)
  print(f"{ccontact} contacts are identified")
  return ccontact


def ClashNumber(pdbfile, target, overlap=1.25):
  """
  Count atom-atom clashes among the atoms selected by `target`.

  Two atoms that are not bonded to each other clash when their distance is
  smaller than the sum of their van der Waals radii minus `overlap`.

  Parameters:
    pdbfile: structure file readable by pytraj; only the first frame is used
    target: pytraj atom mask selecting the atoms to analyse
    overlap: amount subtracted from the radii sum (default 1.25, the original cutoff)

  Returns:
    The number of clashes (float). The count is also printed.

  Raises:
    KeyError: if a selected atom's element has no entry in VDWRADII
  """
  cclash = _count_close_pairs(pdbfile, target, -overlap)
  print(f"{cclash} clashes are identified")
  return cclash

  
# Run both counters on the merged 1ctu complex (absolute, machine-specific path).
a="/home/miemie/Dropbox/PhD/project_MD_ML/PDBbind_v2020_refined/1ctu/1ctu_complex.pdb"
# NOTE(review): ":LIG<:5" is presumably a distance-based mask (residues within 5 of
# residue LIG) — confirm against the pytraj/Amber mask syntax.
ClashNumber(a, ":LIG<:5")
ContactNumber(a, ":LIG<:5")

1.0 contacts are identified
1870.0 contacts are identified


In [4]:
import pytraj as pt 
# Surface calculation for the ligand of the merged 1ctu complex
# (absolute, machine-specific path).
c="/home/miemie/Dropbox/PhD/project_MD_ML/PDBbind_v2020_refined/1ctu/1ctu_complex.pdb"
# The same file is passed as coordinates and topology, with mask ":LIG".
traj = pt.load(c, top=c, mask=":LIG")
# NOTE(review): `sel` is not used again in this cell, and the captured output below
# shows two extra lines that this code no longer prints — the output looks stale.
sel = traj.top.select(":LIG")
surf = pt.surf(traj, ":LIG")
print(surf)

17
[ 0  1  2 ... 14 15 16]
[310.02231376]


In [1]:
import subprocess
from BetaPose import utils
import tempfile
import os


# Cell configuration (absolute, machine-specific paths).
# NOTE(review): `pdbfile` is not referenced by the call at the end of this cell,
# which uses `basepath`, `pdbcode` and `lig_mask` only.
pdbfile="/home/miemie/Dropbox/PhD/project_MD_ML/PDBbind_v2020_refined/1ctu/1ctu_complex.pdb"
lig_mask = ":LIG"

# Root directory of the database and the entry code to analyse.
basepath = "/home/miemie/Dropbox/PhD/project_MD_ML/PDBbind_v2020_refined/"
pdbcode = "4ll3"

def EmbeddingFactor(basepath, pdbcode, mask=":LIG"):
  """
  Embedding factor is measured by the accessible surface area (ASA) contribution of ligand in a complex
  to the pure ligand ASA

  The entry is expected at <basepath>/<pdbcode>/ and must contain
  <pdbcode>_ligand.mol2 and <pdbcode>_protein.pdb. Both are merged into
  <pdbcode>_complex.pdb (written next to them) before the surfaces are computed.

  Parameters:
    basepath: root directory of the database
    pdbcode: entry code; it is lower-cased before building the file names
    mask: ligand mask forwarded to utils.ASALig and utils.ASALigOnly

  Returns:
    1 - slig_0/slig_1, where slig_0 is the value of utils.ASALig and slig_1 the
    value of utils.ASALigOnly for the merged complex.
    NOTE(review): presumably these are the ligand ASA inside the complex and the
    ASA of the isolated ligand — confirm against BetaPose.utils.

  Raises:
    FileNotFoundError: if the ligand and/or the protein file is missing
  """
  pdbcode = pdbcode.lower()
  entrydir = os.path.join(os.path.abspath(basepath), pdbcode)
  ligfile = os.path.join(entrydir, f"{pdbcode}_ligand.mol2")
  pdbfile = os.path.join(entrydir, f"{pdbcode}_protein.pdb")
  outfile = os.path.join(entrydir, f"{pdbcode}_complex.pdb")

  # Check both inputs so the report names every missing file, then stop:
  # without them there are no surfaces to compute.
  missing = []
  if not os.path.isfile(ligfile):
    missing.append(f"Cannot find the ligand file in the database {pdbcode} ({ligfile})")
  if not os.path.isfile(pdbfile):
    missing.append(f"Cannot find the protein file in the database {pdbcode} ({pdbfile})")
  if missing:
    raise FileNotFoundError("; ".join(missing))

  outfile = combineMOL2PDB(ligfile, pdbfile, outfile)
  slig_0 = utils.ASALig(outfile, mask)
  slig_1 = utils.ASALigOnly(outfile, mask)
  print(f"Surface contribution: {slig_0}; Surface pure: {slig_1}")
  return 1-slig_0/slig_1


# NOTE(review): this calls EmbeddingFactor from BetaPose.utils, not the function
# of the same name defined earlier in this cell — confirm which one is intended.
utils.EmbeddingFactor(basepath, pdbcode, mask=lig_mask)

Surface contribution: 42.7691; Surface pure: 621.734


0.9312099708235355

In [15]:
def :
  """
  Method 2: Get the mask of protein via a PDB file name 
  """