In [39]:
import time, os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import re
from BetaPose import session_prep


######### Load session
class ACGUI_KIT:
  """Selenium (Firefox) helper that drives the ACGUI web interface.

  Constructing an instance opens a browser window, loads ``url`` and, when the
  URL carries a ``session_id`` query parameter, clicks the session loader and
  remembers the id as ``self.JOBID``.

  Attributes:
    JOBID: session id parsed from the URL ("" when the URL has none).
    cofactors: ligand name prefixes skipped by ``Fetch(..., lig="auto")``.
    driver: the ``webdriver.Firefox`` instance used for every interaction.
  """

  def __init__(self, url):
    """Open Firefox on ``url`` and load the session it refers to, if any.

    Raises:
      AssertionError: the loaded page title does not contain "ACGUI".
    """
    self.JOBID = ""
    self.cofactors = ["SO4", "ACT", "EDO"]
    self.driver = webdriver.Firefox()
    # Use the constructor argument, not a notebook-level global, so the class
    # works for whatever URL the caller passes.
    self.driver.get(url)
    self.driver.set_window_size(2400, 1000)

    if "ACGUI" not in self.driver.title:
      raise AssertionError(f"Unexpected page title: {self.driver.title!r}")
    if "session_id" in url:
      self.driver.find_element(By.ID, "Session_loader").click()
      self.JOBID = self._parse_session_id(url)
    else:
      print("No job id found, you might be initiating a new session")

  @staticmethod
  def _parse_session_id(url):
    """Return the ``session_id`` query value of ``url`` ("" when absent)."""
    from urllib.parse import parse_qs, urlparse
    values = parse_qs(urlparse(url).query).get("session_id", [])
    return values[0] if values else ""

  def __check(self):
    """Raise ``AssertionError`` unless ``self.JOBID`` has exactly 8 characters."""
    if len(self.JOBID) != 8:
      raise AssertionError("Not a valid JOBID")

  def __find_ui_button(self, text):
    """Return the first ``ui-button`` element whose text equals ``text``, else None."""
    for btn in self.driver.find_elements(By.CLASS_NAME, "ui-button"):
      if btn.text == text:
        return btn
    return None

  def __clickSimulateDropdown(self):
    """Validate the job id, then click the element with id ``simulatemenubutton``."""
    self.__check()
    self.driver.find_element(By.ID, "simulatemenubutton").click()
    time.sleep(0.1)

  def __clickDockingDropdown(self):
    """Validate the job id, then click the element with id ``dockmenubutton``."""
    self.__check()
    self.driver.find_element(By.ID, "dockmenubutton").click()
    time.sleep(0.1)

  def __ClickResidue(self, value):
    """Click the first ``resbutton`` element whose ``value`` property equals ``value``.

    Prints a warning when no button matches (including when there are none).
    """
    for res in self.driver.find_elements(By.CLASS_NAME, "resbutton"):
      if res.get_property("value") == value:
        res.click()
        break
    else:
      print(f"{self.JOBID}: Warning, not found matched residue; ")

  def __ClickUIButton(self, text):
    """Click the first ``ui-button`` whose text equals ``text``; report if none exists."""
    btn = self.__find_ui_button(text)
    if btn is None:
      print(f"Not found {text} button")
      return
    btn.click()

  def SelectResidue(self, resids):
    """Select one residue (``str``) or several residues (``list`` of ``str``).

    Each residue is picked via the ``showseq`` element, then ``togglemenubutton``
    and ``togglesel`` are clicked. Any other argument type only prints a hint.
    """
    if isinstance(resids, str):
      # NOTE(review): unlike the list branch, the "Close" button is not clicked
      # here; kept as in the original because the intended GUI flow is unclear.
      self.ClickByID("showseq")
      self.__ClickResidue(resids)
      self.ClickByID("togglemenubutton")
      self.ClickByID("togglesel")
    elif isinstance(resids, list):
      for res in resids:
        print(f"Selecting {res}")
        self.ClickByID("showseq")
        self.__ClickResidue(res)
        self.__ClickUIButton("Close")
        self.ClickByID("togglemenubutton")
        self.ClickByID("togglesel")
    else:
      print("Please provide a valid resid argument")

  def Fetch(self, pdbcode, lig=None):
    """Type ``pdbcode`` into the ``pdbenter`` field and submit it.

    Args:
      pdbcode: text typed into the PDB entry field.
      lig: when equal to the string "auto", the first ligand menu entry that is
        neither "none" nor prefixed by one of ``self.cofactors`` is chosen and
        the ``waterradius`` field is set to "0". Any other value is ignored.
    """
    pdbenter = self.driver.find_element(By.ID, "pdbenter")
    pdbenter.send_keys(pdbcode)
    time.sleep(2)
    if isinstance(lig, str) and lig == "auto":
      self.driver.find_element(By.ID, "pdbligand-button").click()
      lig_lst = self.driver.find_element(By.ID, "pdbligand-menu").find_elements(By.TAG_NAME, "li")
      print(f"{self.JOBID}: All available ligands: ", [i.text for i in lig_lst])
      for item in lig_lst:
        label = item.text
        if label == "none" or label.split("_")[0] in self.cofactors:
          continue
        time.sleep(1)
        ActionChains(self.driver).move_to_element(item).click().perform()
        break
      wat_range = self.driver.find_element(By.ID, "waterradius")
      wat_range.clear()
      wat_range.send_keys("0")
      time.sleep(1)
    self.driver.find_element(By.ID, "pdbsubmit").click()
    time.sleep(4)

  def OpenPrepareForm(self, settings=None):
    """Open the preparation form and fill it in.

    Args:
      settings: optional mapping of element id -> value; each element is
        cleared and the value typed into it.
    """
    # The job id is deliberately not validated here (the original did not
    # either), so the form can be opened before a valid JOBID is known.
    self.driver.find_element(By.ID, "simulatemenubutton").click()
    time.sleep(0.1)
    self.driver.find_element(By.ID, "simulateprep").click()
    for key, val in (settings or {}).items():
      simparm = self.driver.find_element(By.ID, key)
      simparm.clear()
      simparm.send_keys(val)
      time.sleep(0.1)

  def SubmitPrepareForm(self):
    """Click the "Submit CAMPARI job" button, or report that it is missing."""
    self.__check()
    btn = self.__find_ui_button("Submit CAMPARI job")
    if btn is None:
      print("Not found CAMPARI job submission button")
      return
    print(f"{self.JOBID}: submiting the preparation job")
    btn.click()

  def UploadMol2(self, mol2token):
    """Submit a mol2 ligand for the current job through ``session_prep.SubmitMOL2``.

    Args:
      mol2token: either a path to an existing file (mode "file") or a string
        containing "@<TRIPOS>MOLECULE" (mode "str").
    """
    self.__check()
    if os.path.isfile(mol2token):
      session_prep.SubmitMOL2(mol2token, self.JOBID, mode="file")
    elif "@<TRIPOS>MOLECULE" in mol2token:
      session_prep.SubmitMOL2(mol2token, self.JOBID, mode="str")
    else:
      print("Fatal: Not found a valid mol2 file nor a valid mol2 string;")

  def ClickByID(self, ID):
    """Click the element with id ``ID``, then wait one second.

    If the direct click raises, the click is retried through an ``ActionChains``
    move-and-click.
    """
    item = self.driver.find_element(By.ID, ID)
    try:
      item.click()
    except Exception:
      # Deliberate best-effort fallback; ``Exception`` (rather than a bare
      # ``except``) lets KeyboardInterrupt/SystemExit propagate.
      ActionChains(self.driver).move_to_element(item).click().perform()
    time.sleep(1)

  def OpenMDForm(self):
    """Open the simulate dropdown and click the ``simulatemd`` entry."""
    self.__clickSimulateDropdown()
    self.driver.find_element(By.ID, "simulatemd").click()
    time.sleep(0.1)

  def SetMDForm(self, settings=None):
    """Fill in the MD form from a mapping of element id -> value.

    Before an element is touched, ``SMDSet4`` is clicked for the production
    keys listed below and ``SMDSet3`` for ``equilnr`` / ``equil<digit>_*`` keys.
    ``equilnr`` itself is not an element: it clicks ``sim_removeequil`` once and
    ``sim_addequil`` ``int(val)`` times. ``<select>`` elements get the option
    whose value equals ``str(val)``; checkbox inputs are clicked when
    ``int(val) > 0``; other inputs are cleared and typed into.
    """
    production_keys = {
      'simTimestep', 'simNrSteps', 'simTrajOutInterval', 'sim_prodensemble',
      'sim_prodengine', 'sim_prodpress', 'sim_prodtemp', 'simNrCopy', 'simOutFormat', 'simOutputGroup',
      "sim_batchselection",
    }
    for key, val in (settings or {}).items():
      if key in production_keys:
        self.ClickByID("SMDSet4")
        time.sleep(0.1)
      elif key == "equilnr":
        self.ClickByID("SMDSet3")
        time.sleep(0.1)
        self.ClickByID("sim_removeequil")
        for _ in range(int(val)):
          self.ClickByID("sim_addequil")
        continue
      elif re.match(r"equil[0-9]_", key) or key == "":
        self.ClickByID("SMDSet3")
        time.sleep(0.1)

      item = self.driver.find_element(By.ID, key)
      if item.tag_name == "select":
        # Option values are strings in the DOM, so compare against str(val);
        # otherwise integer settings could never match.
        wanted = str(val)
        for opt in item.find_elements(By.TAG_NAME, "option"):
          if opt.get_property("value") == wanted:
            item.click()
            time.sleep(0.1)
            opt.click()
            break
        else:
          print(f"Not found a valid choice for {key} valued {val}")
      elif item.tag_name == "input":
        if item.get_property("type") == "checkbox":
          print(f"As checkbox ({item.text})")
          # NOTE(review): this toggles the box rather than forcing a state.
          if int(val) > 0:
            item.click()
        else:
          item.clear()
          item.send_keys(val)
      time.sleep(0.1)

  def SubmitMDForm(self):
    """Click the "Submit simulation job" button, or report that it is missing."""
    self.__check()
    btn = self.__find_ui_button("Submit simulation job")
    if btn is None:
      print("Not found <Submit simulation job> submission button")
      return
    print(f"{self.JOBID}: submiting the MD simulation job")
    btn.click()

  def OpenSeedForm(self, refset, settings=None):
    """Select ``refset`` residues, open the ``dockseed`` form and fill it in.

    Args:
      refset: residue(s) forwarded to ``SelectResidue``.
      settings: optional mapping of element id -> value typed into the form.
    """
    self.SelectResidue(refset)
    self.ClickByID("dockmenubutton")
    time.sleep(0.1)
    self.ClickByID("dockseed")
    time.sleep(0.1)
    for key, val in (settings or {}).items():
      parm_input = self.driver.find_element(By.ID, key)
      parm_input.clear()
      parm_input.send_keys(val)
      time.sleep(0.1)

  def SubmitSeedForm(self):
    """Placeholder: submitting the seed form is not implemented yet."""
    pass

# Root folder of the local PDBbind copy. The second assignment overrides the
# first, so only the Dropbox path is in effect.
PDBBind_path = "/home/yzhang/Downloads/refined-set"
PDBBind_path = "/home/miemie/Dropbox/PhD/project_MD_ML/PDBbind_v2020_refined"

pdbcode = "1Q65"; 
# The session id is 8 characters long, which is what ACGUI_KIT's job-id check requires.
sess_id = "thistest"; 
# The session id is passed as the `session_id` query parameter of the GUI URL.
acgui_url = f"http://130.60.168.149/ACyang/search2.html?session_id={sess_id}"
# acgui_url = f"http://130.60.168.149/ACdev/search2.html"
# NOTE(review): if the next line is re-enabled, `pdbcode.lower` is missing its call parentheses.
# mol2file = f"{PDBBind_path}/{pdbcode.lower}/{pdbcode}_ligand.mol2"; 

# Instantiating the helper opens a Firefox window and loads the URL.
testcase = ACGUI_KIT(acgui_url); 
#################Submit Preparation
# parms = {
#   "MAXLOOPL" : 0, 
#   "NRSTEPS" : 500, 
#   "MINI_MODE" : 1,
#   "PDB_TOLERANCE_A" : 50, 
#   "PDB_TOLERANCE_B" : "0.5 1.5"
# }
# testcase.Fetch(pdbcode, lig="auto"); 
# testcase.OpenPrepareForm(settings=parms); 
# testcase.SubmitPrepareForm();  

# testcase.UploadMol2(mol2file); 

################Submit MD
# Settings consumed by ACGUI_KIT.SetMDForm: every key except "equilnr" is the
# id of a form element that is looked up and filled with the given value.
simsettings = {
  # Production-run keys: SetMDForm clicks "SMDSet4" before touching these.
  "sim_batchselection": 0, 
  
  "simTimestep": 2, 
  "simNrSteps" : 10000, 
  "simTrajOutInterval":5000, 
  "sim_prodensemble": "npt",
  "sim_prodengine": "gmxmd", 
  "sim_prodpress": 1,
  "sim_prodtemp" : 300,
  "simNrCopy": 4, 
  
  "simOutFormat": "netcdf", 
  "simOutputGroup": "all", 
  
  # "equilnr" is not an element id: SetMDForm clicks "sim_removeequil" once
  # and then "sim_addequil" this many times.
  "equilnr":5,
  # Keys of the form equil<digit>_*: SetMDForm clicks "SMDSet3" before touching these.
  "equil0_nrsteps": 4000, 
  'equil1_ensemble': 'nvt',
  'equil1_temp': 370,
  'equil1_timestep': 0.1,
  'equil1_nrsteps': 8000,
  'equil1_tctime': 0.0005,
  'equil2_ensemble': 'nvt',
  'equil2_temp': 345,
  'equil2_timestep': 0.5,
  'equil2_nrsteps': 8000,
  'equil2_tctime': 0.0025,
  'equil3_ensemble': 'nvt',
  'equil3_temp': 300,
  'equil3_timestep': 0.75,
  'equil3_nrsteps': 4000,
  'equil3_tctime': 0.01,
  'equil4_ensemble': 'npt',
  'equil4_temp': 298,
  'equil4_timestep': 1,
  'equil4_nrsteps': 3000,
  'equil4_tctime': 0.05,
  'equil5_ensemble': 'npt',
  'equil5_temp': 298,
  'equil5_timestep': 2,
  'equil5_nrsteps': 2000,
  'equil5_tctime': 0.1,
}



# Show the keys in the order SetMDForm will process them (dict insertion order).
print(list(simsettings.keys()))

# Open the MD form, fill it from `simsettings`, then press the submit button.
testcase.OpenMDForm()
testcase.SetMDForm(simsettings)
testcase.SubmitMDForm()

#################Submit Docking
# POCKET = ["VAL 74", "THR 91"]; 
# seed_parms = {
#   "OPMODE" : "d",           # Docking mode 
#   "VECCHOICE" : "b",        # Vector type 
#   "ENTOTFILTER" : "-14",    # Energy filter 
#   "PROTDIEL" : "2",         # Dielectric constant 
#   "NRMOLPO" : "1",          # Top N poses per mol 
#   "NRCLUPO" : "0",          # Top N poses per cluster 
#   "SEEDSD_CHOICE" : "n",    # Whether or not to play steepest descent; 
#   "SEEDVF_MODE" : 1,        # Post run filtering; OFF 0, OR 1, AND 2; 
#   "SEEDVF_TOT" : "0.1",
#   "SEEDVF_ELE" : "0.1",
#   "SEEDVF_VDW" : "0.1",
#   "SEEDVF_TOTE" : "0.1",
#   "SEEDVF_ELEE" : "0.1",
#   "SEEDVF_VDWE" : "0.1",
#   "SEEDVF_RDES" : "0.1",
#   "SEEDVF_FDES" : "0.1",
#   "PSQLPDBCODE" : "5UUK",   # PDB code; 
#   "CONFO_RMS1" : 0.5,       # RMSD-like similarity threshold; 
#   "CONFO_RMS2" : 0.05,      # Non-RMSD-like similarity threshold; 
#   "CONFO_ATTS" : 10,        # Embedding attempts; 
#   "LOOKUP_DB" : "zinc_2020",   # Source database; 
# }

# testcase.OpenSeedForm(POCKET, settings=seed_parms); 




['sim_batchselection', 'simTimestep', 'simNrSteps', 'simTrajOutInterval', 'sim_prodensemble', 'sim_prodengine', 'sim_prodpress', 'sim_prodtemp', 'simNrCopy', 'simOutFormat', 'simOutputGroup', 'equilnr', 'equil0_nrsteps', 'equil1_ensemble', 'equil1_temp', 'equil1_timestep', 'equil1_nrsteps', 'equil1_tctime', 'equil2_ensemble', 'equil2_temp', 'equil2_timestep', 'equil2_nrsteps', 'equil2_tctime', 'equil3_ensemble', 'equil3_temp', 'equil3_timestep', 'equil3_nrsteps', 'equil3_tctime', 'equil4_ensemble', 'equil4_temp', 'equil4_timestep', 'equil4_nrsteps', 'equil4_tctime', 'equil5_ensemble', 'equil5_temp', 'equil5_timestep', 'equil5_nrsteps', 'equil5_tctime']
As checkbox ()
thistest: submiting the MD simulation job


1000.0

In [None]:
# PDB entries to prepare; an optional ":<chain>" suffix is stripped when the
# session id is built.
sess = ['1SWR','1Q65','2D3Z:A','2FLR','3D0B','2QE4',
        '2J7E:A','1K21','1K22','1P57','1E2K','3F8C',
        '3BGZ','1H1S','1ZGI','1M48','1N46','1MU6',
        '2ZB1','1O5E','1LKE','2V95','2J7G','1E2L',
        '1TA6']
# sess = [i.lower() for i in sess]
print(sess)

# Entries listed here are skipped by the loop below.
solved = ["1SWR", "2QE4", "2FLR", "3D0B", "2ZB1",
          "1M48", "3BGZ", "1E2K", "3F8C", "2V95",
          "1E2L", "2D3Z",
         ]
# Record of session ids that failed; not read by the code below.
Failed = ["C4001P57", "C4001K21", "C4001Q65", "C4001K22", "C4001O5E",
          "C4001TA6",
         ]

# Preparation-form settings (element id -> value). They must be defined here:
# the only other definition in this notebook is commented out, so without it
# the loop below raised NameError and reported every entry as skipped.
parms = {
  "MAXLOOPL" : 0,
  "NRSTEPS" : 500,
  "MINI_MODE" : 1,
  "PDB_TOLERANCE_A" : 50,
  "PDB_TOLERANCE_B" : "0.5 1.5"
}

for pdb in sess:
  # Only the 1O5E entry is processed at the moment; widen or drop this filter
  # to run the whole list.
  if "1O5E" not in pdb:
    continue
  pdbcode = pdb  # .lower()
  PDBCODE = pdbcode.split(":")[0].upper()
  if PDBCODE in solved:
    continue
  # "C400" + 4-character PDB code gives the 8-character id ACGUI_KIT expects.
  sess_id = f"C400{PDBCODE}"
  acgui_url = f"http://130.60.168.149/ACyang/search2.html?session_id={sess_id}"
  # NOTE(review): not used in this loop; the path keeps the original case and
  # any ":<chain>" suffix of `pdbcode` -- confirm before relying on it.
  mol2file = f"{PDBBind_path}/{pdbcode}/{pdbcode}_ligand.mol2"

  # Each iteration opens a new Firefox window, which is left open.
  testcase = ACGUI_KIT(acgui_url)
  try:
    testcase.Fetch(pdbcode, lig="auto")
    testcase.OpenPrepareForm(settings=parms)
    testcase.SubmitPrepareForm()
  except Exception as err:
    # Best effort: keep going with the next entry, but say why this one failed
    # instead of hiding the error.
    print(f"Skipping the operation on {sess_id}")
    print(f"  reason: {type(err).__name__}: {err}")


In [239]:
# Exploratory probe: index the atom array with the atoms matched by the mask
# "@?H=". The value is not displayed because this is not the cell's last
# expression.
atoms[traj.top.select("@?H=")]
# Check whether a residue object exposes a `chainid` attribute.
try:
  print(getattr(residues[1], "chainid"))
except (AttributeError, IndexError):
  print("no")
# Same check on the `atoms` container itself.
try:
  print(getattr(atoms, "chainid"))
except AttributeError:
  print("no")

no
no


In [6]:
from pdbfixer import PDBFixer; 
from openmm.app import PDBFile; 
import os.path, re; 
import pytraj as pt 
import numpy as np 
from scipy.spatial import distance_matrix
import tempfile

# PDBFixer("4kng")
# Clean one PDBbind protein file: keep its atom records, run PDBFixer on them,
# drop atoms of residues that come unphysically close to each other, and write
# the result to "<code>_processed.pdb" next to the input.
pdbcode = "1O5E"
pdbfile = f"{PDBBind_path}/{pdbcode.lower()}/{pdbcode.lower()}_protein.pdb"
print(pdbfile)

# Residue names for which only non-backbone atoms are removed on a conflict.
proteinResidues = ['ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR', 'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL']
if os.path.isfile(pdbfile):
  with tempfile.NamedTemporaryFile(suffix=".pdb") as file1:
    with open(pdbfile, "r") as file2:
      # Keep only real ATOM/HETATM records (prefix test, so REMARK lines that
      # merely mention "ATOM" are not picked up) whose column 27 is blank.
      # The length guard avoids an IndexError on truncated records.
      retained_lines = [
        i for i in file2.read().strip("\n").split("\n")
        if i.startswith(("ATOM", "HETATM")) and len(i) > 26 and i[26].strip() == ""
      ]
    with open(file1.name, "w") as file2:
      file2.write("\n".join(retained_lines))
    fixer = PDBFixer(filename=file1.name)
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(False)
    fixer.addMissingHydrogens(7.0)

  with tempfile.NamedTemporaryFile(suffix=".pdb") as file1:
    # Close the handle before pytraj reads the file so the content is flushed
    # to disk regardless of garbage-collection timing.
    with open(file1.name, "w") as fixed_out:
      PDBFile.writeFile(fixer.topology, fixer.positions, fixed_out)
    traj = pt.load(file1.name)

    with open(pdbfile, "r") as file2:
      pdblines = file2.read().strip("\n").split("\n")
      # Header information of the original file; currently not written to the output.
      remarks = [i for i in pdblines if "SEQRES" in i or "SSBOND" in i or "REMARK" in i]
      remark_str = "\n".join(remarks)
    with open(file1.name, "r") as file2:
      pdblines = file2.read().strip("\n").split("\n")
      # Columns 18-26 of each atom record: residue name, chain id, residue number.
      chainids = [i[17:26] for i in pdblines if i.startswith(("ATOM", "HETATM"))]
      # One entry per distinct residue record, in file order.
      processed = list(dict.fromkeys(chainids))
      # Character 4 of the slice is the chain id; blank chain ids are skipped.
      chainindexes = [rec[4].strip() for rec in processed if len(rec[4].strip()) == 1]
      print(len(chainindexes), " Residues found", chainindexes)
      chainids = chainindexes

    # After the fix of the PDBfile, find clashes
    traj.top.set_reference(traj[0])
    atoms = np.array([i for i in traj.top.atoms])
    residues = np.array([i for i in traj.top.residues])
    c_conflicts = 0
    # Atom-name pairs that are never treated as a conflict.
    excludes = ["N-C", "C-N", "SG-SG"]
    atoms_to_remove = []
    for res in traj.top.residues:
      # 1-based atom mask covering this residue.
      resmask = "@" + ",".join(str(i) for i in range(res.first+1, res.last+1))
      # Atoms of this residue, excluding names matched by "H=" / "?H=".
      idx1 = traj.top.select(f"{resmask}&!@H=,?H=")
      mtx1 = traj.xyz[0][idx1]

      # Neighbourhood of the residue (distance mask "<:6"), minus the residue itself.
      closest = traj.top.select(f"{resmask}<:6&!{resmask}")
      mtx2 = traj.xyz[0][closest]
      dist_mtx = distance_matrix(mtx1, mtx2)
      len_check = (dist_mtx < 1.6)

      if np.count_nonzero(len_check) > 0:
        rows, cols = np.where(len_check)
        group1 = idx1[rows]
        group2 = closest[cols]
        for i, j in zip(group1, group2):
          if f"{atoms[i].name}-{atoms[j].name}" in excludes:
            continue
          # Mark both residues of the conflicting pair: for the residue names
          # in proteinResidues the atoms C, H, N, O, CA and HA are kept, any
          # other residue is removed entirely.
          for k in (i, j):
            resid = atoms[k].resid
            if residues[resid].name in proteinResidues:
              marked = traj.top.select(f":{resid+1}&!@C,H,N,O,CA,HA")
            else:
              marked = traj.top.select(f":{resid+1}")
            atoms_to_remove.extend(marked)
          c_conflicts += 1
    atoms_to_remove = sorted(set(atoms_to_remove))

    if (len(atoms_to_remove)/len(atoms)) > 0.1:
      print(f"Warning: The atoms to remove ({len(atoms_to_remove)}) is over 10% of the total atom number {len(atoms_to_remove)/len(atoms)}")
      print("Please be VERY CAUTIOUS about the structure or adjust the threshold.")
    elif len(atoms_to_remove) > 100:
      print(f"Warning: The atoms to remove ({len(atoms_to_remove)}) is over 100")
      print("Please be VERY CAUTIOUS about the structure or adjust the threshold.")
    else:
      print(f"Atoms to remove: {len(atoms_to_remove)}")

    ofile = f"{PDBBind_path}/{pdbcode.lower()}/{pdbcode.lower()}_processed.pdb"

    # Plain-int set gives O(1) membership tests while filtering the records.
    remove_set = {int(i) for i in atoms_to_remove}
    with open(file1.name, "r") as file2:
      kept_lines = []
      c = 0  # running 0-based index over ATOM/HETATM records
      for line in file2.read().strip("\n").split("\n"):
        if re.match("^ATOM|^HETATM", line):
          if c not in remove_set:
            kept_lines.append(line)
          c += 1
        else:
          # NOTE(review): non-atom records (e.g. CONECT) are copied unchanged
          # and may still refer to removed atoms.
          kept_lines.append(line)
    # Terminate with exactly one END record (the fixed file may already end with one).
    if not kept_lines or kept_lines[-1].strip() != "END":
      kept_lines.append("END")
    finalstr = "\n".join(kept_lines) + "\n"
    # A separate name is used so the temporary-file handle `file1` is not rebound.
    with open(ofile, "w") as outfile:
      outfile.write(finalstr)
    print(f"Conflict identified: {c_conflicts}")
    # Debug aid: list what the fixer object exposes.
    print(dir(fixer))

else:
  print("No")

# for i in atoms:
#   print(i)

/home/miemie/Dropbox/PhD/project_MD_ML/PDBbind_v2020_refined/1o5e/1o5e_protein.pdb
342  Residues found ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', '

In [246]:
# Materialise the topology's molecules into a list so they can be indexed.
x = list(traj.top.mols)
# List every attribute the first molecule object exposes.
print(dir(x[0]))
# Atom count of the second molecule.
print(x[1].n_atoms)

['__class__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', 'is_solvent', 'n_atoms', 'set_no_solvent', 'set_solvent']
1277
