In [2]:
import os,sys
sys.path.append('./misc/lib/python3.7/site-packages')

import numpy as np
import requests
import nglview as nv
import ipywidgets as widgets
import matplotlib.pyplot as plt
from IPython.display import display
import parmed as pmd
import re

import hublib.use
from hublib.ui import FileUpload, Download
from hublib.cmd import runCommand

%use gromacs-5.1.4

<IPython.core.display.Javascript object>

In [3]:

def fix_missing_atoms(mdstruc, resnumber):
    """Repair or remove the first residue whose ``number`` equals ``resnumber``.

    If the residue still has all four backbone atoms (C, O, N, CA) its other
    atoms are stripped and it is renamed to GLY; otherwise the whole residue
    is stripped from the structure. Nothing happens when no residue matches.

    Parameters
    ----------
    mdstruc : structure object exposing ``residues`` and ``strip(mask)``
        Modified in place.
    resnumber : int
        Residue number to look for (compared with ``residue.number``).
    """
    backbone = ('C', 'O', 'N', 'CA')
    for r in mdstruc.residues:
        if r.number != resnumber:
            continue

        present = {at.name for at in r}
        # Both branches address the residue by its 1-based position in the
        # structure (r.idx+1). The elimination branch used to pass
        # `resnumber` to the mask instead, which targets a different residue
        # whenever residue numbers and positions disagree.
        res_index = r.idx
        res_mask = ':' + str(res_index + 1)

        if all(name in present for name in backbone):
            print('Replacing with GLY')
            mdstruc.strip('(' + res_mask + ')&!(@C,CA,N,O)')
            mdstruc.residues[res_index].name = 'GLY'
        else:
            print('Eliminating from the structure.')
            mdstruc.strip(res_mask)
        # Only the first matching residue is handled.
        break

def run_pdb2gmx(mdstruc, fname, lastPass):
    
    bondmax = 1.5*2
    
    success = True
    MissingAtomRegEx = re.compile('.*Residue(.*)named(.*)of a molecule in the input file was mapped to an entry in the topology database, but the atom(.*)used in that entry is not found in the input file')
    ResMisMatchRegEx = re.compile('.*Atom(.*)in residue[ ]+([a-zA-Z]+)[ ]+([0-9]+)[ ]+was not found in rtp entry(.*)with(.*)atoms while sorting atoms')
    ResNotFoundRegEx = re.compile('.*Residue(.*)not found in residue topology database')
    #out=!{"cd gmx; cp gmx/input.pdb.19 gmx/input.pdb; export GMX_MAXBACKUP=-1; gmx pdb2gmx -f input.pdb -o input.gro -ignh -water spce <<EOF\n8\nEOF"}
    out=!{"cd gmx; rm input.gro; export GMX_MAXBACKUP=-1; gmx pdb2gmx -f "+fname+" -o input.gro -ignh -water spce <<EOF\n8\nEOF"}
    msg = ''
    
    madeTop = False
    MadeTopRegEx = re.compile('You have successfully generated a topology')
    for line in out:
        if re.match(MadeTopRegEx, line):
            madeTop = True
    
    WarnNonSeqChain = re.compile('.*Chain identifier \'(.*)\' is used in two non-sequential blocks.*')
    WarnOccup = re.compile('.*there were (.*) atoms with zero occupancy and (.*) atoms.*')
    #WarnLongBond = re.compile('.*Long bond ((.*)-(.*) = (.*) nm).*')
    WarnLongBond = re.compile('.*Long Bond.*\(([0-9]+)-([0-9]+).*\).*')
    WarnAtomMissing = re.compile('.*atom (.*) is missing in residue ([a-zA-Z]+)[ ]+([0-9]+) in the pdb file.*')

    
    for line in out:
        if(success and re.search('warning', line, flags=re.IGNORECASE)):
            if re.match(WarnAtomMissing, line):
                hit = re.match(WarnAtomMissing, line)
                resname = hit.group(2).strip()
                resnumber = int(hit.group(3).strip())
                print('Atoms were missing from residue '+resname+ ' ' + str(resnumber))
                fix_missing_atoms(mdstruc, resnumber)
                mdstruc.write_pdb('gmx/'+fname)
                success = False
            elif re.match(WarnLongBond, line):
                if madeTop:
                    hit = re.match(WarnLongBond, line)
                    at1 = int(hit.group(1))
                    at2 = int(hit.group(2))
                    
                    grostruc = pmd.load_file('gmx/input.gro')
                    x1 = grostruc.coordinates[at1-1]
                    x2 = grostruc.coordinates[at2-1]
                    dist = np.linalg.norm(x1 - x2)
                    
                    # If this is an amide bond, and the distance is unreasonably large, insert a 
                    # chain break between the residues.
                    if (dist>bondmax) and (grostruc[at1-1].name=='C') and (grostruc[at1-1].name=='N'):
                        res1 = grostruc[at1-1].residue.number
                        res2 = grostruc[at2-1].residue.number
                        print('Bond between atoms ' + str(at1) + ' and ' + str(at2) + ' was too long (' + str(dist) +').')
                        print('Identified atoms as belonging to residues ' + str(res1) + ' and ' + str(res2))
                        print('Inserting chain break between them.')
                        if res1<res2:
                            grostruc[at1-1].residue.ter = True
                        else:
                            grostruc[at2-1].residue.ter = True
                        grostruc.write_pdb('gmx/'+fname)
                        success = False
            elif re.match(WarnNonSeqChain, line):
                if lastPass:
                    print('Non-sequential chain warning')
            elif re.match(WarnOccup, line):
                if lastPass:
                    print('Occupancy warning')
            else:
                if lastPass:
                    print(line)
                    print('We are here')
    
    if success: 
        for line in out:
            
            if re.search('error', line, flags=re.IGNORECASE) and (len(msg)==0):
                success = False
                msg = line
                
            elif len(msg)>0:
                if len(line.strip())>0:
                    msg = msg + line + " "
                else:
                    if re.match(MissingAtomRegEx, msg):
                        hit = re.match(MissingAtomRegEx, msg)
                        resnumber = int(hit.group(1))
                        resname = hit.group(2).strip()
                        print('Atoms were missing from residue '+resname+ ' ' + str(resnumber))
                        fix_missing_atoms(mdstruc, resnumber)
                        mdstruc.write_pdb('gmx/'+fname)

                    elif re.match(ResNotFoundRegEx, msg):
                        hit = re.match(ResNotFoundRegEx, msg)
                        resname = hit.group(1).strip().strip('\'')
                        print('Residue '+resname+ ' not available in the force field database.')
                        print('Eliminating from the structure.')
                        mdstruc.strip(':'+resname)
                        mdstruc.write_pdb('gmx/'+fname)

                    elif re.match(ResMisMatchRegEx, msg):
                        hit = re.match(ResMisMatchRegEx, msg)
                        resname = hit.group(2).strip()
                        resnum = int(hit.group(3).strip())
                        print('Residue ' +resname+ ' ' + str(resnum) + ' did not match the force field database entry.')
                        print('Eliminating from the structure.')
                        mdstruc.strip(":"+str(resnum))
                        mdstruc.write_pdb('gmx/'+fname)
                    else:
                        if lastPass:
                            print(msg)

                    msg = ''
    return success
    
def mdgo_onclick(b):
    """Click handler of the 'Prepare MD' button.

    For every chain whose check-box is ticked, writes that chain of the
    module-level ``struc`` to ``gmx/input_<chain>.pdb`` and calls
    ``run_pdb2gmx`` repeatedly (at most ``maxTries`` times) until it reports
    success. Processing stops at the first chain that cannot be prepared.

    Parameters
    ----------
    b : object
        The widget passed by the click callback (unused).
    """
    maxTries = 100

    # Chains to prepare: the ticked boxes of the accordion. The first child
    # is the All/None button row, so it is skipped. Each description is kept
    # as one item (extend() would split a string into characters).
    chainList = [cb.description
                 for cb in accordion.children[0].children[1:]
                 if cb.value]

    # All intermediate files live in ./gmx.
    os.makedirs('gmx', exist_ok=True)

    for chain in chainList:
        fname = 'input_'+ chain + '.pdb'
        # Round-trip the full structure through a PDB file, then overwrite
        # that file with the selected chain only.
        struc.write_pdb('gmx/'+fname)
        mdstruc = pmd.load_file('gmx/'+fname)
        print("")
        print("********************************************")
        print('Preparing chain ' + str(chain) + ' for MD...')
        print("")
        mdstruc[chain,:,:].write_pdb('gmx/'+fname)

        tries = 0
        while True:
            tries += 1
            # Reload the (possibly repaired) input and keep a numbered copy
            # of what is fed to pdb2gmx on this attempt.
            mdstruc = pmd.load_file('gmx/'+fname)
            mdstruc.write_pdb('gmx/' + fname + '.' + str(tries))
            if run_pdb2gmx(mdstruc, fname, False):
                # Run once more with lastPass=True so that the remaining
                # warnings are printed.
                run_pdb2gmx(mdstruc, fname, True)
                print('Success!')
                break
            if tries >= maxTries:
                print('Failed to generate gmx input in ' + str(maxTries) + ' attempts')
                # Give up on this and all remaining chains.
                return


In [4]:

class ExcStruc:
    """Container for the pigment residues found in a structure.

    Attributes are plain lists (or whatever the finder functions store):
    ``resnums``, ``restypes`` and ``chains`` hold one entry per pigment,
    ``nres`` counts them, and ``ham``, ``dips`` and ``cents`` hold derived
    quantities.
    """

    def __init__(self):
        # A new instance starts out in the same state reset() produces.
        self.reset()

    def reset(self):
        """Forget everything by rebinding every attribute to a fresh empty value."""
        self.nres = 0
        self.resnums, self.restypes, self.chains = [], [], []
        self.ham, self.dips, self.cents = [], [], []

class NGLRepList:
    """Ordered collection of representations kept as three parallel lists.

    ``names[i]``, ``params[i]`` and ``reps[i]`` describe the same
    representation: its display name, the parameter dictionary built from
    it, and the representation object itself.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Drop all representations and clear the component id."""
        self.component_id = ''
        self.params, self.names, self.reps = [], [], []

    def append(self, nrep):
        """Add ``nrep`` to the end of all three lists.

        ``nrep`` must provide ``name``, ``type``, ``color``, ``selection``
        and ``opacity``; the opacity is stored as a string in the parameters.
        """
        settings = {
            "color": nrep.color,
            "sele": nrep.selection,
            "opacity": str(nrep.opacity),
        }
        self.names.append(nrep.name)
        self.params.append({"type": nrep.type, "params": settings})
        self.reps.append(nrep)
        
class NGLRep:
    """One display representation: name, draw type, selection, color, opacity."""

    def __init__(self, name, rtype, sel, col, opac):
        # Store the constructor arguments under their longer attribute names.
        self.name, self.type, self.selection = name, rtype, sel
        self.color, self.opacity = col, opac

def check_atoms(RefAtNames, QuerAtNames):
    """Return True when every name in ``RefAtNames`` occurs exactly once in ``QuerAtNames``.

    An empty reference list therefore always matches.
    """
    return all(QuerAtNames.count(name) == 1 for name in RefAtNames)

def find_porph(pmdstruc, xtruc):
    """Record every residue of ``pmdstruc`` that has the four atoms NA, NB, NC and ND.

    A matching residue is labelled 'CHL' when it also contains exactly one
    atom named MG and 'PHO' otherwise. For each match the residue's index in
    ``pmdstruc.residues``, its label and its chain are appended to
    ``xtruc.resnums``, ``xtruc.restypes`` and ``xtruc.chains``, and
    ``xtruc.nres`` is incremented.

    Parameters
    ----------
    pmdstruc : structure object exposing ``residues``
    xtruc : ExcStruc-like object
        Receives the results. The body used to write to the global
        ``xstruc`` and ignore this argument.

    Returns
    -------
    None
    """
    PORatList = ['NA', 'NB', 'NC', 'ND']
    for n, r in enumerate(pmdstruc.residues):
        nmlist = [at.name for at in r]
        if not check_atoms(PORatList, nmlist):
            continue
        xtruc.resnums.append(n)
        xtruc.nres += 1
        xtruc.restypes.append('CHL' if nmlist.count('MG') == 1 else 'PHO')
        xtruc.chains.append(r.chain)

    return


def find_dipoles(struc, xtruc):
    """Store one NB-minus-ND coordinate difference per listed residue.

    For every residue index in ``xtruc.resnums`` the coordinates of the atoms
    named NB and ND are looked up in ``struc.coordinates`` and their
    difference becomes one row of ``xtruc.dips`` (an array with three
    columns).
    """
    residue_ids = xtruc.resnums
    dipoles = np.zeros((len(residue_ids), 3))
    for row, res_index in enumerate(residue_ids):
        for atom in struc.residues[res_index]:
            if atom.name == 'NB':
                head = struc.coordinates[atom.idx]
            if atom.name == 'ND':
                tail = struc.coordinates[atom.idx]
        dipoles[row, :] = head - tail
    xtruc.dips = dipoles

def find_centers(struc, xtruc):
    """Store the midpoint of the NB and ND atoms of every listed residue.

    For each residue index in ``xtruc.resnums`` half of the NB coordinates
    and half of the ND coordinates are added up; the sums become the rows of
    ``xtruc.cents`` (an array with three columns). A residue with neither
    atom yields a row of zeros.
    """
    residue_ids = xtruc.resnums
    centers = np.zeros((len(residue_ids), 3))
    for row, res_index in enumerate(residue_ids):
        midpoint = 0.0
        for atom in struc.residues[res_index]:
            if atom.name in ('NB', 'ND'):
                midpoint = midpoint + 0.5*struc.coordinates[atom.idx]
        centers[row, :] = midpoint
    xtruc.cents = centers

def sync_widgets_to_rep(rep):
    """Show the settings of ``rep`` in the display-control widgets.

    Writes to the module-level widgets ``seldrop``, ``styledrop``,
    ``colordrop`` and ``opacslide``. Each assignment may fire that widget's
    change observer, so the order of the statements matters.

    Parameters
    ----------
    rep : object with ``name``, ``type``, ``color`` and ``opacity``
    """
    seldrop.value = rep.name
    styledrop.value = rep.type
    colordrop.value = rep.color
    # The slider holds whole percent (0-100); rep.opacity is a 0-1 fraction.
    opacslide.value = int(rep.opacity*100)
    
def sync_rep_to_widgets():
    """Copy the current widget values into the selected representation and redraw.

    The representation is the one whose name is currently chosen in
    ``seldrop``. Its parameter entry in ``repList.params`` is rebuilt and the
    complete parameter list is handed to ``pdbview``.
    """
    position = repList.names.index(seldrop.value)
    selected = repList.reps[position]

    selected.type = styledrop.value
    selected.name = seldrop.value
    selected.color = colordrop.value
    # Slider value is in percent; the representation stores a fraction.
    selected.opacity = opacslide.value*0.01

    settings = {"color": selected.color, "sele": selected.selection, "opacity": str(selected.opacity)}
    repList.params[position] = {"type": selected.type, "params": settings}
    pdbview.set_representations(repList.params)
    

def std_rep(nglview, struc, xstruc):
    """Load ``struc`` into the viewer and set up the default representations.

    Creates a 'Protein' cartoon representation plus, when such residues were
    found, licorice representations for 'Chlorophyll' and 'Pheophytin', all
    restricted to the chains present in ``struc``. The representations are
    appended to the module-level ``repList``, the display-control widgets are
    enabled, and one arrow per row of ``xstruc.dips`` is drawn.

    Parameters
    ----------
    nglview : viewer widget
        The viewer to draw into. The body used to ignore this argument and
        draw into the global ``pdbview``.
    struc : structure object exposing ``residues``
    xstruc : ExcStruc-like object
        Supplies ``resnums``, ``restypes``, ``dips`` and ``cents``.
    """
    nglview.add_trajectory(struc)
    nglview.clear(0)

    # Selection text for each pigment type: 1-based residue positions joined by ' or '.
    chltxt = ' or '.join(str(num + 1)
                         for num, kind in zip(xstruc.resnums, xstruc.restypes)
                         if kind == 'CHL')
    photxt = ' or '.join(str(num + 1)
                         for num, kind in zip(xstruc.resnums, xstruc.restypes)
                         if kind == 'PHO')

    # Unique chain ids in order of first appearance. append() keeps each id
    # as one item; extend() would add nothing for an empty id and split a
    # longer id into characters, so such ids were never recognised as seen.
    chainList = []
    for r in struc.residues:
        if r.chain not in chainList:
            chainList.append(r.chain)
    chainString = " OR ".join(":" + chain for chain in chainList)

    repList.append(NGLRep("Protein", "cartoon", "(protein)" + "AND (" + chainString + ")", "grey", 0.2))
    if len(chltxt)>0:
        repList.append(NGLRep("Chlorophyll", "licorice", "(" + chltxt + ")" + "AND (" + chainString + ")", "green", 1.0))
    if len(photxt)>0:
        repList.append(NGLRep("Pheophytin", "licorice", "(" + photxt + ")" + "AND (" + chainString + ")", "blue", 1.0))

    nglview.set_representations(repList.params)

    # Enable the controls and show the settings of the first representation.
    seldrop.options=repList.names
    seldrop.disabled=False
    styledrop.disabled=False
    colordrop.disabled=False
    opacslide.disabled=False
    sync_widgets_to_rep(repList.reps[0])

    # One red arrow per pigment, centred on the pigment and three dipole lengths long.
    for n in range(0, np.shape(xstruc.dips)[0]):
        cent = xstruc.cents[n,:]
        dip = xstruc.dips[n,:]
        v1 = cent - 1.5*dip
        v2 = cent + 1.5*dip
        nglview.shape.add_arrow(v1.tolist(), v2.tolist(), [1,0,0 ], 1.0 )
        
        
# Text field holding the PDB identifier to download.
pdbid = widgets.Text(
    description='PDB ID:',
    value='3EOJ',
    placeholder='Type something',
    disabled=False,
    layout = widgets.Layout(width='4cm'),
)

# Button that downloads and displays the structure.
pdbgo = widgets.Button(
    description='Display',
    tooltip='Click to display the pdb file',
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    icon='',          # FontAwesome name without the `fa-` prefix
    disabled=False,
)

# Area below the input row used for messages to the user.
pdboutput = widgets.HTML(
    description='',
    value="",
    placeholder='',
)

Output = widgets.Output()


# Button that starts the MD preparation; created disabled.
mdgo = widgets.Button(
    description='Prepare MD',
    tooltip='Click to prepare structure for molecular dynamics (MD)',
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    icon='',          # FontAwesome name without the `fa-` prefix
    disabled=True,
)


def clear_stage(view, xstruc):
    """Remove everything from the viewer ``view``.

    First asks the front end to remove stage components ``xstruc.nres`` down
    to 1, then empties the viewer's trajectory list and removes every
    component it has on record.

    Parameters
    ----------
    view : viewer widget
        Must provide ``_remote_call``, ``_trajlist``, ``_ngl_component_ids``,
        ``_ngl_component_names`` and the two auto-completion helpers.
    xstruc : ExcStruc-like object
        Only ``nres`` is read.
    """
    # Highest index first, so earlier removals do not shift later indices.
    for n in range(xstruc.nres, 0, -1):
        view._remote_call('removeComponent',
        target='Stage',
        args=[n,])

    view._clear_component_auto_completion()

    # Iterate over snapshots: removing from a list while looping over that
    # same list skips every other element and left entries behind.
    if view._trajlist:
        for traj in list(view._trajlist):
            view._trajlist.remove(traj)
    for component_id in list(view._ngl_component_ids):
        component_index = view._ngl_component_ids.index(component_id)
        view._ngl_component_ids.remove(component_id)
        view._ngl_component_names.pop(component_index)
        view._remote_call('removeComponent',
            target='Stage',
            args=[component_index,])
    view._update_component_auto_completion()


def pdbgo_onclick(b):
    """Download the structure named in ``pdbid`` and display it.

    On success the file is saved as ``pdb/<ID>.pdb``, the viewer and all
    derived state are reset, the structure is loaded into the module-level
    ``struc``, pigments are located and drawn, the 'Prepare MD' button is
    enabled and one check-box per chain is placed in the accordion. When the
    download fails a message is written to ``pdboutput`` and nothing else
    changes.

    Parameters
    ----------
    b : object
        The widget passed by the click/submit callback (unused).
    """
    global struc

    # Ignore accidental whitespace around the typed identifier.
    code = pdbid.value.strip()
    url = 'http://files.rcsb.org/download/'+code+'.pdb'
    try:
        # A timeout keeps the callback from hanging on a dead connection.
        response = requests.get(url, allow_redirects=True, timeout=30)
    except requests.exceptions.RequestException as err:
        pdboutput.value = 'Could not download the PDB file: ' + str(err)
        return

    if response.status_code != 200:
        pdboutput.value = 'Please enter a valide PDB ID code.'
        return

    os.makedirs('pdb', exist_ok=True)
    fname = 'pdb/' + code + '.pdb'
    # The context manager closes the file even if the write fails.
    with open(fname, 'wb') as wfd:
        wfd.write(response.content)
    pdboutput.value = ''

    # Clear the viewer before xstruc is reset: clear_stage reads xstruc.nres.
    clear_stage(pdbview, xstruc)

    accordion.children = []
    repList.reset()
    xstruc.reset()

    struc = pmd.load_file(fname)
    find_porph(struc, xstruc)
    find_dipoles(struc, xstruc)
    find_centers(struc, xstruc)
    std_rep(pdbview, struc, xstruc)
    mdgo.disabled=False

    # Unique chain ids in order of first appearance; each id is kept as one
    # item (extend() would split a string into characters).
    chainList = []
    for residue in struc.residues:
        if residue.chain not in chainList:
            chainList.append(residue.chain)

    # First row: All / None buttons; below it one check-box per chain.
    widgList = [widgets.HBox([widgets.Button(description='All',tooltip='Click to select all chains', layout = widgets.Layout(width='1.5cm')),
                widgets.Button(description='None',tooltip='Click to deselect all chains',layout = widgets.Layout(width='1.5cm'),)])]

    widgList[0].children[0].on_click(select_all_chains)
    widgList[0].children[1].on_click(deselect_all_chains)

    for chain in chainList:
        widgList.append(widgets.Checkbox(description=chain, value=True,indent=False))
        widgList[-1].observe(chain_box_on_change)
    accordion.children = [widgets.VBox(widgList)]

def select_all_chains(b):
    """Tick every chain check-box in the accordion (``b`` is unused)."""
    # The first child of the box is the All/None button row, not a check-box.
    checkboxes = accordion.children[0].children[1:]
    for box in checkboxes:
        box.value = True
        
def deselect_all_chains(b):
    """Untick every chain check-box in the accordion (``b`` is unused)."""
    # The first child of the box is the All/None button row, not a check-box.
    checkboxes = accordion.children[0].children[1:]
    for box in checkboxes:
        box.value = False
        
        
def chain_box_on_change(b):
    """Observer for the chain check-boxes: redraw for the ticked chains only.

    Reacts only to changes of a check-box's ``value``. Removes the arrows,
    draws arrows again for pigments on the ticked chains, and rewrites the
    chain part of every representation's selection.

    Parameters
    ----------
    b : dict-like change notification with ``'type'`` and ``'name'`` keys
    """
    if not (b['type'] == 'change' and b['name'] == 'value'):
        return

    # First identify which chains should be displayed. The first child of
    # the box is the All/None button row. Each description is kept as one
    # item (extend() would split a string into characters).
    chainList = [cb.description
                 for cb in accordion.children[0].children[1:]
                 if cb.value]
    chainString = " OR ".join(":" + chain for chain in chainList)

    # Update the dipole arrows: ask the front end to remove stage
    # components nres..1, highest index first ...
    for n in range(xstruc.nres, 0, -1):
        pdbview._remote_call('removeComponent',
            target='Stage',
            args=[n,])

    # ... then draw them again for pigments on a displayed chain.
    for n in range(0, np.shape(xstruc.dips)[0]):
        if chainList.count(xstruc.chains[n]):
            cent = xstruc.cents[n,:]
            dip = xstruc.dips[n,:]
            v1 = cent - 1.5*dip
            v2 = cent + 1.5*dip
            pdbview.shape.add_arrow(v1.tolist(), v2.tolist(), [1,0,0 ], 1.0 )

    # Rebuild every selection: keep the AND-separated parts that do not
    # mention a chain (no ':') and append the new chain restriction.
    for num in range(0, len(repList.reps)):
        rep = repList.reps[num]
        selList = rep.selection.split('AND')
        selText = ''
        for item in selList:
            if item.find(":")==-1:
                if len(selText)>0:
                    selText += " AND "
                selText += item.strip()

        if len(chainString)>0:
            selText += " AND (" + chainString + ")"
        else:
            # We assume no structure has chain ID XXXX...
            selText += " AND (:XXXX)"
        rep.selection = selText
        ptext = {"type": rep.type, "params": {"color": rep.color, "sele": rep.selection, "opacity": str(rep.opacity)}}
        repList.params[num] = ptext

    # Push the parameters once, after all of them are updated, instead of
    # once per representation. Nothing is sent when there are none, as before.
    if len(repList.reps)>0:
        pdbview.set_representations(repList.params)
            
    
def seldrop_on_change(change):
    """Observer for ``seldrop``: show the newly selected representation's settings."""
    # Only value changes matter; other trait notifications are ignored.
    if change['type'] != 'change' or change['name'] != 'value':
        return
    position = repList.names.index(seldrop.value)
    sync_widgets_to_rep(repList.reps[position])
        
def styledrop_on_change(change):
    """Observer for ``styledrop``: apply the chosen style to the selected representation."""
    # Only value changes matter; other trait notifications are ignored.
    if change['type'] != 'change' or change['name'] != 'value':
        return
    position = repList.names.index(seldrop.value)
    selected = repList.reps[position]
    selected.type = styledrop.value
    settings = {"color": selected.color, "sele": selected.selection, "opacity": str(selected.opacity)}
    repList.params[position] = {"type": selected.type, "params": settings}
    pdbview.set_representations(repList.params)
        
def colordrop_on_change(change):
    """Observer for ``colordrop``: apply the chosen color to the selected representation."""
    # Only value changes matter; other trait notifications are ignored.
    if change['type'] != 'change' or change['name'] != 'value':
        return
    position = repList.names.index(seldrop.value)
    selected = repList.reps[position]
    selected.color = colordrop.value
    settings = {"color": selected.color, "sele": selected.selection, "opacity": str(selected.opacity)}
    repList.params[position] = {"type": selected.type, "params": settings}
    pdbview.set_representations(repList.params)
        
def opacslide_on_change(change):
    """Observer for ``opacslide``: apply the chosen opacity to the selected representation."""
    # Only value changes matter; other trait notifications are ignored.
    if change['type'] != 'change' or change['name'] != 'value':
        return
    position = repList.names.index(seldrop.value)
    selected = repList.reps[position]
    # Slider value is in percent; the representation stores a fraction.
    selected.opacity = opacslide.value*0.01
    settings = {"color": selected.color, "sele": selected.selection, "opacity": str(selected.opacity)}
    repList.params[position] = {"type": selected.type, "params": settings}
    pdbview.set_representations(repList.params)

        
        
# ---- Module-level state and widget wiring ----
# Shared state read and written by the callbacks above.
xstruc = ExcStruc()
# Both the button click and pressing Enter in the text field start a download.
pdbgo.on_click(pdbgo_onclick)
pdbid.on_submit(pdbgo_onclick)
pdbview = nv.NGLWidget()
pdbview._set_size('500px', '500px')
pdbbox = widgets.HBox([pdbid, pdbgo])
repList = NGLRepList()

# Display controls; all start disabled and are enabled by std_rep().
seldrop = widgets.Dropdown(
    options=repList.names,
    #value='Protein',
    description='Selection:',
    disabled=True,
)

styledrop = widgets.Dropdown(
    options=['cartoon', 'licorice', 'spacefill'],
    description='Style:',
    disabled=True,
)

colordrop = widgets.Dropdown(
    options=['chain', 'red', 'green', 'blue', 'grey'],
    description='Color:',
    disabled=True,
)

# Opacity in whole percent; the handlers convert to a 0-1 fraction.
opacslide = widgets.IntSlider(
    min=0,
    max=100,
    step=1,
    description='Opacity:',
    disabled=True,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

from ipywidgets import Layout
# Starts empty; pdbgo_onclick() fills it with the chain check-boxes.
accordion = widgets.Accordion(children=[])
accordion.set_title(0, 'Chain Selection')

dispbox = widgets.VBox([seldrop, styledrop, colordrop, opacslide, accordion, mdgo])
viewbox = widgets.HBox([pdbview, dispbox])

# The handlers filter for 'value' changes themselves.
seldrop.observe(seldrop_on_change)
styledrop.observe(styledrop_on_change)
colordrop.observe(colordrop_on_change)
opacslide.observe(opacslide_on_change)

display(pdbbox)
display(pdboutput)
display(viewbox)
# Placeholder until pdbgo_onclick() loads a real structure.
# NOTE(review): this binds the parmed.structure attribute itself, not a
# structure instance - presumably only meant to define the name; confirm.
struc = pmd.structure
        
mdgo.on_click(mdgo_onclick)


HBox(children=(Text(value='3EOJ', description='PDB ID:', layout=Layout(width='4cm'), placeholder='Type somethi…

HTML(value='', placeholder='')

HBox(children=(NGLWidget(), VBox(children=(Dropdown(description='Selection:', disabled=True, options=(), value…