In [None]:
%%javascript
// Replace the output area's _should_scroll hook with a function that always
// returns false, whatever the line count passed in.
// NOTE(review): presumably this stops long cell outputs from being collapsed
// into a scrollable box in the classic notebook UI -- confirm against the front end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
import os,sys
sys.path.append('./misc/lib/python3.7/site-packages')

import math
import numpy as np
import requests
import nglview as nv
import ipywidgets as widgets
%matplotlib notebook
import matplotlib.pyplot as plt
from IPython.display import display, display_markdown
from ipywidgets import Layout, HTML
from pathlib import Path

import parmed as pmd
import re

import hublib.use
from hublib.ui import FileUpload, Download
from hublib.cmd import runCommand

from scipy.ndimage import gaussian_filter

from scipy import spatial

import time

%matplotlib notebook
%use gromacs-2018.4
%use pymol-1.8.4

# Print NumPy arrays with 8 digits of precision and suppress scientific notation
np.set_printoptions(precision=8, suppress=True)


# HTML template for a hyperlink rendered as a Jupyter-styled button.
# Fill the {link} and {text} placeholders with str.format().
HTMLButtonPrompt = '''<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<a href="{link}" target="_blank" >
<button class="p-Widget jupyter-widgets jupyter-button widget-button mod-warning" style="width:150px; background-color:#CCCCCC; font-size:10pt; color:black">{text}</button>
</a>
</body>
</html>
'''


In [None]:
def mutate_residue(mdstruc, resnumber, mutresname):
    
    # Exit code:
    #   0 -- mutation successful
    #   1 -- replaced with Gly
    #   2 -- deleted from structure
    exit_code = 0
    
    resndx = -1
    chain = ''
    for r in mdstruc.residues:
        if(r.number==resnumber):
            
            foundC = False
            foundO = False
            foundN = False
            foundCA = False
            for at in r:    
                if at.name=='C':
                    foundC = True
                elif at.name=='O':
                    foundO = True
                elif at.name=='N':
                    foundN = True
                elif at.name=='CA':
                    foundCA = True
                
            if foundC * foundN * foundO * foundCA:
                chain = r.chain
                resndx = r.idx
            else:
                mdstruc.strip(':'+str(r.idx+1))
                exit_code = 2
            break
            
    if resndx>=0:
        r = mdstruc.residues[resndx]
        
        # First replace with Gly: sometimes PyMol gets confused by incomplete side chains.
        mdstruc.strip('(:'+str(r.idx+1)+')&!(@C,CA,N,O)')
        mdstruc.residues[r.idx].name = 'GLY'
        mdstruc.write_pdb('tmp.pdb', altlocs='occupancy')
        out = !{'pymol -qc mutate.py -- tmp.pdb ' + chain + '/' + str(resnumber) + '/ ' + mutresname}

        # Check for errors
        error = False
        for line in out:
            if line.find('Error')>=0:
                error = True
                break
        if error:
            if(error):
                exit_code = 1
        else:
            mdstruc = pmd.load_file('tmp.pdb.mut')
            exit_code = 0
            
    if exit_code==0:
        report = 'Mutation successful'
    elif exit_code==1:
        report = 'PyMol error.\nReplacing with GLY'
    else:
        report = 'Missing backbone atoms.\nEliminating from structure'
        
    return mdstruc, exit_code, report

In [None]:
def locate_long_bond(a1, a2, dist, gstruc):
    """Find the two structure atoms that form the bond ``a1``-``a2`` of a topology file.

    Every ``topol*.itp`` and ``*.top`` file in the current directory is scanned.
    For each file whose ``[ bonds ]`` section lists a bond between atom numbers
    ``a1`` and ``a2`` (in either order), the ``[ atoms ]`` section of the same
    file is read to obtain residue number and atom name of both atoms.  Each
    candidate is then looked up in ``gstruc`` by residue number and atom name
    and accepted when the inter-atomic distance agrees with ``dist``.

    Parameters
    ----------
    a1, a2 : atom numbers as they appear in the first column of the topology
        file (numbering is per file, not global).
    dist : expected bond length, in the same units as ``gstruc.coordinates``.
    gstruc : structure exposing ``residues`` (iterable of residues that have a
        ``number`` and iterate over atoms with ``name``/``idx``) and
        ``coordinates`` (indexable by ``atom.idx``).

    Returns
    -------
    (atom1, atom2) from ``gstruc`` on success, or ``([], [])`` when no
    candidate matches within the tolerance of 0.1.
    """
    # Local import keeps the block self-contained; glob replaces the former
    # `ls` shell call, whose error lines had to be filtered out by their
    # (locale-dependent) text and were removed from the list while iterating.
    import glob

    tol = 0.1
    flist = sorted(glob.glob('topol*.itp') + glob.glob('*.top'))

    id1 = str(a1)
    id2 = str(a2)
    wanted = ((id1, id2), (id2, id1))

    PairList = []

    # Loop through topology files
    for fname in flist:
        with open(fname) as fd:

            # Pass 1: does the [ bonds ] section contain the requested bond?
            found_bond = False
            in_bond_section = False
            for line in fd:

                # Another directive ends the [ bonds ] section
                if in_bond_section and line.startswith('['):
                    in_bond_section = False

                if line.startswith('[ bonds ]'):
                    in_bond_section = True

                if in_bond_section:
                    fields = line.split()
                    if len(fields)>1 and (fields[0], fields[1]) in wanted:
                        found_bond = True
                        break

            if not found_bond:
                continue

            # Pass 2: re-read the file to get residue/atom names of both atoms
            Lat1 = []
            Lat2 = []
            fd.seek(0,0)
            in_atom_section = False
            for line in fd:

                # Another directive ends the [ atoms ] section
                if in_atom_section and line.startswith('['):
                    in_atom_section = False

                if line.startswith('[ atoms ]'):
                    in_atom_section = True

                if in_atom_section:
                    fields = line.split()
                    # Only lines long enough to be an atom record
                    if len(fields)>=5:
                        record = None
                        if fields[0]==id1 or fields[0]==id2:
                            record = [
                                int(fields[0]), # atom number -- within this file
                                int(fields[2]), # res number
                                fields[3],      # res name
                                fields[4]       # atom name
                            ]
                        if fields[0]==id1:
                            Lat1 = record
                        if fields[0]==id2:
                            Lat2 = record

                # Both atoms located: store and stop reading this file
                if len(Lat1)>0 and len(Lat2)>0:
                    PairList.append([Lat1, Lat2])
                    break

    # Now check whether the bond distance matches the input
    for pair in PairList:

        atom1 = None
        atom2 = None

        # The last atom matching (residue number, atom name) wins
        for res in gstruc.residues:

            if res.number==pair[0][1]:
                for at in res:
                    if at.name==pair[0][3]:
                        atom1 = at

            if res.number==pair[1][1]:
                for at in res:
                    if at.name==pair[1][3]:
                        atom2 = at

        if atom1 is not None and atom2 is not None:
            x1 = gstruc.coordinates[atom1.idx]
            x2 = gstruc.coordinates[atom2.idx]
            if np.abs(np.linalg.norm(x1-x2)-dist)<tol:
                return atom1, atom2

    # Nothing matched: callers expect a pair of empty lists
    return [], []


def identify_by_position(x, qstruc):
    """Return the atom of ``qstruc`` whose coordinates lie closest to ``x``.

    ``x`` is a position that broadcasts against ``qstruc.coordinates``; the
    atom is taken from ``qstruc.atoms`` at the index of the smallest
    Euclidean distance (first one on ties).
    """
    offsets = x - qstruc.coordinates
    distances = np.sqrt((offsets ** 2).sum(axis=1))
    nearest = np.argmin(distances)
    return qstruc.atoms[nearest]

In [None]:
def update_mod_views(b):
    """Button callback: toggle the viewer component that belongs to button ``b``.

    The button's position in ``ModBox.children`` plus one is the index of its
    component in ``pdbview``.  That index is removed from ``disp_ndcs`` if
    present and appended otherwise; the viewer is then told to show exactly
    the components listed in ``disp_ndcs``.  A button that is not a child of
    ``ModBox`` only triggers the refresh.
    """
    global disp_ndcs

    buttons = list(ModBox.children)
    if b in buttons:
        component = buttons.index(b) + 1
        if component in disp_ndcs:
            disp_ndcs.remove(component)
        else:
            disp_ndcs.append(component)

    pdbview.show_only(indices=disp_ndcs)
    


def run_pdb2gmx():
    
    global disp_ndcs
    global pdbview
    global MutList
    
    pdbview.add_trajectory(pmd.load_file('protein.pdb'))
    pdbview.set_representations([
        {"type": "cartoon", "params": {"color": "grey", "sele": "(protein)", "opacity": "0.2"}},
        {"type": "licorice", "params": {"color": "grey", "sele": "(not protein)", "opacity": "0.2"}}
    ])
    disp_ndcs = [0]
    
    ffstr = ''
    wmstr = ''
    with open("ffparams.txt") as fd:
        for line in fd:
            if line.split(':')[0]=='FF':
                ffstr = line.split(':')[1].strip()
            if line.split(':')[0]=='WATER':
                wmstr = line.split(':')[1].strip()


    MutCodes = ['Repair', 'Replacement', 'Deletion']
    ColorCodes = ['green', 'orange', 'red']
    
    # First, we do mutations
    if len(MutList)==0:
        out = !{"cp protein.pdb temp.pdb"}
    else:
        out = !{'cp system.pdb mutant.pdb'}
        prostruc = pmd.load_file('protein.pdb')    
        for mut in MutList:

            # Load protein structure
            mutstruc = pmd.load_file('mutant.pdb')

            # residue index in prostruc
            idx = int((mut.split(' to ')[0]).split(':')[-1])

            # New amino acid
            newaa = mut.split(' to ')[1].strip()

            # Find coordinates of the first atom in this residue in prostruc
            x0 = prostruc.coordinates[prostruc.residues[idx].atoms[0].idx]

            # Identify the corresponding atom in mutstruc by its coordinates.
            # Note that residue indices may be different in prostruc and mutstruc
            at0 = identify_by_position(x0, mutstruc)

            newstruc, code, mut_txt = mutate_residue(mutstruc, at0.residue.number, newaa)
            if code!=0:
                print('Error: Unhandled error during mutation: ' + mut)
                print('Return message: ' + mut_txt)
                return
            else:
                newstruc.write_pdb('mutant.pdb')
                
                # Now add view to structure display
                pdbview.add_trajectory(newstruc[':'+str(at0.residue.number)])
                pdbview.add_representation('spacefill',
                       selection='', 
                       color='green',
                       opacity='0.3', 
                       component=len(pdbview._ngl_component_ids)-1)
                viewbt = widgets.Button(
                    description=mut
                )
                viewbt.on_click(update_mod_views)
                ModBox.children = list(ModBox.children) + [viewbt]
                disp_ndcs.append(len(ModBox.children))
                
        # Load the mutant structure
        mutstruc = pmd.load_file('mutant.pdb')
        
        # Load the pigment structure
        pigstruc = pmd.load_file('pigments.pdb')
        
        # Identify pigment residue names
        PigList = []
        for res in pigstruc.residues:
            if PigList.count(res.name)==0:
                PigList.append(res.name)
                
        # Remove all pigments from file
        for pig in PigList:
            mutstruc.strip(':'+pig)
            
        # This is the file that will be used for input by pdb2gmx
        mutstruc.write_pdb('temp.pdb')
        
    
    MaxTries = 50
    count = 0
    success = False
    while success==False:

        # Text summarizing outcome of this iterations
        report_text = ''

        # Header for output report
        head_text = ''

        # Assume success until proven otherwise 
        success = True
        
        # Remove all protein itp and top data
        out=!{"rm *Protein*.itp; rm *.top"}
        
        # Try to convert the pdb
        out=!{"export GMX_MAXBACKUP=-1; gmx pdb2gmx -ff " + ffstr + " -f temp.pdb -o protein.gro -ignh -water " + wmstr}

        # Check for errors
        msg = ''
        for line in out:
            # If the line contains the keyword "error", 
            # throw error and start recording the message
            if len(msg)==0 and re.search('error', line, flags=re.IGNORECASE):
                msg = 'PDB2GMX MESSAGE: '
                success = False

            # If a message is being recorded:
            if len(msg)>0:
                # If the current line is empty, stop recording
                if len(line.strip())==0:
                    report_text = msg
                    break
                # Otherwise, append the new line to the message
                else:
                    msg = msg + line + " "

        if success==False:

            found_error = False

            #**********************************************************
            # Handled Error: Residue missing from database -- Delete 
            #**********************************************************
            RegEx = re.compile('.*Residue(.*)not found in residue topology database')
            if re.match(RegEx, msg):

                # First identify problematic residue
                hit = re.match(RegEx, msg)
                resname = hit.group(1).strip().strip('\'')

                # Now add view to structure display
                struc0 = pmd.load_file('temp.pdb')
                pdbview.add_trajectory(struc0[':' + resname])
                pdbview.add_representation('spacefill',
                       selection='', 
                       color='red', 
                       opacity='0.3', 
                       component=len(pdbview._ngl_component_ids)-1)
                viewbt = widgets.Button(
                    description=resname
                )
                viewbt.on_click(update_mod_views)
                ModBox.children = list(ModBox.children) + [viewbt]
                disp_ndcs.append(len(ModBox.children))
                

                # Finally, delete from the structure
                struc0.strip(':'+resname)
                struc0.write_pdb('temp.pdb', altlocs='first')
                found_error = True
                report_text += '\n\nResidue '+resname+ ' not available in the force field database.\n'
                report_text += 'Eliminating from the structure.\n'
                head_text = resname + ' deletion'

            #**********************************************************
            # Handled Error: Residue missing atoms (Error) -- Repair using PyMol
            #**********************************************************
            RegEx = re.compile('.*Residue(.*)named(.*)of a molecule in the input file was mapped to an entry in the topology database, but the atom(.*)used in that entry is not found in the input file')
            if re.match(RegEx, msg):
                
                # Identify problematic residues
                hit = re.match(RegEx, msg)
                resnumber = int(hit.group(1))
                resname = hit.group(2).strip()
                
                # Add view to structure display
                struc0 = pmd.load_file('temp.pdb')
                pdbview.add_trajectory(struc0[':' + str(resnumber)])
                
                # Repair
                struc, code, mut_txt = mutate_residue(struc0, resnumber, resname)
                struc.write_pdb('temp.pdb', altlocs='first')
                found_error = True
                report_text += '\n\nAtoms were missing from residue '+resname+ ' ' + str(resnumber) + '\n'
                report_text += 'Attempting repair with PyMol mutagenesis wizard.\n'
                report_text += mut_txt
                head_text = resname + str(resnumber) + ' ' + MutCodes[code]
                pdbview.add_representation('spacefill',
                       selection='', 
                       color=ColorCodes[code], 
                       opacity='0.3', 
                       component=len(pdbview._ngl_component_ids)-1)
                viewbt = widgets.Button(
                    description=resname + ' ' + str(resnumber)
                )
                viewbt.on_click(update_mod_views)
                ModBox.children = list(ModBox.children) + [viewbt]
                disp_ndcs.append(len(ModBox.children))

            #**********************************************************
            # Handled Error: Residue missing atoms (Warning) -- Repair using PyMol
            #**********************************************************
            RegEx = re.compile('.*There were (.*) missing atoms in molecule.*')
            if re.match(RegEx, msg):
                WarnRegEx = re.compile('.*atom (.*) is missing in residue ([a-zA-Z]+)[ ]+([0-9]+) in the pdb file.*')
                found_error = True
                for line in out:
                    if re.match(WarnRegEx, line):
                        
                        # Identify problematic residues
                        hit = re.match(WarnRegEx, line)
                        resname = hit.group(2).strip()
                        resnumber = int(hit.group(3).strip())
                        
                        # Add structure view
                        struc0 = pmd.load_file('temp.pdb')
                        pdbview.add_trajectory(struc0[':' + str(resnumber)])
                        
                        # Repair 
                        struc, code, mut_txt = mutate_residue(struc0, resnumber, resname)
                        struc.write_pdb('temp.pdb', altlocs='first')
                        report_text += '\n\nAtoms were missing from residue '+resname+ ' ' + str(resnumber) + '\n'
                        report_text += 'Attempting repair with PyMol mutagenesis wizard.\n'
                        report_text += mut_txt
                        
                        head_text = resname + str(resnumber) + ' ' + MutCodes[code]
                        
                        pdbview.add_representation('spacefill',
                           selection='', 
                           color=ColorCodes[code], 
                           opacity='0.3', 
                           component=len(pdbview._ngl_component_ids)-1)
                        viewbt = widgets.Button(
                            description=resname+ ' ' + str(resnumber)
                        )
                        viewbt.on_click(update_mod_views)
                        ModBox.children = list(ModBox.children) + [viewbt]
                        disp_ndcs.append(len(ModBox.children))
                        break
            
            if found_error==False:
                report_text += '\nUnhandled error from pdb2gmx. Aborting MD model building.'
                count = MaxTries

        # If no errors, double-check against known warnings
        if success:
            for line in out:
                
                #**********************************************************
                # Handled Warning: Long bond -- Insert chain break 
                #**********************************************************
                
                RegEx = re.compile('.*Warning: Long Bond.*\(([0-9]+)-([0-9]+) = (.*)nm.*\).*')
                if re.match(RegEx, line):
                    
                    report_text += line + '\n'
                    
                    bondmax = 1.5*2.0
                    
                    # Identify problematic residues
                    hit = re.match(RegEx, line)
                    at1tmp = int(hit.group(1))
                    at2tmp = int(hit.group(2))
                    gdist = 10.0*float(hit.group(3))
                    
                    at1 = min(at1tmp,at2tmp)
                    at2 = max(at1tmp,at2tmp)
                    
                    # Identify atoms in gro structure
                    grostruc = pmd.load_file('protein.gro')
                    gatom1, gatom2 = locate_long_bond(at1, at2, gdist, grostruc)
                    x1 = grostruc.coordinates[gatom1.idx]
                    x2 = grostruc.coordinates[gatom2.idx]
                    
                    # Identify equivalent atoms in pdb structure
                    struc0 = pmd.load_file('temp.pdb')
                    atom1 = identify_by_position(x1, struc0)
                    atom2 = identify_by_position(x2, struc0)
                    
                    # If we failed to locate the atom
                    if atom1==[]:
                        head_text = 'Long bond: Failed repair'
                        report_text += '\n\nBond between atoms ' + str(pat1.idx) + ' and ' + str(pat2.idx) + ' was too long (' + '{:.2f}'.format(gdist) + ').\n'
                        report_text += 'Failed to identify atoms in PDB representation'
                        count = MaxTries
                        break
                        
                    # Otherwise, deal with bond
                    else:
                        
                        # If this is an amide bond,  insert chain break
                        if atom1.name=='C' and atom2.name=='N' and gdist>bondmax:
                            
                            struc0[atom1.idx].residue.ter = True
                            struc0.write_pdb('temp.pdb', altlocs='first')

                            res1 = atom1.residue
                            res2 = atom2.residue
                            head_text = 'Res ' + str(res1.number) + ' -- ' + str(res2.number) + ' long bond: Chain break'
                            report_text += '\n\nBond between atoms ' + str(atom1.idx) + ' and ' + str(atom2.idx) + ' was long (' + '{:.2f}'.format(gdist) + ').\n'
                            report_text += 'Identified atoms as belonging to residues ' + str(res1.number) + ' and ' + str(res2.number) + '\n'
                            report_text += 'Inserting chain break between them.\n'
                            pdbview.add_trajectory(struc0[':' + str(res1.number) + ',' + str(res2.number)])
                            pdbview.add_representation('spacefill',
                               selection='', 
                               color='blue', 
                               opacity='0.3', 
                               component=len(pdbview._ngl_component_ids)-1)
                            
                            viewbt = widgets.Button(
                                description=str(res1.number) + '--' + str(res2.number)
                            )
                            viewbt.on_click(update_mod_views)
                            ModBox.children = list(ModBox.children) + [viewbt]
                            disp_ndcs.append(len(ModBox.children))
                            success = False
                            break
                            
                        # Otherwise, if both atoms are in the same residue, delete it
                        elif atom1.residue.idx==atom2.residue.idx:
                            resname = atom1.residue.name
                            resnum = atom1.residue.number
                            struc0.strip('(:'+str(atom1.residue.idx+1) + ')')
                            struc0.write_pdb('temp.pdb', altlocs='first')
                            report_text += '\n\nResidue '+resname+ ' ' + str(resnum) + ' contained a long bond\n'
                            report_text += 'Deleting from structure.\n'
                            head_text = resname + str(resnum) + ' long bond: Deletion'

                            pdbview.add_representation('spacefill',
                               selection='', 
                               color='red', 
                               opacity='0.3', 
                               component=len(pdbview._ngl_component_ids)-1)
                            viewbt = widgets.Button(
                                description=resname+ ' ' + str(resnum)
                            )
                            viewbt.on_click(update_mod_views)
                            ModBox.children = list(ModBox.children) + [viewbt]
                            disp_ndcs.append(len(ModBox.children))
                            
                            success = False
                            break

        if success: 
            head_text = 'Success!'
            
            report_text = 'pdb2gmx exited successfully. \nFile topol.top contains protein topology.\n' + report_text
            report = 'File protein.gro contains final protein structure.\n\n' + report_text
            
            for line in out:
                if re.search('error', line, flags=re.IGNORECASE):
                    report_text += line + '\n'
                if re.search('warning', line, flags=re.IGNORECASE):
                    report_text += line + '\n'
                    
        output = ''
        for line in out:
            output += line + '\n'
        
        newbox = widgets.VBox([
            widgets.Textarea(value=report_text,layout=Layout(width='15cm',height='5cm')),
            widgets.Textarea(value=output,layout=Layout(width='15cm',height='10cm'))
        ])
        outacc.children = list(outacc.children) + [newbox]
        outacc.set_title(len(outacc.children)-1, 'Round ' + str(len(outacc.children)) + ': ' + head_text)
        outacc.selected_index = len(outacc.children)-1

        count += 1
        if count>MaxTries:
            print('Too many attempt: aborting pdb2gmx')
            break


In [None]:
# Residue names offered both as mutation sites and as mutation targets
AAList = [
    'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'ILE',
    'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL',
]

# One label "NAME  CHAIN:INDEX" per residue of system.pdb whose name is in AAList
ResList = []
tstruc = pmd.load_file('system.pdb')
for res in tstruc.residues:
    if res.name in AAList:
        ResList.append(res.name + '  ' + res.chain + ":" + str(res.idx))

# Mutations queued by the user, stored as "<label> to <AA>"
MutList = []

# --- Row 1: choose a residue and its replacement, then queue the mutation ---
mutsel = widgets.Dropdown(options=ResList, description='Mutate Residue:', disabled=False, layout=Layout(width='6cm'))
mutsel.style.description_width = '3cm'

aadrop = widgets.Dropdown(options=AAList, description='to:', disabled=False, layout=Layout(width='2.5cm'))
aadrop.style.description_width = '0.5cm'

mutbt = widgets.Button(description='Mutate', layout=Layout(width='2cm'))

# --- Row 2: list of queued mutations with a button to remove the selected one ---
mutdelsel = widgets.Select(options=[], description='Mutations:', disabled=False, rows=0)

mutdelbt = widgets.Button(description='Remove', layout=Layout(width='2cm'))

MutBox = widgets.VBox([
    widgets.HBox([mutsel, aadrop, mutbt]),
    widgets.HBox([mutdelsel, mutdelbt]),
])


def mutbt_onclick(b):
    """Button callback: queue the mutation currently chosen in the dropdowns.

    Appends ``"<residue label> to <amino acid>"`` to ``MutList`` and refreshes
    the ``mutdelsel`` list.  If the residue already has a queued mutation an
    error is printed and nothing changes.  Does nothing when no residue is
    available in ``mutsel``.  ``b`` (the clicked button) is unused.
    """
    global MutList

    mut = mutsel.value
    # An empty residue dropdown has no value to mutate
    if mut is None:
        return

    # Entries are "<label> to <AA>"; compare on the label part, parsed the
    # same way run_pdb2gmx does, instead of slicing off a fixed 7 characters.
    if any(mutx.rsplit(' to ', 1)[0]==mut for mutx in MutList):
        print('Error: Residue is already mutated. Please delete and try again.')
        return

    MutList.append(mut + ' to ' + aadrop.value)

    mutdelsel.options = MutList
    mutdelsel.rows = len(MutList)
    
def mutdelbt_onclick(b):
    """Button callback: remove the mutation selected in ``mutdelsel`` from ``MutList``.

    The list widget is refreshed afterwards.  Nothing happens when the list
    is empty or when no (or a stale) entry is selected.  ``b`` is unused.
    """
    global MutList

    sel = mutdelsel.value
    # Guard against an empty selection, which would make the lookup raise
    if sel in MutList:
        MutList.remove(sel)
        mutdelsel.options = MutList
        mutdelsel.rows = len(MutList)
    
# Connect the "Mutate" and "Remove" buttons to their click handlers
mutbt.on_click(mutbt_onclick)
mutdelbt.on_click(mutdelbt_onclick)

In [None]:
def build_topology(b):
    """Button callback that starts the pdb2gmx run; the clicked button ``b`` is unused."""
    run_pdb2gmx()

In [None]:
# Structure viewer, fixed at 500 x 500 px
pdbview = nv.NGLWidget()
pdbview._set_size('500px', '500px')

# Button that launches the topology build
gobt = widgets.Button(description='Build Topology')
gobt.on_click(build_topology)

# Filled at run time: one toggle button per structural modification
ModBox = widgets.VBox([])

# Filled at run time: one report panel per pdb2gmx round
outacc = widgets.Accordion(children=[])

# Layout, top to bottom: viewer with toggle buttons, mutation controls,
# build button, per-round reports
display(
    widgets.HBox([pdbview, ModBox]),
    MutBox,
    gobt,
    outacc,
)