In [None]:
%%javascript
// Replace OutputArea's _should_scroll hook with a function that answers
// false for any `lines` value it is given.
IPython.OutputArea.prototype._should_scroll = (lines) => false;

In [None]:
import os,sys
sys.path.append('./misc/lib/python3.7/site-packages')

%matplotlib notebook

import math
import numpy as np
import requests
import ipywidgets as widgets

import matplotlib.pyplot as plt
from IPython.display import display, display_markdown
from ipywidgets import Layout, HTML
from pathlib import Path
import uuid

# nglview is an optional dependency. NGL_DEF records whether it could be
# imported so that the rest of the notebook can skip 3D visualisation.
# Catch Exception rather than using a bare except, so that KeyboardInterrupt
# and SystemExit are not swallowed while a broken install is still tolerated.
try:
    import nglview as nv
except Exception:
    NGL_DEF = False
else:
    NGL_DEF = True

import parmed as pmd
import re

from scipy.ndimage import gaussian_filter

from scipy import spatial

import time

import hublib.use
%use gromacs-2018.4
%use pymol-1.8.4

# Print numpy arrays with 8 significant decimals and without scientific notation.
np.set_printoptions(precision=8, suppress=True)


# HTML snippet for a hyperlink rendered as a Jupyter-styled button that opens
# in a new tab. It contains two named placeholders, {link} (the href) and
# {text} (the button caption) -- presumably filled in with str.format by code
# that is not shown in this part of the notebook.
HTMLButtonPrompt = '''<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<a href="{link}" target="_blank" >
<button class="p-Widget jupyter-widgets jupyter-button widget-button mod-warning" style="width:150px; background-color:#CCCCCC; font-size:10pt; color:black">{text}</button>
</a>
</body>
</html>
'''   


In [None]:
def writelog(msg, leadchar):
    """Append log entries to ./logfile.txt.

    Parameters
    ----------
    msg : iterable of str
        Entries to write; each one becomes its own line.
    leadchar : str
        Prefix written before every entry, followed by a single space.
    """
    with open("./logfile.txt", 'a') as logfile:
        logfile.writelines(leadchar + " " + entry + "\n" for entry in msg)
            
def runbash(cmd, comment):
    out = !{cmd}
    writelog(["*****************************************************"], '')
    writelog([comment], '# ')
    writelog(["Running bash command:"], '# ')
    writelog([cmd+'\n'], 'bash >> ')
    writelog(out, 'out: ')
    writelog(["*****************************************************"], '')
    return out

In [None]:
def mutate_residue(mdstruc, residx, mutresname):
    """Mutate (or rebuild) one residue using the PyMol mutagenesis script.

    The residue at index ``residx`` is first reduced to its backbone and
    renamed GLY, the structure is written to ``tmp.pdb``, and ``mutate.py`` is
    run through PyMol. On success the result is read back from
    ``tmp.pdb.mut``.

    Note that ``mdstruc`` is modified in place: atoms are stripped, the
    residue is renamed, and chain IDs are overwritten.

    Parameters
    ----------
    mdstruc : parmed structure
        Structure containing the residue to mutate.
    residx : int
        Zero-based index into ``mdstruc.residues``.
    mutresname : str
        Name of the target residue type, passed on to ``mutate.py``.

    Returns
    -------
    (structure, exit_code, report)
        ``exit_code`` is
          0 -- mutation successful (structure reloaded from tmp.pdb.mut)
          1 -- PyMol reported an error; the residue is left as GLY
          2 -- backbone atoms were missing; the residue was deleted
        ``report`` is a short human-readable description of the outcome.
    """
    res = mdstruc.residues[residx]

    # The residue can only be rebuilt if its complete backbone is present.
    atom_names = {at.name for at in res.atoms}
    has_backbone = {'C', 'O', 'N', 'CA'} <= atom_names

    # If any backbone atoms are missing, delete the residue
    if not has_backbone:
        mdstruc.strip(':'+str(res.idx+1))
        exit_code = 2

    # Otherwise, make the mutation
    else:

        # First replace with Gly: sometimes PyMol gets confused by incomplete side chains.
        mdstruc.strip('(:'+str(res.idx+1)+')&!(@C,CA,N,O)')
        mdstruc.residues[res.idx].name = 'GLY'

        # Set all chain markers to 'X' to avoid confusing PyMol
        for tres in mdstruc.residues:
            tres.chain = 'X'
        mdstruc.write_pdb('tmp.pdb', altlocs='occupancy')
        out = runbash('pymol -qc mutate.py -- tmp.pdb X/' + str(res.idx+1) + '/ ' + mutresname, 'PyMol script for mutagenesis:')

        # Any output line mentioning 'Error' counts as a PyMol failure
        if any('Error' in line for line in out):
            exit_code = 1
        else:
            mdstruc = pmd.load_file('tmp.pdb.mut')
            exit_code = 0

        # Remove the temporary chain markers again. This is done on the
        # failure path as well, so the GLY-replaced structure does not carry
        # the placeholder chain 'X' into later processing.
        for tres in mdstruc.residues:
            tres.chain = ''

    if exit_code==0:
        report = 'Mutation successful'
    elif exit_code==1:
        report = 'PyMol error.\nReplacing with GLY'
    else:
        report = 'Missing backbone atoms.\nEliminating from structure'

    return mdstruc, exit_code, report

def fix_missing_atoms(struc, resname, missatom):
    """Repair every residue named ``resname`` that lacks atom ``missatom``.

    Each offending residue is passed to ``mutate_residue`` with its own
    residue name as the target, and the outcome is written to the logfile.

    The residues are walked by index over the *current* structure rather than
    by iterating ``struc.residues`` directly: ``mutate_residue`` may delete a
    residue in place or return a freshly loaded structure, and iterating the
    original list in either case skips residues or uses stale indices.

    Parameters
    ----------
    struc : parmed structure
        Structure to repair (may be modified in place by ``mutate_residue``).
    resname : str
        Residue name to check.
    missatom : str
        Atom name that each such residue is required to contain.

    Returns
    -------
    (structure, repair_list, repair_code)
        ``repair_list`` holds the zero-based residue index of each repaired
        residue at the moment it was repaired; ``repair_code`` holds the
        matching exit codes from ``mutate_residue``.
    """
    # repair_list will be a list of indices of repaired residues
    repair_list = []

    # repair_code is a list of exit codes from mutate_residue
    repair_code = []

    idx = 0
    while idx < len(struc.residues):
        res = struc.residues[idx]

        needs_repair = (
            res.name==resname
            and not any(at.name==missatom for at in res.atoms)
        )
        if needs_repair:

            # Store the index *before* the repair, since the residue
            # may get deleted by mutate_residue.
            repair_list.append(idx)

            # Repair using PyMol mutagenesis tool
            struc, code, mut_txt = mutate_residue(struc, idx, resname)

            # Store result to inform user
            repair_code.append(code)

            # Write results to logfile
            writelog(['PyMol script exited with code ' + str(code) + ':\n' + mut_txt], '# ')

            # Exit code 2 means the residue was deleted, so the following
            # residue now sits at this same index: re-examine it.
            if code==2:
                continue

        idx += 1

    return struc, repair_list, repair_code

In [None]:
# Find the bond referred to in a gromacs "long bond" warning.
# The difficulty here is that gromacs prints the atom numbers
# corresponding to the *topology* file, not the *.gro structure. 
# These are generally different since multiple molecules are stored
# in the same *.gro structure. So we first have to identify the
# correct atoms in the topology file, and then match them to 
# entries in the *.gro structure. 
def _topology_directive(line):
    """Return the name inside a ``[ name ]`` directive line, or None for any other line."""
    hit = re.match(r'\s*\[\s*(\w+)\s*\]', line)
    return hit.group(1) if hit else None


def _find_bonded_atom_records(fname, a1, a2):
    """Look in one topology file for a bond a1-a2 and return both atom records.

    Returns ``(rec1, rec2)`` where each record is
    ``[atom number, residue number, residue name, atom name]`` taken from the
    ``[ atoms ]`` section, or None if the file defines no bond between the two
    atom numbers or does not list both atoms.
    """
    s1, s2 = str(a1), str(a2)
    with open(fname) as fd:
        lines = fd.readlines()

    # Pass 1: is there a bond between the two atom numbers (in either order)?
    bonded = False
    in_bonds = False
    for line in lines:
        directive = _topology_directive(line)
        if directive is not None:
            in_bonds = (directive == 'bonds')
            continue
        if in_bonds:
            fields = line.split()
            if len(fields) > 1 and (fields[0], fields[1]) in ((s1, s2), (s2, s1)):
                bonded = True
                break
    if not bonded:
        return None

    # Pass 2: collect residue number/name and atom name of both atoms.
    rec1 = None
    rec2 = None
    in_atoms = False
    for line in lines:
        directive = _topology_directive(line)
        if directive is not None:
            in_atoms = (directive == 'atoms')
            continue
        if not in_atoms:
            continue
        fields = line.split()
        # Only lines with enough columns can be atom records
        if len(fields) < 5:
            continue
        if fields[0] == s1:
            rec1 = [int(fields[0]), int(fields[2]), fields[3], fields[4]]
        if fields[0] == s2:
            rec2 = [int(fields[0]), int(fields[2]), fields[3], fields[4]]
        if rec1 is not None and rec2 is not None:
            return rec1, rec2
    return None


def _find_atom_in_structure(gstruc, resnumber, atomname):
    """Return the atom with the given residue number and atom name, or None.

    If several atoms match, the last one encountered is returned.
    """
    match = None
    for res in gstruc.residues:
        if res.number == resnumber:
            for at in res.atoms:
                if at.name == atomname:
                    match = at
    return match


def locate_long_bond(a1, a2, dist, gstruc):
    """Locate in ``gstruc`` the two atoms of a bond reported by topology atom numbers.

    Every ``topol*.itp`` and ``*.top`` file in the current directory is
    searched for a ``[ bonds ]`` entry joining atom numbers ``a1`` and ``a2``.
    For each file that has one, the atoms' residue numbers and names are read
    from ``[ atoms ]`` and looked up in ``gstruc``. The first candidate pair
    whose separation in ``gstruc`` is within 0.1 of ``dist`` is returned.

    Parameters
    ----------
    a1, a2 : int
        Atom numbers as they appear in the topology file.
    dist : float
        Reported bond length, in the same length unit as ``gstruc.coordinates``.
    gstruc : parmed structure
        Structure in which to locate the atoms.

    Returns
    -------
    (atom1, atom2)
        The matching atoms of ``gstruc``, or ``([], [])`` if no candidate
        matches (an error message is printed in that case).
    """
    tol = 0.1

    # Collect the topology files directly instead of parsing `ls` output, so
    # that "ls: cannot access ..." messages can never end up in the list.
    topo_files = sorted({
        str(path)
        for pattern in ('topol*.itp', '*.top')
        for path in Path('.').glob(pattern)
        if path.is_file()
    })

    # Without additional information we have to check each file separately,
    # since we don't know which protein chain the long bond is in.
    candidates = []
    for fname in topo_files:
        records = _find_bonded_atom_records(fname, a1, a2)
        if records is not None:
            candidates.append(records)

    # Matching atom numbers may well exist in several files, but it is very
    # unlikely that the bond length matches too, so use it to disambiguate.
    for rec1, rec2 in candidates:
        atom1 = _find_atom_in_structure(gstruc, rec1[1], rec1[3])
        atom2 = _find_atom_in_structure(gstruc, rec2[1], rec2[3])
        if atom1 is None or atom2 is None:
            continue
        x1 = gstruc.coordinates[atom1.idx]
        x2 = gstruc.coordinates[atom2.idx]
        if np.abs(np.linalg.norm(x1-x2)-dist)<tol:
            return atom1, atom2

    # If we haven't located the atoms, return empty lists
    print('Error: Could not locate bond')
    return [], []


def identify_by_position(x, qstruc):
    """Find the atom of ``qstruc`` that lies closest to position ``x``.

    Parameters
    ----------
    x : array-like
        Query position, broadcast against ``qstruc.coordinates``.
    qstruc : structure
        Object exposing ``coordinates`` (one row per atom) and ``atoms``.

    Returns
    -------
    (atom, distance)
        The nearest atom and its Euclidean distance from ``x``.
    """
    offsets = x - qstruc.coordinates
    distances = np.sqrt(np.sum(np.power(offsets, 2), 1))
    nearest = np.argmin(distances)
    closest_atom = qstruc.atoms[nearest]
    return closest_atom, np.min(distances)


In [None]:
def update_mod_views(b):
    """Toggle the view index tied to widget ``b`` and refresh the NGL view.

    If ``b`` is one of ``ModBox.children``, its position plus one is removed
    from the global ``disp_ndcs`` list when already present and appended
    otherwise. When the NGL viewer is available, only the components listed
    in ``disp_ndcs`` are then shown.
    """
    global disp_ndcs

    members = list(ModBox.children)
    if b in members:
        ndx = members.index(b) + 1
        if ndx in disp_ndcs:
            disp_ndcs.remove(ndx)
        else:
            disp_ndcs.append(ndx)

    if NGL_DEF:
        pdbview.show_only(indices=disp_ndcs)
    

class strucChange:
    """Record of one structure modification: the affected structure and a display color."""

    def __init__(self, xstruc, color):
        # struc: the structure object describing what changed
        # color: color associated with this change
        self.struc, self.color = xstruc, color
    

def log_change(xstruc, color):
    """Append a ``strucChange`` built from ``xstruc`` and ``color`` to the global ``ChangeList``."""
    global ChangeList
    entry = strucChange(xstruc, color)
    ChangeList.append(entry)

def show_changes():
    """Add every change recorded in ``ChangeList`` to the NGL view.

    For each residue of each logged change, the matching residue is looked up
    by atom position (tolerance 0.01) in ``reference.pdb`` and in
    ``protein.gro``. If the NGL viewer is available, the reference residue is
    added as ball+stick in the change's color together with a text label, and
    the residue of the final structure is added as semi-transparent licorice.
    """
    rstruc = pmd.load_file('reference.pdb')
    fstruc = pmd.load_file('protein.gro')

    # rreslist[n] is the residue of rstruc matching change n, or [] if none
    rreslist = []

    # freslist[n] is the residue of the final structure matching change n, or [] if none
    freslist = []

    # collist[n] is the display color of change n
    collist = []
    for change in ChangeList:
        for res in change.struc.residues:

            if len(res.atoms)>0:
                collist.append(change.color)

            # Have we found this in reference structure?
            foundref = False

            # Have we found this in final structure?
            foundfin = False
            for at in res.atoms:

                # get atom coordinates
                x = change.struc.coordinates[at.idx]

                # Identify corresponding atom in rstruc
                if len(rreslist)<len(collist):
                    refat, er = identify_by_position(x, rstruc)
                    if er<0.01:
                        foundref = True
                        rreslist.append(refat.residue)

                # Identify corresponding atom in fstruc
                if len(freslist)<len(collist):
                    finat, er = identify_by_position(x, fstruc)
                    if er<0.01:
                        foundfin = True
                        freslist.append(finat.residue)

                if foundfin and foundref:
                    break

                # After the last atom, pad with [] so both lists stay
                # aligned with collist.
                if at==res.atoms[-1] and foundfin==False:
                    freslist.append([])

                if at==res.atoms[-1] and foundref==False:
                    rreslist.append([])

    if NGL_DEF:
        for n in range(0, len(collist)):
            rres = rreslist[n]
            fres = freslist[n]

            # Show the corresponding residue in the original (reference) structure
            if rres!=[]:

                # Add the entire residue. The slice end is exclusive, so +1 is
                # needed to include the residue's last atom (this also covers
                # the single-atom case).
                pdbview.add_trajectory(rstruc[rres.atoms[0].idx:rres.atoms[-1].idx+1])

                pdbview.set_representations([
                    {"type": "ball+stick", "params": {"color": collist[n], "sele": "all", "opacity": "1.0", "radius": 0.1}},
                ], component=len(pdbview._ngl_component_ids)-1)

                # Add a label (original residue numbering)
                offset = 0.25*np.ones((3,))
                pdbview.shape.add_label((offset + rstruc.coordinates[rres.atoms[0].idx,:]).tolist(), 
                                        [0,0,0], 2.5, rres.name + str(rres.number) + '/' + rres.chain)

            # Register one extra component id by hand.
            # NOTE(review): this presumably accounts for the label shape added
            # above, yet it also runs when no label was added (rres == []).
            # Confirm whether it belongs inside the `if rres!=[]` branch.
            pdbview._ngl_component_ids.append(str(uuid.uuid4()))
            pdbview._update_component_auto_completion()

            # Show the corresponding residue in the final structure
            if fres!=[]:

                # Add the entire residue (slice end is exclusive, hence +1)
                pdbview.add_trajectory(fstruc[fres.atoms[0].idx:fres.atoms[-1].idx+1])

                pdbview.set_representations([
                    {"type": "licorice", "params": {"color": "element", "sele": "all", "opacity": "0.5", "radius": 0.3}},
                ], component=len(pdbview._ngl_component_ids)-1)

def run_pdb2gmx():
    
    global disp_ndcs
    global pdbview
    global MutList
    global ChangeList
    
    # First we copy all inputs to an "prot_in" folder in case we want to see 
    # the original starting materials later. 
    runbash('mkdir prot_in; cp *.* prot_in/', "Copying all inputs to prot_in/ folder.")
    
    
    # This is the input protein PDB file. It should have sequential residue numbering.
    infile = 'protein.pdb'
    
    # If NGL viewer is available, display input structure
    if NGL_DEF:
        pdbview.add_trajectory(pmd.load_file(infile))
        pdbview.set_representations([
            {"type": "cartoon", "params": {"color": "grey", "sele": "(protein)", "opacity": "0.2"}},
            {"type": "licorice", "params": {"color": "grey", "sele": "(not protein)", "opacity": "0.2"}}
        ])
    disp_ndcs = [0]
    
    # Check force field and water model selections
    ffstr = ''
    wmstr = ''
    with open("ffparams.txt") as fd:
        for line in fd:
            if line.split(':')[0]=='FF':
                ffstr = line.split(':')[1].strip()
            if line.split(':')[0]=='WATER':
                wmstr = line.split(':')[1].strip()


    # These are output codes for attempted modifications to the structure. 
    #  -- "Repair" means that we keep the same amino acid but add missing atoms
    #  -- "Replacement" means that one amino acid has been replaced with another
    #  -- "Deletion" means that an amino acid has been deleted from the chain
    # ColorCodes will be used for visualizing the results in NGLViewer (if available)
    MutCodes = ['Repair', 'Replacement', 'Deletion']
    ColorCodes = ['green', 'orange', 'red']
    
    # First, we do requested mutations. 
    if len(MutList)==0:
        # temp.pdb will be the input file for pdb2gmx. If there are no 
        # mutations requested, we can just copy this directly. 
        out = runbash("cp " + infile + " temp.pdb", "Copying file " + infile + " to temp.pdb.")
        
    # If mutations are requested, do those first before writing temp.pdb
    # and proceeding to pdb2gmx routine. 
    else:
        
        # First copy input protein structure to mutant.pdb
        # Then iteratively make mutations one at a time, updating
        # and re-loading mutant.pdb each time.
        out = runbash('cp ' + infile + ' mutant.pdb', "Copying " + infile + ' to mutant.pdb')
        
        # Load the input protein structure. We'll use this structure to
        # identify which amino acids are being requested for mutation. 
        # This structure will *not* be modified in what follows. 
        prostruc = pmd.load_file(infile)
        
        # Now step through mutations sequentially
        for mut in MutList:
            
            # Note in logfile which mutation we're working on
            writelog(["Working on requested mutation " + mut], '# ')

            # Load the current *mutant* structure
            mutstruc = pmd.load_file('mutant.pdb')

            # residue index in prostruc (input protein structure)
            idx = int((mut.split(' to ')[0]).split(':')[-1].strip(')'))

            # New amino acid
            newaa = mut.split(' to ')[1].strip()

            # Find coordinates of the first atom in this residue in prostruc
            x0 = prostruc.coordinates[prostruc.residues[idx].atoms[0].idx]

            # Identify the corresponding atom in mutstruc by its coordinates.
            # Note that residue indices may be different in prostruc and mutstruc
            at0, er = identify_by_position(x0, mutstruc)
            
            # Note residue number in logfile
            writelog(["Identified residue in current mutant structure as #" + str(at0.residue.idx+1) + ' (indexing starts at 1)'], '# ')
            
            # Make the mutation, stored now in parmed structure newstruc
            newstruc, code, mut_txt = mutate_residue(mutstruc, at0.residue.idx, newaa)
            if code!=0:
                writelog(['Error: mutation ' + mut + ' was unsuccessful'], '# ')
                writelog(['Return message: ' + mut_txt], '# ')
                writelog(['Please repair input file or run again without the mutation request'], '# ')
                
                print('Error: mutation ' + mut + ' was unsuccessful')
                print('Return message: ' + mut_txt)
                print('Please repair input file and start over or refresh the page and run again without the mutation request.')
                
                return
            else:
                
                # If successful, write output to logfile
                writelog(['PyMol script exited with code ' + str(code) + ':', mut_txt], '# ')
                
                # Update mutant.pdb
                newstruc.write_pdb('mutant.pdb')
                writelog(['Writing mutant structure to mutant.pdb'], '# ')
                
                log_change(mutstruc[at0.residue.idx,:], ColorCodes[code])
                
        # Copy mutant.pdb to temp.pdb, input file for pdb2gmx
        out = runbash("cp mutant.pdb temp.pdb", "Copying mutant.pdb to temp.pdb for input to pdb2gmx")
        
    
    MaxTries = 100
    count = 0
    success = False
    while success==False:
        
        # Note beginning of new round
        writelog(["*****************************************************"], '')

        # Text summarizing outcome of this iterations
        report_text = ''

        # Header for output report
        head_text = ''

        # Assume success until proven otherwise 
        success = True
        
        # Remove all protein itp and top data
        out = runbash("rm *Protein*.itp; rm *.top", "Purging old itp and top files")
        
        out = runbash("cp temp.pdb temp.pdb." + str(count), "Backing up input for current round to temp.pdb."+str(count)+" for reference")
        
        # Try to convert the pdb
        out = runbash("export GMX_MAXBACKUP=-1; gmx pdb2gmx -ff " + ffstr + " -f temp.pdb -o protein.gro -ignh -water " + wmstr,
                     "Executing pdb2gmx on current structure")

        # Check for errors. The variable msg will store the 
        # text associated with the error. 
        msg = ''
        for line in out:
            # If the line contains the keyword "error",
            # throw error and start recording the message
            if len(msg)==0 and re.search('error', line, flags=re.IGNORECASE):
                # First check if this is just a "GROMACS reminds you" message containing the word "error".
                # If not, throw an error and start recording the message. 
                if line.find('GROMACS reminds you:')==-1:
                    msg = 'PDB2GMX MESSAGE: '
                    success = False

            # If a message is being recorded:
            if len(msg)>0:
                # If the current line is empty, stop recording
                if len(line.strip())==0:
                    report_text = msg
                    writelog([msg], '# ')
                    break
                # Otherwise, append the new line to the message
                else:
                    msg = msg + line + " "

        if success==False:

            found_error = False

            #**********************************************************
            # Handled Error: Residue missing from database -- Delete 
            #**********************************************************
            RegEx = re.compile('.*Residue(.*)not found in residue topology database')
            if re.match(RegEx, msg):

                # First identify problematic residue
                hit = re.match(RegEx, msg)
                resname = hit.group(1).strip().strip('\'')
                
                writelog(['Error triggered: Residue '+resname+ ' not available in the force field database.',
                          'Eliminating from the structure.'], '# ')
                
                struc0 = pmd.load_file('temp.pdb')
                log_change(struc0[':'+resname], 'red')
                
                # Finally, delete from the structure
                struc0.strip(':'+resname)
                struc0.write_pdb('temp.pdb', altlocs='first')
                writelog(['Overwriting structure file temp.pdb.\n'], '# ')
                
                found_error = True
                report_text += '\n\nResidue '+resname+ ' not available in the force field database.\n'
                report_text += 'Eliminating from the structure.\n'
                head_text = resname + ' deletion'

            #*******************************************************************************
            # Handled Error: Residue missing atoms (Error or Warning) -- Repair using PyMol
            #*******************************************************************************
            
            RegExWarn = re.compile('.*There were (.*) missing atoms in molecule.*')
            RegExErr = re.compile('.*Residue(.*)named(.*)of a molecule in the input file was mapped to an entry in the topology database, but the atom(.*)used in that entry is not found in the input file')

            if re.match(RegExWarn, msg) or re.match(RegExErr, msg):
                found_error = True
                
                # If this appears as a warning
                if re.match(RegExWarn, msg):
                    RegExWarnFull = re.compile('.*atom (.*) is missing in residue ([a-zA-Z]+)[ ]+([0-9]+) in the pdb file.*')
                    
                    # Identify problematic residues
                    for line in out:
                        if re.match(RegExWarnFull, line):
                            hit = re.match(RegExWarnFull, line)
                            
                            # Only pay attention if the missing atom is NOT an H
                            if hit.group(1).strip()[0]!='H':
                                missatom = hit.group(1).strip()
                                resname = hit.group(2).strip()
                                resnumber = int(hit.group(3).strip())
                
                # If this appears as an error
                elif re.match(RegExErr, msg):

                    # Identify problematic residues
                    hit = re.match(RegExErr, msg)
                    resnumber = int(hit.group(1).strip())
                    resname = hit.group(2).strip()
                    missatom = hit.group(3).strip()
                
                head_text = 'Missing atoms: ' + resname
                
                # At least one atom, named $missatom, are missing from some residue of type $resname. 
                # There's no need to figure out *which* $resname was missing atom $missatom. All residues
                # of this type that miss the same atom will trigger the same error, so we may as well
                # replace all of them now. So now we make a list of all residues of name $resname that do *not* 
                # contain atom $missatom. 
                
                writelog(['Error triggered: Atom ' + missatom + ' missing from residue '+resname+ ' ' + str(resnumber),
                          'Will attempt to repair this and all similar errors using PyMol mutagenesis tool'], '# ')
                
                # Repair missing atoms
                struc0 = pmd.load_file('temp.pdb')
                newstruc, rlist, rcodes = fix_missing_atoms(struc0, resname, missatom)
                
                # Update structure file
                newstruc.write_pdb('temp.pdb', altlocs='first')
                writelog(['Overwriting temp.pdb with new structure'], '# ')
                
                # And reload for visualization
                struc0 = pmd.load_file('temp.pdb')
                
                # Log the results
                report_text += '\n\nAtom ' + missatom + ' was missing from residue '+resname+ ' ' + str(resnumber) + '\n'
                report_text += 'Used PyMol to repair replace ' + str(len(rlist)) + ' similar issues:'
                for n in range(0, len(rlist)):
                    if rcodes[n]==0:
                        report_text += 'Successfully repaired ' + resname + str(rlist[n]+1) + '.\n'
                    elif rcodes[n]==1:
                        report_text += 'Replaced ' + resname + str(rlist[n]+1) + ' with glycine.\n'
                    elif rcodes[n]==2:
                        report_text += 'Deleted ' + resname + str(rlist[n]+1) + ' from structure.\n'
                                        
                    log_change(struc0[rlist[n],:], ColorCodes[rcodes[n]])

            
            if found_error==False:
                report_text += '\nUnhandled error from pdb2gmx. Aborting MD model building.'
                writelog(['Unhandled error from pdb2gmx. Aborting MD model building.'], '# ')
                
                # Setting count = MaxTries breaks us out of the pdb2gmx loop
                count = MaxTries

        # If no errors, double-check against known warnings
        if success:
            for line in out:
                
                #**********************************************************
                # Handled Warning: Long bond -- Insert chain break 
                #**********************************************************
                
                RegEx = re.compile('.*Warning: Long Bond.*\(([0-9]+)-([0-9]+) = (.*)nm.*\).*')
                if re.match(RegEx, line):
                    
                    writelog(['GROMACS Warning triggered: ' + line], '# ')
                    report_text += line + '\n'
                    
                    bondmax = 1.5*2.0
                    
                    # Identify problematic residues
                    hit = re.match(RegEx, line)
                    at1tmp = int(hit.group(1))
                    at2tmp = int(hit.group(2))
                    gdist = 10.0*float(hit.group(3))
                    
                    at1 = min(at1tmp,at2tmp)
                    at2 = max(at1tmp,at2tmp)
                    
                    # Identify atoms in gro structure
                    grostruc = pmd.load_file('protein.gro')
                    gatom1, gatom2 = locate_long_bond(at1, at2, gdist, grostruc)
                    x1 = grostruc.coordinates[gatom1.idx]
                    x2 = grostruc.coordinates[gatom2.idx]
                    
                    # Identify equivalent atoms in pdb structure
                    struc0 = pmd.load_file('temp.pdb')
                    atom1, er = identify_by_position(x1, struc0)
                    atom2, er = identify_by_position(x2, struc0)
                    
                    # If we failed to locate the atom
                    if atom1==[]:
                        head_text = 'Long bond: Failed repair'
                        report_text += '\n\nBond between atoms ' + str(at1) + ' and ' + str(at2) + ' was too long (' + '{:.2f}'.format(gdist) + ').\n'
                        report_text += 'Failed to identify atoms in PDB representation'
                        
                        writelog(['Long-bond repair failed: Failed to identify atoms in PDB representation'], '# ')
                        count = MaxTries
                        
                        # Break gets us out of "for line in out" loop
                        break
                        
                    # Otherwise, deal with bond
                    else:
                        
                        # Note culprits in logfile
                        writelog(['Identified atoms: Bond between atoms ' + str(at1) + ' and ' + str(at2) + ' was too long (' + '{:.2f}'.format(gdist) + ').'], '# ')
                        
                        # If this is an amide bond,  insert chain break
                        if atom1.name=='C' and atom2.name=='N' and gdist>bondmax:
                            
                            # Identify residues
                            res1 = atom1.residue
                            res2 = atom2.residue
                            
                            writelog(['Identified atoms as belonging to Amide bond joining residues ' + str(res1.idx+1) + ' and ' + str(res2.idx+1)], '# ')
                            writelog(['Inserting chain-terminate flag between them'], '# ')
                                                        
                            # Introduce chain-terminate flag
                            struc0[atom1.idx].residue.ter = True
                            
                            # Overwrite structure
                            struc0.write_pdb('temp.pdb', altlocs='first')
                            writelog(['Overwriting temp.pdb'], '# ')
                            
                            head_text = 'Res ' + str(res1.idx+1) + ' -- ' + str(res2.idx+1) + ' long bond: Chain break'
                            report_text += '\n\nBond between atoms ' + str(atom1.idx) + ' and ' + str(atom2.idx) + ' was long (' + '{:.2f}'.format(gdist) + ').\n'
                            report_text += 'Identified atoms as belonging to residues ' + str(res1.idx+1) + ' and ' + str(res2.idx+1) + '\n'
                            report_text += 'Inserting chain break between them.\n'
                            
                            log_change(struc0[res1.idx,:] + struc0[res2.idx,:], 'blue')
                
                            success = False
                            
                            # Break gets us out of "for line in out" loop
                            break
                            
                        # Otherwise, if both atoms are in the same residue, delete it
                        elif atom1.residue.idx==atom2.residue.idx:
                            
                            resname = atom1.residue.name
                            residx = atom1.residue.idx
                            
                            writelog(['Atoms belong to the same residue: ' + resname + ' ' + str(residx+1)], '# ')
                            writelog(['Eliminating from the structure'], '# ')
                            
                            log_change(struc0[atom1.residue.idx,:], 'red')
                            
                            # Delete residue
                            struc0.strip('(:'+str(atom1.residue.idx+1) + ')')
                            
                            # Overwrite structure
                            struc0.write_pdb('temp.pdb', altlocs='first')
                            writelog(['Overwriting temp.pdb'], '# ')
                            
                            report_text += '\n\nResidue '+resname+ ' ' + str(residx+1) + ' contained a long bond\n'
                            report_text += 'Deleting from structure.\n'
                            head_text = resname + str(residx+1) + ' long bond: Deletion'
                            
                            success = False
                            
                            # Break gets us out of "for line in out" loop
                            break
                        else:
                            writelog(['Unhandled long bond error: Aborting'], '# ')
                            writelog(['Problematic bond was between: ', atom1.name + ' atom of residue ' + atom1.residue.name + str(atom1.residue.idx+1) + ' of chain ' + atom1.residue.chain + '\n and \n' + atom2.name + ' atom of residue ' + atom2.residue.name + str(atom2.residue.idx+1) + ' of chain ' + atom2.residue.chain], '# ')
                            
                            head_text = 'Unhandled long bond error'
                            report_text += 'Problematic bond was between:\n ' + atom1.name + ' atom of residue ' + atom1.residue.name + str(atom1.residue.idx+1) + ' of chain ' + atom1.residue.chain + '\n and \n' + atom2.name + ' atom of residue ' + atom2.residue.name + str(atom2.residue.idx+1) + ' of chain ' + atom2.residue.chain
                            count = MaxTries
                            success = False
                            
                            # Break gets us out of "for line in out" loop
                            break

        if success:
            head_text = 'Success!'
            
            report_text = 'pdb2gmx exited successfully. \nFile topol.top contains protein topology.\n' + report_text
            report_text = 'File protein.gro contains final protein structure. \n\n' + report_text
            
            for line in out:
                if re.search('error', line, flags=re.IGNORECASE):
                    report_text += line + '\n'
                if re.search('warning', line, flags=re.IGNORECASE):
                    report_text += line + '\n'
                    
            writelog(['pdb2gmx exited successfully.', 'File topol.top contains protein topology.'], '# ')
            writelog(['Output summary: ', report_text], '# ')
                    
        output = ''
        for line in out:
            output += line + '\n'
        
        newbox = widgets.VBox([
            widgets.Textarea(value=report_text,layout=Layout(width='15cm',height='5cm')),
            widgets.Textarea(value=output,layout=Layout(width='15cm',height='10cm'))
        ])
        outacc.children = list(outacc.children) + [newbox]
        outacc.set_title(len(outacc.children)-1, 'Round ' + str(len(outacc.children)) + ': ' + head_text)
        outacc.selected_index = len(outacc.children)-1

        count += 1
        if count==MaxTries:
            print('Warning: approaching maximum number of pdb2gmx attempts (' + str(MaxTries) + ')')
        if count>MaxTries:
            print('Aborting pdb2gmx. Please check input file.')
            break
            
    show_changes()

    # Last step is file cleanup
    runbash('mkdir tmp', "Making directory tmp to store temporary files.")
    runbash('mv mutant.pdb tmp/; mv temp.pdb* tmp/; mv tmp.pdb tmp/; mv tmp.pdb.mut tmp/', 'Moving all temporary files to tmp/ folder.')
    runbash('rm mutate.py; rm ffparams.txt; rm reference.pdb; rm protein.pdb;', 'Removing all now-unnecessary input files. (Copies stored in prot_in/ folder).')
    

In [None]:
# Three-letter codes of the residues that may be selected for (or mutated to)
AAList = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']

# Human-readable labels, one per recognized residue of the input structure
ResList = []

# This is the structure to be processed
pstruc = pmd.load_file('protein.pdb')

# This is a reference structure
rstruc = pmd.load_file('reference.pdb')

# We start by matching atoms in the reference and input protein structures.
# pstruc is from the pdb file that will be passed to pdb2gmx. rstruc is from
# a reference structure whose residue numbers will likely be more meaningful
# to the user. (Before residues were stripped and renumbered by parmed.)
for res in pstruc.residues:

    # Only recognized amino acids are offered for mutation
    if res.name not in AAList:
        continue

    # Coordinates of the first atom of this residue in pstruc
    x0 = pstruc.coordinates[res.atoms[0].idx]

    # Identify the corresponding atom in rstruc by its coordinates.
    # Note that residue indices may be different in pstruc and rstruc
    at0, er = identify_by_position(x0, rstruc)

    # identify_by_position can come back without an atom (run_pdb2gmx checks
    # its result against []). In that case fall back to the numbering carried
    # by the input structure instead of aborting the whole cell.
    ref_res = getattr(at0, 'residue', None)
    if ref_res is None:
        resnum = res.number
        chain = res.chain
    else:
        # PDB-assigned number in the original file.
        # These numbers may not be consecutive!
        resnum = ref_res.number
        chain = ref_res.chain

    # Note that res.idx reflects the sequence index in the *input* structure, not the reference structure
    ResList.append(chain + '/' + res.name + ' ' + str(resnum)+'  (' + res.chain + ":" + str(res.idx) + ')')
            
# Mutations queued by the user, kept as '<residue label> to <AA code>' strings
MutList = []

# --- Row 1: choose a residue, choose the target amino acid, queue it ---

# Residue picker, filled with the labels collected in ResList
mutsel = widgets.Dropdown(
    options=ResList,
    description='Mutate Residue:',
    disabled=False,
    layout=Layout(width='6cm'),
)
mutsel.style.description_width = '3cm'

# Target amino-acid picker
aadrop = widgets.Dropdown(
    options=AAList,
    description='to:',
    disabled=False,
    layout=Layout(width='2.5cm'),
)
aadrop.style.description_width = '0.5cm'

# Button that queues the chosen mutation
mutbt = widgets.Button(description='Mutate', layout=Layout(width='2cm'))

# --- Row 2: review the queued mutations and remove one ---

# Starts empty; its options and row count are updated by the button handlers
mutdelsel = widgets.Select(
    options=[],
    description='Mutations:',
    disabled=False,
    rows=0,
)

# Button that removes the mutation highlighted in mutdelsel
mutdelbt = widgets.Button(description='Remove', layout=Layout(width='2cm'))

# Stack the two rows into the mutation panel
_mut_add_row = widgets.HBox([mutsel, aadrop, mutbt])
_mut_del_row = widgets.HBox([mutdelsel, mutdelbt])
MutBox = widgets.VBox([_mut_add_row, _mut_del_row])


def mutbt_onclick(b):
    """Queue the mutation currently chosen in the residue/amino-acid dropdowns.

    Click handler for the "Mutate" button. Reads the residue label from
    ``mutsel`` and the target code from ``aadrop``, appends
    ``'<label> to <code>'`` to ``MutList`` and refreshes ``mutdelsel``.
    A residue that already has a queued mutation is rejected with a message.

    Parameters
    ----------
    b : object
        Value passed in by the click event; not used.
    """
    mut = mutsel.value

    # The dropdown has no value when there are no residues to choose from;
    # without this guard the string concatenation below raises TypeError.
    if mut is None:
        print('Error: No residue selected.')
        return

    # Entries look like '<label> to <code>'; compare on the label part only.
    # Splitting on the separator avoids assuming a fixed code length.
    if any(mutx.rsplit(' to ', 1)[0] == mut for mutx in MutList):
        print('Error: Residue is already mutated. Please delete and try again.')
        return

    MutList.append(mut + ' to ' + aadrop.value)

    # Show the updated queue, one row per queued mutation
    mutdelsel.options = MutList
    mutdelsel.rows = len(MutList)
    
def mutdelbt_onclick(b):
    """Remove the mutation highlighted in ``mutdelsel`` from the queue.

    Click handler for the "Remove" button. Does nothing when the queue is
    empty or when the highlighted value is not a queued mutation.

    Parameters
    ----------
    b : object
        Value passed in by the click event; not used.
    """
    sel = mutdelsel.value

    # Nothing highlighted (value is None) or a stale value: leave the queue
    # alone rather than letting a list lookup raise ValueError.
    if sel not in MutList:
        return

    # Drop the first matching entry, then refresh the list widget
    MutList.remove(sel)
    mutdelsel.options = MutList
    mutdelsel.rows = len(MutList)
    
# Attach the click handlers: "Mutate" queues a mutation, "Remove" drops the highlighted one
mutbt.on_click(mutbt_onclick)
mutdelbt.on_click(mutdelbt_onclick)

In [None]:
# Module-level list, reset to empty each time this cell is run.
# NOTE(review): presumably the record filled by log_change() and read by
# show_changes() inside run_pdb2gmx -- confirm against those definitions.
ChangeList = []
def build_topology(b):
    """Click handler for the "Build Topology" button: run the pdb2gmx workflow.

    ``b`` is the value passed in by the click event; it is not used.
    """
    run_pdb2gmx()

In [None]:
# Structure viewer: an NGL widget when nglview imported successfully,
# otherwise a same-sized HTML placeholder asking the user to install it.
if not NGL_DEF:
    pdbview = widgets.HTML(
        value='<p style=\"text-align:center; font-size:20px\"><br><br>Install NGLView library<br>to view structures.</p>',
        layout=widgets.Layout(width='500px', height='500px'),
    )
else:
    pdbview = nv.NGLWidget()
    pdbview._set_size('500px', '500px')
    pdbview.camera = 'orthographic'

# Empty containers created here and shown below
ModBox = widgets.VBox([])
outacc = widgets.Accordion(children=[])

# Button that starts the topology build
gobt = widgets.Button(description='Build Topology')
gobt.on_click(build_topology)

# Page layout, top to bottom: viewer beside ModBox, then the mutation panel,
# the build button, and the output accordion.
display(widgets.HBox([pdbview, ModBox]))
display(MutBox, gobt, outacc)