In [1]:
%%javascript
// Replace the output area's _should_scroll hook with a function that always
// returns false, regardless of the number of output lines it is given.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [2]:
import os,sys
sys.path.append('./misc/lib/python3.7/site-packages')

import math
import numpy as np
import requests
import ipywidgets as widgets
import matplotlib.pyplot as plt
from IPython.display import display, display_markdown
from ipywidgets import Layout, HTML
from pathlib import Path
from scipy import spatial
import string
from IPython.display import FileLink


# nglview is treated as optional: NGL_DEF records whether the import succeeded.
NGL_DEF = False
try:
    import nglview as nv
    NGL_DEF = True
except Exception:
    # Any import-time failure leaves the flag False. Catching Exception
    # (rather than using a bare except) lets KeyboardInterrupt and
    # SystemExit propagate.
    NGL_DEF = False
    

import parmed as pmd
import re

from scipy.ndimage import gaussian_filter

# Print arrays with 8 digits of precision and without scientific notation
np.set_printoptions(precision=8, suppress=True)


# HTML template for a clickable button: an anchor that opens {link} in a new
# tab, wrapping a Jupyter-styled button labelled {text}.
HTMLButtonPrompt = '''<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<a href="{link}" target="_blank" >
<button class="p-Widget jupyter-widgets jupyter-button widget-button mod-warning" style="width:100px; background-color:#E9E9E9; font-size:10pt; color:black">{text}</button>
</a>
</body>
</html>
'''

# HTML template for the same button without the surrounding anchor and with
# light-grey label text; only {text} is substituted.
HTMLDeadPrompt = '''<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<button class="p-Widget jupyter-widgets jupyter-button widget-button mod-warning" style="width:100px; background-color:#E9E9E9; font-size:10pt; color:#D2D2D2">{text}</button>
</body>
</html>
'''

# Substrings flagged as forbidden.
# NOTE(review): presumably rejected in user-supplied file names -- confirm
# against the code that consumes this list.
forbidden_strings = ["..", "/", "\\", " ", "~"]

In [3]:
sys.path.append('./main/')

import pigment
import tresp
import pdc
import nsd
import phutil

# Make data storage directory
# HOME is read from the environment; a KeyError is raised if it is unset.
# NOTE(review): HOME is not used in the cells visible here -- confirm it is needed.
HOME = os.environ['HOME']
# The data directory location is decided by phutil.
DATADIR = phutil.get_data_dir()

# NOTE(review): presumably creates the sub-folders (such as DATADIR/pdb) that
# later cells write to -- confirm in phutil.
phutil.build_data_folders(DATADIR)

In [4]:
# Atom names that every Chl/BChl molecule must provide.
# A pigment lacking any of them cannot be used in any calculation.
ChlBaseNames = [
    atom + ring
    for ring in ('A', 'B', 'C', 'D')
    for atom in ('N', 'C1', 'C2', 'C3', 'C4', 'CH')
]
ChlBaseNames += ['CAD', 'CBD']

# Atoms of the phytol tail: C1 through C20
PhytolNames = ['C' + str(k) for k in range(1, 21)]

# Further atom names appended to every per-species name list: a set of
# substituent atoms followed by the phytol tail.
# NOTE(review): the name suggests these are optional atoms -- confirm.
ChlOptNames = [
    'CAA', 'CBA', 'CGA', 'O1A', 'O2A',
    'CMA', 'CMB', 'CAB', 'CMC', 'CAC', 'CBC', 'CMD', 'OBD',
] + PhytolNames

# The tetpy class describes common tetrapyrrole pigments.
# It is built from the category label and the list of essential atom names.
tetpy = pigment.category('tetrapyrrole', ChlBaseNames)


# These atoms can be used to distinguish between different pigment types.
# Each list is a species-specific set of atom names followed by ChlOptNames;
# the columns are aligned so that differences between species line up.
# Note: 
#    The heavy atoms of Chl a, Chl c1, Chl c2, and BChl g are identical (excluding the phytol tail)
#    The heavy atoms of BChl a and BChl b are identical
#    (accordingly, CLANames == BCGNames and BCANames == BCBNames below)
CLANames = ['MG', 'CBB', 'CGD', 'O1D', 'O2D', 'CED'] + ChlOptNames
CLBNames = ['MG', 'CBB', 'CGD', 'O1D', 'O2D', 'CED', 'OMC']  + ChlOptNames
CLDNames = ['MG',        'CGD', 'O1D', 'O2D', 'CED',        'OBB'] + ChlOptNames
CLFNames = ['MG', 'CBB', 'CGD', 'O1D', 'O2D', 'CED',               'OMB']  + ChlOptNames
BCANames = ['MG', 'CBB', 'CGD', 'O1D', 'O2D', 'CED',        'OBB'] + ChlOptNames
BCBNames = ['MG', 'CBB', 'CGD', 'O1D', 'O2D', 'CED',        'OBB'] + ChlOptNames
BCCNames = ['MG', 'CBB',                                    'OBB',        'CIB'] + ChlOptNames
BCDNames = ['MG', 'CBB',                                    'OBB',               'CND'] + ChlOptNames
BCENames = ['MG', 'CBB',                             'OMC', 'OBB',        'CIB', 'CND'] + ChlOptNames
BCFNames = ['MG', 'CBB',                             'OMC', 'OBB',               'CND'] + ChlOptNames
BCGNames = ['MG', 'CBB', 'CGD', 'O1D', 'O2D', 'CED'] + ChlOptNames

# The corresponding Pheo names are the same, excluding the first entry (MG)
# NOTE(review): PHBNames and BPANames are not referenced in the cells visible here.
PHANames = CLANames[1:]
PHBNames = CLBNames[1:]
BPANames = BCANames[1:]

# Species definitions. Each pigment.species call receives, in order:
# a display name, a three-letter code, the pigment category, the species'
# atom-name list, and a numeric constant.
# NOTE(review): the constant is annotated "statC*cm" on CLA, so it is
# presumably a dipole magnitude -- verify against pigment.species.
CLA = pigment.species(
    'Chl a',
    'CLA',
    tetpy,
    CLANames,
    4.3e-18 # statC*cm
)

CLB = pigment.species(
    'Chl b',
    'CLB',
    tetpy,
    CLBNames, 
    3.60e-18
)

CLD = pigment.species(
    'Chl d',
    'CLD',
    tetpy,
    CLDNames, 
    0.0
)

CLF = pigment.species(
    'Chl f',
    'CLF',
    tetpy,
    CLFNames,
    0.0
)

BCA = pigment.species(
    'BChl a',
    'BCA',
    tetpy,
    BCANames, 
    5.477e-18
)

BCB = pigment.species(
    'BChl b',
    'BCB',
    tetpy,
    BCBNames, 
    0.0
)

BCC = pigment.species(
    'BChl c',
    'BCC',
    tetpy,
    BCCNames,
    0.0
)

BCD = pigment.species(
    'BChl d',
    'BCD',
    tetpy,
    BCDNames, 
    0.0
)

BCE = pigment.species(
    'BChl e',
    'BCE',
    tetpy,
    BCENames, 
    0.0
)

BCF = pigment.species(
    'BChl f',
    'BCF',
    tetpy,
    BCFNames, 
    0.0
)

BCG = pigment.species(
    'BChl g',
    'BCG',
    tetpy,
    BCGNames, 
    0.0
)

PHA = pigment.species(
    'Pheo a',
    'PHA',
    tetpy,
    PHANames,
    3.50e-18
)

# Placeholder species with an empty atom-name list
UNK = pigment.species(
    'Unknown',
    'UNK',
    tetpy,
    [],
    0.0
)


# Species currently enabled. The commented-out entries are exactly the
# species defined above with a 0.0 constant; UNK is not listed either.
TetPyList = [
    CLA, CLB, #CLD, CLF, 
    BCA, #BCB, BCC, BCD, BCE, BCF, BCG,
    PHA, 
]

In [13]:
def import_repairs(stdname):
    """Load the atom-repair template for one pigment species.

    The template is read from ``./misc/repair/templates/<stdname>.txt``.
    Every non-blank line must consist of exactly five whitespace-separated
    fields; each such line is returned as a list of five strings.

    Parameters
    ----------
    stdname : str
        Name of the template file, without the ``.txt`` extension.

    Returns
    -------
    list of list of str
        One entry per template line, in file order. An empty list is
        returned when the template file does not exist or when a line is
        malformed, so the result can always be iterated safely.
    """
    fname = './misc/repair/templates/'+stdname+'.txt'

    # No template for this species: there is nothing to repair
    if not os.path.isfile(fname):
        return []

    # Empty list to store repair data
    repair_list = []

    with open(fname) as fd:
        for line in fd:
            dat = line.strip().split()

            # Blank lines (e.g. a trailing newline) carry no repair data
            if len(dat)==0:
                continue

            if len(dat)!=5:
                print('Error reading file ' + fname)
                print('Aborting pigment repair.')
                # Return an empty list (not None): callers loop over the result
                return []

            repair_list.append(dat)

    return repair_list

def implement_atom(struc, pig, tname, xyz):
    """Add a new atom called `tname` at position `xyz` to both `struc` and `pig`.

    The atom is attached to pig.residue inside `struc`. The pigment's own
    records (pig.atnames and pig.atcoords) receive the new name and
    position as their last entry, and the coordinate array of `struc` is
    rebuilt with `xyz` inserted at the index the new atom occupies.

    The element is guessed from the first character of `tname`:
    'O' gives oxygen, 'N' gives nitrogen, anything else gives carbon.

    Nothing is returned; `struc` and `pig` are modified in place.
    """
    # Atomic number from the leading letter, defaulting to carbon
    atomic_number = {'O': 8, 'N': 7}.get(tname[0], 6)

    # Snapshot the coordinates first: they are lost once the topology
    # of struc is changed
    saved_xyz = struc.coordinates.copy()

    # Create the atom and attach it to the pigment's residue
    added = pmd.topologyobjects.Atom(name=tname, atomic_number=atomic_number)
    struc.add_atom_to_residue(added, pig.residue)

    # Mirror the addition in the pigment: one more name, and one more
    # coordinate slot along axis 1 that is then filled with xyz
    pig.atnames.append(tname)
    pig.atcoords = np.append(pig.atcoords, np.zeros((1, 1, 3)), 1)
    pig.atcoords[:,-1] = xyz

    # Splice xyz into the saved coordinates at the new atom's index
    struc.coordinates = np.insert(saved_xyz, struc.atoms.index(added), xyz, axis=0)
    return



# at1 is the base atom. at2 is bonded to it. 
# We want all atoms within rcut of at1 that are NOT
# either at1 or at2
def find_neighbors(at1, at2, pig, rcut, struc):
    """Return the indices of all atoms in `struc` closer than `rcut` to atom `at1`.

    Parameters
    ----------
    at1 : str
        Name of the base atom; looked up among the atoms of pig.residue.
    at2 : str
        Name of an atom bonded to at1, looked up the same way. It is left
        out of the result. If it cannot be found, nothing extra is removed.
    pig : object
        Pigment whose `residue` attribute identifies the residue to search.
    rcut : float
        Cutoff distance, in the same length units as struc.coordinates.
    struc : object
        Iterable of atoms (each with `name`, `residue` and `idx`) that also
        exposes an (natoms, 3) `coordinates` array.

    Returns
    -------
    list
        Indices into struc.coordinates, in increasing order, excluding the
        indices of at1 and at2.

    Raises
    ------
    ValueError
        If at1 is not present in the pigment's residue. Without this check
        the index -1 would silently select the last atom of the structure.
    """
    n1 = -1
    n2 = -1
    # First find index of at1 and at2 in struc
    for at in struc:
        if at.name==at1 and at.residue==pig.residue:
            n1 = at.idx
        if at.name==at2 and at.residue==pig.residue:
            n2 = at.idx

    if n1 < 0:
        raise ValueError('Atom ' + str(at1) + ' was not found in the pigment residue')

    # Calculate distance from at1 to all other atoms. The square root is
    # taken so that the comparison with rcut is between true distances
    # (comparing squared distances with rcut shrinks the cutoff to sqrt(rcut)).
    coords = struc.coordinates
    dist = np.sqrt(np.sum(np.power((coords-coords[n1,:]),2),1))

    within = dist<rcut

    # Exclude the base atom and its bonded partner
    within[n1] = False
    if n2 >= 0:
        within[n2] = False

    return list(np.where(within)[0])


# Returns a list of chains to be repaired AND the biomolecule 
# model to be used for symmetry operations
def get_rep_chains():
    """Work out which chains to process and which biomolecule is selected.

    Reads the module-level BioList and ChainList. The selected biomolecule
    is the first entry of BioList whose first widget child has value True;
    when no entry qualifies, an empty list is used as the "none" marker.

    Returns
    -------
    (list, object)
        The chains of ChainList to be repaired, and the selected
        biomolecule (or [] when none is selected). With no biomolecule
        selected every chain is kept; otherwise only the chains named by
        one of the biomolecule's transforms are kept.
    """
    # Only one biom widget should have its checkbox marked True, so the
    # first hit is taken
    selbiom = next(
        (candidate for candidate in BioList
         if candidate.widgbox.children[0].value==True),
        []
    )

    # Without a biomolecule, ChainList (already restricted to the selected
    # chains) is returned as a fresh list
    if selbiom==[]:
        return [chain for chain in ChainList], selbiom

    # Collect every chain mentioned by the biomolecule's transforms
    needed = []
    for trans in selbiom.transforms:
        needed.extend(trans.chains)

    # Keep the selected chains that the biomolecule actually uses
    RepChains = [chain for chain in ChainList if chain in needed]
    return RepChains, selbiom
                        
    

def _unique_index(names, name):
    """Return the index of `name` in `names` if it occurs exactly once, else -1."""
    if names.count(name)==1:
        return names.index(name)
    return -1


def _tilted_bond(ax1, tilt_deg, bond_length):
    """Tilt unit vector `ax1` by `tilt_deg` degrees and scale it to `bond_length`.

    The tilt is a rotation about an axis perpendicular to ax1. A negative
    `tilt_deg` tilts the opposite way within the same plane.
    """
    nvec = np.cross(ax1, np.ones((3,)))
    if np.linalg.norm(nvec) < 1.0e-8:
        # ax1 is (anti)parallel to (1,1,1), so the cross product vanishes;
        # fall back to another reference direction to avoid dividing by zero
        nvec = np.cross(ax1, np.array([1.0, 0.0, 0.0]))
    nvec /= np.linalg.norm(nvec)

    tilt = spatial.transform.Rotation.from_rotvec(nvec*tilt_deg*(np.pi/180.0))
    return tilt.apply(ax1)*bond_length


def _spin_away_from_neighbors(ax1, origin, displacements, nbrcoords):
    """Rotate `displacements` about `ax1` to the angle that best avoids `nbrcoords`.

    All displacement vectors are rotated together, in 1-degree steps. The
    angle kept is the one with the largest value of the smallest squared
    distance between any new position (origin + rotated displacement) and
    any neighbor. Returns the list of chosen positions, one per displacement.
    With no neighbors, the unrotated positions are returned.
    """
    best = [disp + origin for disp in displacements]
    if len(nbrcoords)==0:
        return best

    mndist = 0.0
    for theta in np.arange(0.0, 360.0, 1.0):
        spin = spatial.transform.Rotation.from_rotvec(ax1*theta*(np.pi/180.0))
        trial = [spin.apply(disp) + origin for disp in displacements]

        # Closest approach (squared) of any trial atom to any neighbor
        closest = min(np.min(np.sum(np.power((nbrcoords-xyz),2),1)) for xyz in trial)
        if closest>mndist:
            mndist = closest
            best = trial
    return best


def _fixed_geometry_position(hybr, p1, p2, p3, bond_length):
    """Place an atom bonded to p1 when the positions p2 and p3 fix the geometry.

    d0 points from p1 toward the midpoint of p2 and p3 and has length
    `bond_length`. For 'sp2' the new position is p1 - d0, i.e. on the side
    of p1 opposite to that midpoint. For 'sp3', d0 is rotated by -120
    degrees about the p2 -> p3 direction and then added to p1.
    """
    # x0 is the average position of at3 and at2
    x0 = 0.5*(p3 + p2)

    # d0 points from at1 to x0 and has length bond_length
    d0 = x0 - p1
    d0 /= np.linalg.norm(d0)
    d0 *= bond_length

    if hybr=='sp2':
        return p1 - d0

    # ax23 points from at2 to at3
    ax23 = p3 - p2
    ax23 /= np.linalg.norm(ax23)

    rot = spatial.transform.Rotation.from_rotvec(-ax23*120.0*(np.pi/180.0))
    return rot.apply(d0) + p1


# Adds missing pigment atoms 
def run_repairs(b):
    """Repair the selected pigments, write the new structure and report the outcome.

    For every pigment in PigList:
      * if its chain is not among the chains to repair, or its checkbox is
        unticked, its residue is stripped from the global structure;
      * otherwise the repair template of its species is loaded and every
        listed atom that is missing from the pigment is added.

    Each template entry holds: target atom name(s) (two names separated by
    a colon), the hybridization ('sp2' or 'sp3'), and the names at1, at2
    and at3. at1 is the atom the target is bonded to, at2 is bonded to at1,
    and at3 ('-' when unused) fixes the geometry when present. Without at3,
    the new atom is rotated about the at2 -> at1 axis to the position that
    stays furthest from neighboring atoms.

    Afterwards the structure is written via gen_struc to
    DATADIR/pdb/<writetxt.value> (a '.pdb' suffix is appended if absent)
    and an HTML summary is stored in stattxt.value.

    Parameters
    ----------
    b : object
        Not used. NOTE(review): presumably the button widget passed in by
        a click callback -- confirm at the registration site.
    """
    global struc
    global PigList
    
    err = False
    
    # get_rep_chains also returns the biomolecule model, but we don't need it here
    RepChains, biom = get_rep_chains()
    Notes = []

    # Length used for every newly created bond
    bond_length = 1.45
    
    for pig in PigList:
        
        # If the chain or pigment is NOT selected, delete it from the structure
        if RepChains.count(pig.residue.chain)==0 or pig.widget.children[0].value==False:
            struc.strip(':' + str(pig.residue.idx+1))
            continue

        # Otherwise, fix any errors
        stdname = pig.species.stdname
        label = "Pigment " + pig.species.stdname + ' ' + str(pig.residue.chain) + ':' + str(pig.residue.number)

        # Import list of known repairs. A None result means the template
        # could not be read; treat it as "no repairs" and flag the failure.
        repair_list = import_repairs(stdname)
        if repair_list is None:
            Notes.append('Could not read repair template for ' + stdname + '<br>')
            err = True
            repair_list = []

        for rep in repair_list:
            # This is a list of target atom(s) (to be added if missing).
            # Multiple atoms are delimited by colons. 
            target_list = rep[0].split(':')

            # This is hybridization (sp2, sp3, etc.) at the target site
            hybr = rep[1]

            # This is the atom to which the target is bonded
            at1 = rep[2]

            # This atom is bonded to at1
            at2 = rep[3]

            # This atom (may be empty) is bonded to at1 or at2, depending on hybr
            at3 = rep[4]

            # If there is only one target
            if len(target_list)==1:

                target = target_list[0]

                # Nothing to do when the target atom is already present
                if pig.atnames.count(target)>0:
                    continue

                Notes.append(label + " is missing atom " + target)

                # Identify atom indices in pig
                ndx1 = _unique_index(pig.atnames, at1)
                ndx2 = _unique_index(pig.atnames, at2)
                ndx3 = _unique_index(pig.atnames, at3)

                # Check if all required atoms are present in the pigment.
                # at1 and at2 are always required. at3 is NOT required if the symbol is '-'.
                if (ndx1==-1) or (ndx2==-1) or ((ndx3==-1) and (at3)!='-'):
                    Notes.append('Could not add atom because required atoms were missing')
                    err = True
                    continue

                # Only sp2 and sp3 sites can be built
                if hybr not in ('sp2', 'sp3'):
                    Notes.append('Sorry, this repair is not yet implemented<br>')
                    err = True
                    continue

                p1 = pig.atcoords[0][ndx1]
                p2 = pig.atcoords[0][ndx2]

                if ndx3==-1:
                    # Free rotation: the target sits at the ideal bond angle
                    # (109.5 degrees for sp3, 120 degrees for sp2) from the
                    # at1 -- at2 bond and may spin about that bond.

                    # ax1 points from at2 to at1
                    ax1 = p1 - p2
                    ax1 /= np.linalg.norm(ax1)

                    if hybr=='sp3':
                        tilt_deg = 180.0-109.5
                    else:
                        tilt_deg = 180.0-120

                    # d0 is a displacement vector away from the position of at1
                    d0 = _tilted_bond(ax1, tilt_deg, bond_length)

                    # Find neighbors within 20.0 Angstrom
                    nbrs = find_neighbors(at1, at2, pig, 20.0, struc)

                    # Now get their coordinates
                    nbrcoords = struc.coordinates.copy()[nbrs,:]

                    # Choose the rotation with the largest separation between
                    # the new atom and the surroundings
                    finxyz = _spin_away_from_neighbors(ax1, p1, [d0], nbrcoords)[0]

                else:
                    # No free rotation: at1, at2 and at3 determine the position
                    finxyz = _fixed_geometry_position(hybr, p1, p2, pig.atcoords[0][ndx3], bond_length)

                implement_atom(struc, pig, target, finxyz)
                Notes.append('Successfully added atom<br>')

            # If there are two targets
            elif len(target_list)==2:
                tar1 = target_list[0]
                tar2 = target_list[1]

                # Check to see whether target atoms are missing
                t1miss = pig.atnames.count(tar1)==0
                if t1miss:
                    Notes.append(label + " is missing atom " + tar1)

                t2miss = pig.atnames.count(tar2)==0
                if t2miss:
                    Notes.append(label + " is missing atom " + tar2)

                # If both are missing, we rotate freely around the bond axis
                if t1miss and t2miss:

                    # Identify atom indices in pig
                    ndx1 = _unique_index(pig.atnames, at1)
                    ndx2 = _unique_index(pig.atnames, at2)

                    if (ndx1==-1) or (ndx2==-1):
                        Notes.append('Could not add atom because required atoms were missing<br>')
                        err = True

                    # If hybridization is sp2, tar1 and tar2 should be 120 degrees
                    # from each other and from at2 (as seen from at1).
                    elif hybr=='sp2':
                        p1 = pig.atcoords[0][ndx1]

                        # ax1 points from at2 to at1
                        ax1 = p1 - pig.atcoords[0][ndx2]
                        ax1 /= np.linalg.norm(ax1)

                        # d1 and d2 are tilted 60 degrees to either side of
                        # ax1, which puts them 120 degrees apart
                        d1 = _tilted_bond(ax1, 60.0, bond_length)
                        d2 = _tilted_bond(ax1, -60.0, bond_length)

                        # Find neighbors within 20.0 Angstrom
                        nbrs = find_neighbors(at1, at2, pig, 20.0, struc)

                        # Now get their coordinates
                        nbrcoords = struc.coordinates.copy()[nbrs,:]

                        # Rotate both atoms together around ax1 to minimize
                        # clashes with neighboring atoms
                        finxyz1, finxyz2 = _spin_away_from_neighbors(ax1, p1, [d1, d2], nbrcoords)

                        implement_atom(struc, pig, tar1, finxyz1)
                        implement_atom(struc, pig, tar2, finxyz2)
                        Notes.append('Successfully added 2 atoms<br>')

                    # Other hybridizations are not supported for atom pairs
                    else:
                        Notes.append('Sorry, this repair is not yet implemented<br>')
                        err = True

                # Exactly one atom of the pair is missing
                elif t1miss or t2miss:
                    Notes.append('Sorry, this repair is not yet implemented<br>')
                    err = True
    
    
    # Determine file name for output
    # NOTE(review): writetxt.value is user-entered text used directly in a
    # path -- confirm it is validated before this point.
    fname = DATADIR + "/pdb/" + writetxt.value
    if fname[-4:]!='.pdb':
        fname += '.pdb'
    
    # Finally, generate a new structure, using the 
    # user-requested biomolecular symmetry
    gen_struc(fname)
    
    if err==True:
        outtxt = "<p style=\"font-size:14pt;\">Not all repairs were completed successfully. See details below.</p>"
    else:
        outtxt = "<p style=\"font-size:14pt;\">All repairs successful. See details below.</p>"
    
    outtxt += "<p style=\"font-size:14pt;\">Click <a href=\"" + fname + "\" download>here</a> to download structure.<br><br></p>"
    for line in Notes:
        outtxt += "<p style=\"font-size:10pt;\">" + line + "</p>"
        
    
    stattxt.value = outtxt
    

In [14]:
def gen_struc(fname):
    """Assemble the selected chains into one structure, write it and reload it.

    The chains and the selected biomolecule come from get_rep_chains().
    Without a biomolecule, each selected chain is copied once. With a
    biomolecule, a copy of the chain is made for every matrix of every
    transform that names the chain; each matrix is 3x4, with the rotation
    in columns 0-2 and the translation in column 3. The first copy of a
    chain keeps its chain ID and later copies receive an unused ID.

    The combined structure is written to `fname` with write_pdb and then
    passed to init_struc.

    Parameters
    ----------
    fname : str
        Path of the PDB file to write.

    Raises
    ------
    ValueError
        If no structure was generated (nothing selected), or if the pool
        of single-character chain IDs is exhausted.
    """
    # First identify which chains are to be used
    chains, biom = get_rep_chains()
    
    # Make a list of new chain characters that are available. 
    # We first try capital letters, then lowercase, then numbers 0 - 9
    chain_chars = list(string.ascii_uppercase)
    chain_chars.extend(string.ascii_lowercase)
    chain_chars.extend([str(num) for num in range(0, 10)])

    # Reserve the IDs of every selected chain up front. Otherwise a symmetry
    # copy of an early chain could be given the ID of a selected chain that
    # is only processed later, producing two chains with the same ID.
    for chain in chains:
        if chain in chain_chars:
            chain_chars.remove(chain)
        
    # Create an empty list of structures to store results in
    strucList = []
    
    # For each chain, generate all new structures built from it
    for chain in chains:
        
        # If no biomolecule symmetry transform is indicated, just copy the chain
        if biom == []:
            strucList.append(struc[chain,:,:])
            continue

        # Otherwise apply every transform that involves this chain
        used_chains = []
        for transf in biom.transforms:
            if transf.chains.count(chain)>0:
                for mat in transf.matrices:
                    strucList.append(struc[chain,:,:])
                    oldcoords = strucList[-1].coordinates

                    # Rotate, then translate
                    newcoords = np.transpose(mat[0:3,0:3]@(oldcoords.T))
                    newcoords += mat[:,3]
                    strucList[-1].coordinates = newcoords
                    
                    # Assign chain character: a chain ID that has already
                    # been emitted is replaced by the next free one
                    ch = strucList[-1].residues[0].chain
                    if ch in used_chains:
                        if len(chain_chars)==0:
                            raise ValueError('Ran out of chain identifiers while generating symmetry copies')
                        newchain = chain_chars[0]
                        del chain_chars[0]
                        for res in strucList[-1].residues:
                            res.chain = newchain
                    
                    # Add to used-chains list
                    used_chains.append(ch)
                    
                    # remove from chain_chars so it won't be used again
                    if ch in chain_chars:
                        chain_chars.remove(ch)
                        
    # Without any sub-structure there is nothing to write
    if len(strucList)==0:
        raise ValueError('No chains are selected; nothing to write to ' + fname)

    # Combine all sub-structures into one
    selstruc = strucList[0]
    for substruc in strucList[1:]:
        selstruc = selstruc + substruc
    
    selstruc.write_pdb(fname, altlocs='first')
    init_struc(fname)
        
        

In [15]:
class biotrans:
    """One set of REMARK 350 symmetry operations and the chains it applies to.

    Parameters
    ----------
    text : list of str
        REMARK 350 lines of one block: the "APPLY THE FOLLOWING TO CHAINS:"
        line, any "AND CHAINS:" continuation lines, and the BIOMT rows.

    After construction:
      text     -- the lines passed in
      chains   -- list of chain identifiers named in the block
      matrices -- list of 3x4 numpy arrays; columns 0-2 are the matrix
                  elements and column 3 is the shift, one array per three
                  consecutive BIOMT rows
    """

    def __init__(self, text):
        self.text = text
        chains = []
        matrices = []
        cmat = []
        for line in text:
            # First line of the chain list: start over
            if 'REMARK 350 APPLY THE FOLLOWING TO CHAINS:' in line:
                chains = self._parse_chains(line)

            # Continuation of the chain list
            if re.search("REMARK 350[ ]*AND CHAINS:", line):
                chains.extend(self._parse_chains(line))
            
            # One row of a transformation matrix. Fields 4-6 are the matrix
            # elements and field 7 is the shift (field 3, the serial number
            # of the operation, is not needed).
            if re.search("REMARK 350[ ]*BIOMT[0-9]*", line):
                data = line.split()
                row = np.array([float(data[k]) for k in (4, 5, 6, 7)])
                
                cmat.append(row)
                
                # Three rows complete one matrix
                if len(cmat)==3:
                    matrices.append(np.array(cmat))
                    cmat = []
                    
        self.matrices = matrices
        self.chains = chains

    @staticmethod
    def _parse_chains(line):
        """Return the chain identifiers listed after the colon in `line`.

        Splitting on ',' and dropping empty pieces copes with the trailing
        comma that ends a chain list continued on the next line.
        """
        chtext = line.split(':')[1]
        return [piece.strip() for piece in chtext.split(',') if piece.strip()]



class biomolecule:
    """One REMARK 350 biomolecule entry.

    Parameters
    ----------
    text : list of str
        The REMARK 350 lines belonging to a single biomolecule.

    After construction:
      text       -- the lines passed in
      number     -- the biomolecule number, or -1 if no number line was seen
      quat       -- quaternary-structure description; the author-determined
                    value wins, the software-determined one is the fallback
      transforms -- one biotrans per "APPLY THE FOLLOWING TO CHAINS" block
      widgbox    -- starts as an empty list; filled in elsewhere
    """

    def __init__(self, text):
        self.text = text
        self.number = -1
        self.transforms = []
        self.quat = ''
        self.widgbox = []

        software_tag = 'SOFTWARE DETERMINED QUATERNARY STRUCTURE:'
        author_tag = 'AUTHOR DETERMINED BIOLOGICAL UNIT:'
        number_tag = 'REMARK 350 BIOMOLECULE:'
        block_tag = 'REMARK 350 APPLY THE FOLLOWING TO CHAINS:'

        # Each element of blocks collects the lines of one transform
        blocks = []
        for entry in text:

            # The software-determined value only fills an empty slot ...
            if software_tag in entry and self.quat=='':
                self.quat = entry.split(':')[1].strip()

            # ... whereas the author-determined value always takes over
            if author_tag in entry:
                self.quat = entry.split(':')[1].strip()

            # Biomolecule number
            if number_tag in entry:
                self.number = int(entry.split(':')[1])

            if block_tag in entry:
                # This line opens a new block
                blocks.append([entry])
            elif blocks:
                # Any other line belongs to the block currently open;
                # lines seen before the first block are not stored
                blocks[-1].append(entry)

        # Parse every block into a transform
        self.transforms.extend(biotrans(block) for block in blocks)

            
        

def find_biomt(fname):
    """Read the biological-unit records of ``fname`` and fill the "Bio Unit" panel.

    Every line of the file that contains ``REMARK 350`` is collected and
    split into one group per ``REMARK 350 BIOMOLECULE:`` header (lines that
    precede the first header are dropped). Each group becomes a
    ``biomolecule`` object stored in the global ``BioList``.
    ``symbox.children`` is then replaced by a ticked "None" checkbox followed
    by, for every biological unit, an unticked checkbox with a description of
    its structure and chains.

    Parameters
    ----------
    fname : str
        Path of the structure file to scan.
    """

    # List of biomolecules
    global BioList
    BioList = []

    # Collect the REMARK 350 lines in Python rather than through a shell
    # `grep`: the file name may come from a user upload, and an unquoted
    # name breaks the command on spaces and is open to shell injection.
    with open(fname, 'r', errors='replace') as fd:
        out = [line.rstrip('\n') for line in fd if 'REMARK 350' in line]

    # List of all biomolecule entries
    biotext = []

    # Current biomolecule entry
    ctext = []
    for line in out:

        # When we find a new biomolecule entry
        if 'REMARK 350 BIOMOLECULE:' in line:

            # If we're already recording a biomolecule entry,
            # stash the current one and start again
            if len(ctext)>0:
                biotext.append(ctext)
                ctext = []

            # Either way, append the new line to ctext
            ctext.append(line)
        else:
            # If we're currently adding a biomolecule entry,
            # add any new lines
            if len(ctext)>0:
                ctext.append(line)

    # When we run out of text, stash any unfinished entry
    if len(ctext)>0:
        biotext.append(ctext)

    # Identify biological units from REMARK 350 entries
    for biot in biotext:
        BioList.append(biomolecule(biot))

    # First option: "None", selected by default
    cb = widgets.Checkbox(True, description='<b>None</b>', width='0cm')
    cb.style.description_width='0cm'
    cb.observe(update_biobox)
    boxlist = [cb]

    # One checkbox plus description per biological unit
    for biol in BioList:
        cb = widgets.Checkbox(False, description='<b>Unit '+str(biol.number)+'</b>', width='0cm')
        cb.style.description_width='0cm'
        cb.observe(update_biobox)
        descr = widgets.HTML(value='Structure: ' + biol.quat + '<br>')

        for biom in biol.transforms:
            descr.value = descr.value + 'Chains: ' + ', '.join(biom.chains) + '<br>'

        # Just for spacing
        buffer = widgets.HTML(value='',
                     layout=Layout(width='0.5cm'))

        bbox = widgets.Box([cb, widgets.Box([buffer, descr])],
                   layout=Layout(flex_flow='column', align_items='flex-start'))
        boxlist.append(bbox)

        # Remembered so update_biobox() can reach this unit's checkbox
        biol.widgbox = bbox

    symbox.children = boxlist
    
    
def update_biobox(b):
    """Keep the "Bio Unit" checkboxes mutually exclusive.

    Registered as a trait observer on the "None" checkbox and on the checkbox
    of every biological unit. When one of them is ticked, all the others are
    unticked.

    Parameters
    ----------
    b : change dictionary delivered by ``observe``
        The entries ``name``, ``new`` and ``owner`` are read.
    """
    global BioList

    # React only when a checkbox has just been ticked. The handler is
    # registered for every trait, and unticking the other boxes below
    # re-enters it; without this guard that nested call would untick the
    # very box that triggered the update.
    if b.name != 'value' or not b.new:
        return

    # Uncheck "none" if another box has been checked
    if b.owner != symbox.children[0]:
        symbox.children[0].value = False

    # Uncheck all options except the newly selected one
    for biol in BioList:
        if biol.widgbox.children[0]!=b.owner:
            biol.widgbox.children[0].value = False
    


In [16]:
# Global variables:
# Module-level state shared by the widget callbacks in this cell.
# init_struc(), update_chains() and find_biomt() rebind them through
# `global` declarations.

# Main ParmEd structure
struc = pmd.structure.Structure()

# File name from which struc was loaded
struc_fname = ''

# Representation list for main structure
mainreps = list()

# Representation list for dipoles
dipreps = list()

# List of chains in struc (init_struc() stores every chain here;
# update_chains() narrows it to the chains whose checkbox is ticked)
ChainList = []

# List of identified pigments
PigList = []

# List of identified biological units
BioList = []

##################################################################
# Main frame layout:
##################################################################

# mainbox is the top widget. It contains:
#  pdbview -- an NGLWidget used to display the loaded structures
#             (an HTML placeholder when nglview could not be imported)
#  mainacc -- an accordion widget containing the following boxes:
#    strucbox -- contains the "Load" interface
#    selbox -- contains the "Select" interface
#    symbox -- contains the "Bio Unit" interface
#    repbox -- contains the "Run Repairs" interface
#  stattxt -- an HTML status line
#  pigacc -- an accordion holding the pigment list (pigbox)

# Structure viewer:
if NGL_DEF:
    pdbview = nv.NGLWidget()
    pdbview._set_size('500px', '500px')
    pdbview.camera = 'orthographic'
    pdbview.background = 'white'
else:
    # nglview is unavailable: show a same-sized notice instead of the viewer
    pdbview = widgets.HTML(value='<p style=\"text-align:center; font-size:20px\"><br><br>Install NGLView library<br>to view structures.</p>', 
                          layout=widgets.Layout(width='500px', height='500px'))


##################################################################
# strucbox: Interface for loading molecular structures
##################################################################

# pdbid: Text entry box for PDB ID 
# pdbidlbl: Label for pdbid
# pdbfetch: Button to fetch PDB from the RCSB databank
# pdbup: Button to upload a structure file (.pdb or .gro)
# pdbuplbl: Label for pdbup


pdbid = widgets.Text(
    value='2DRE',
    placeholder='',
    layout = widgets.Layout(width='1.5cm'),
    disabled=False
)

pdbidlbl = widgets.Label(value='Enter PDB ID:', layout=Layout(width='2.5cm'))

pdbfetch = widgets.Button(
    description='Fetch',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Click to display the pdb file',
    layout = widgets.Layout(width='2.25cm'),
    icon='' # (FontAwesome names without the `fa-` prefix)
)

pdbup = widgets.FileUpload(
    accept='.pdb, .gro',  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
    multiple=False  # True to accept multiple files upload else False
)

pdbuplbl = widgets.Label(value='Or upload file:', layout=Layout(width='2.5cm'))

# Executed when pdbfetch is clicked
def pdbfetch_onclick(b):
    """Download the entry named in ``pdbid`` from RCSB and load it.

    The downloaded file is written to ``DATADIR/pdb/<ID>.pdb`` and handed to
    init_struc(); afterwards the status line ``stattxt`` is cleared. A
    message is printed, and nothing is loaded, when the ID is unusable, the
    server cannot be reached, or the server does not answer with status 200.

    Parameters
    ----------
    b : the button that was clicked (unused)
    """
    pdb_code = pdbid.value.strip()

    # The ID becomes part of a URL and of a local file name, so refuse
    # anything that could point outside DATADIR/pdb.
    if len(pdb_code)==0 or any(bad in pdb_code for bad in forbidden_strings):
        print('Invalid PDB code. Please try again.')
        return

    url = 'https://files.rcsb.org/download/' + pdb_code + '.pdb'
    try:
        # The timeout keeps the callback from hanging on a dead connection
        r = requests.get(url, allow_redirects=True, timeout=30)
    except requests.exceptions.RequestException as err:
        print('Could not download the PDB file: ' + str(err))
        return

    if(r.status_code!=200):
        print('Invalid PDB code. Please try again.')
    else:
        fname = DATADIR + '/pdb/'+pdb_code+'.pdb'
        # The context manager closes the file even if the write fails
        with open(fname, 'wb') as wfd:
            wfd.write(r.content)
        init_struc(fname)
        stattxt.value = ''

# Executed when the value of pdbup changes (i.e. a file has been uploaded)
def pdbup_on_value_change(change):
    """Store the uploaded file under ``DATADIR/pdb`` and load it.

    ``pdbup.value`` is read as a dictionary keyed by file name whose entries
    carry the raw bytes under ``"content"``. When several keys are present
    the last one is used.

    Parameters
    ----------
    change : change dictionary delivered by ``observe`` (unused)
    """

    # The value can also change to "nothing uploaded"; there is no file
    # name to work with in that case.
    if len(pdbup.value)==0:
        return

    for item in pdbup.value:
        fname = item

    # Keep only the final path component so that the upload cannot be
    # written outside DATADIR/pdb.
    safe_name = os.path.basename(fname.replace('\\', '/'))
    if safe_name in ('', '.', '..'):
        print('Invalid file name. Please try again.')
        return

    target = DATADIR + "/pdb/" + safe_name
    with open(target, "wb") as fp:
        fp.write(pdbup.value[fname]["content"])
    init_struc(target)
        
# Call pdbup_on_value_change whenever the 'value' trait of pdbup changes
pdbup.observe(pdbup_on_value_change, 'value')



# strucbox: the "Load" panel. First row: fetch by PDB ID; second row: file upload.
strucbox = widgets.VBox([
    widgets.HBox([pdbidlbl, pdbid, pdbfetch]), 
    widgets.HBox([pdbuplbl, pdbup]),
])

# For now the displayed dipole structure is completely
# independent of the classified pigments.
def build_dipstruc(struc):
    """Build a pseudo-structure that draws one rod per porphyrin residue.

    A residue is treated as a porphyrin when it contains atoms named
    NA, NB, NC and ND. For each such residue, 11 pseudo-atoms named 'N'
    (residue name 'Dip', same chain and residue index as the source residue)
    are placed on the straight line through NB and ND, centred on their
    midpoint and 2.5 times as long as the NB-ND separation.

    Parameters
    ----------
    struc : ParmEd structure to search

    Returns
    -------
    (dipstruc, porphtxt)
        dipstruc -- structure holding the pseudo-atoms (no atoms when no
                    porphyrin was found)
        porphtxt -- text of the form '(i OR j OR ...)' built from the
                    residue indices plus one, or '' when none was found
    """
    ring_nitrogens = ['NA', 'NB', 'NC', 'ND']
    n_steps = 10

    dipstruc = pmd.structure.Structure()
    rod_points = []
    porph_labels = []

    for res in struc.residues:

        # Atom index of every ring nitrogen present in this residue
        # (a repeated atom name keeps its last occurrence)
        found = {}
        for at in res:
            if at.name in ring_nitrogens:
                found[at.name] = at.idx

        # Not a porphyrin unless all four ring nitrogens were located
        if len(found) != len(ring_nitrogens):
            continue

        coords = struc.coordinates
        pos_b = coords[found['NB']]
        pos_d = coords[found['ND']]
        cenvec = 0.5*(pos_b + pos_d)
        dipvec = pos_d - pos_b
        vStart = cenvec - 1.25*dipvec
        vStop = cenvec + 1.25*dipvec

        # Evenly spaced points from vStop (step 0) to vStart (last step)
        for step in range(n_steps + 1):
            frac = float(step)/float(n_steps)
            dipstruc.add_atom(pmd.topologyobjects.Atom(name='N'), 'Dip', res.idx, chain=res.chain)
            rod_points.append(frac*vStart + (1 - frac)*vStop)

        porph_labels.append(str(res.idx+1))

    # Coordinates and selection text exist only if a porphyrin was located
    if rod_points:
        dipstruc.coordinates = np.array(rod_points)
        porphtxt = '(' + ' OR '.join(porph_labels) + ')'
    else:
        porphtxt = ""

    return dipstruc, porphtxt
    
    
    

# init_struc() loads a structure from the provided file name
# and initializes the structure view representations and chainlists
#
# Outside references:
#   sets pigbox.children (through build_pigbox()) and chainbox.children
#   each time a new structure is loaded
#
# Relies on module-level variables defined elsewhere in the notebook:
#   UNK
#   tetpy
#   TetPyList
def init_struc(fname):
    """Load ``fname`` and rebuild every widget that depends on the structure.

    In order, this:
      * empties the "Bio Unit" panel and, when nglview is available, the viewer;
      * loads the file with ParmEd into the global ``struc``;
      * rebuilds the global ``ChainList`` and the viewer representations
        (``mainreps`` for the structure, ``dipreps`` for the dipole rods);
      * identifies the pigments with the ``pigment`` module and stores them
        in the global ``PigList``;
      * rebuilds the pigment list, the chain checkboxes and the "Bio Unit"
        panel, proposes an output file name in ``writetxt``, enables the
        repair button and opens accordion panel 3.

    Parameters
    ----------
    fname : str
        Path of the structure file to load.
    """

    global mainreps
    global dipreps
    global struc
    global struc_fname
    global ChainList
    global PigList

    symbox.children = []

    if NGL_DEF:
        # Clear pdbview stage
        while len(pdbview._ngl_component_ids)>0:
            pdbview.remove_component(pdbview._ngl_component_ids[0])

    # Reset rep lists
    mainreps = list()
    dipreps = list()

    # Clear outputs
    repout.value = ''

    struc = pmd.load_file(fname)
    struc_fname = fname
    dipstruc, porphtxt = build_dipstruc(struc)

    # Chains in order of first appearance
    ChainList = []
    for res in struc.residues:
        if res.chain not in ChainList:
            ChainList.append(res.chain)

    # Selection text of the form '(:A OR :B)'
    chaintxt = '(' + ' OR '.join(':' + chain for chain in ChainList) + ')'

    if NGL_DEF:
        pdbview.add_trajectory(struc)
        mainreps.append({"type": "cartoon", "params": {"color": "grey", "sele": "(protein) AND " + chaintxt, "opacity": "0.2"}})
        if len(porphtxt)>0:
            mainreps.append({"type": "licorice", "params": {"color": "green", "sele": porphtxt + ' AND ' + chaintxt, "opacity": "1.0"}})
        pdbview.set_representations(mainreps, component=0)

        if len(dipstruc.atoms)>0:
            pdbview.add_trajectory(dipstruc)
            dipreps = [{"type": "licorice", "params": {"color": "red", "sele": chaintxt, "opacity": "1", "radius": "0.35"}}]
            pdbview.set_representations(dipreps, component=1)

    # Assign tetrapyrrole types:
    # 1. Identify tetrapyrrole rings
    PigNdcs = pigment.find_pigments(tetpy.BaseNames, struc)

    # 2. Check which types are definitely excluded for each pigment
    alist = pigment.eliminate_types(PigNdcs, tetpy, TetPyList, UNK, struc)

    # 3. Now check if all xatoms of each type are present
    mlist = pigment.match_types(PigNdcs, alist, struc)

    # 4. Based on this data, assign pigment types
    tlist = pigment.assign_pigments(PigNdcs, mlist, alist, struc)

    PigList = []
    # Record pigments:
    for p in range(0, len(PigNdcs)):
        ndx = PigNdcs[p]
        res = struc.residues[ndx]
        atom_names = [at.name for at in res]
        atom_ndcs = np.array([at.idx for at in res], dtype='int')

        # First dimension is frame #, second is atom number, third is x,y,z.
        # Indexing with an index array already yields a new array, so the
        # full coordinate set is not copied first.
        atcoords = struc.get_coordinates()[:,atom_ndcs,:]
        PigList.append(pigment.pigment(ndx, tlist[p], alist[p], res, atom_names, atcoords))

    build_pigbox()

    chaincbs = []
    for chain in ChainList:
        chaincbs.append(widgets.Checkbox(value=True, description=chain,indent=False, layout=Layout(width='100px')))
    for cb in chaincbs:
        cb.observe(update_chains)
    chainbox.children = chaincbs

    # Default output name. Path handling copes with extra dots in the file
    # or directory names and with file names that have no extension.
    if 'data/pdb/' in fname:
        # .../data/pdb/<prefix>.<ext>
        prefix = Path(fname).stem
    elif 'data/md/' in fname:
        # the directory two levels above the file
        prefix = Path(fname).parent.parent.name
    else:
        prefix = 'test'

    writetxt.value = prefix+'.pdb'

    find_biomt(fname)

    repbt.disabled = False

    mainacc.selected_index = 3
    

# Syncs structure display to selected chains in chainbox
def update_chains(b):
    """Restrict the displayed representations to the ticked chains.

    Rebuilds the global ``ChainList`` from the ticked checkboxes in
    ``chainbox`` and, when nglview is available, replaces the last
    parenthesised group of every selection string in ``mainreps`` and of the
    first entry of ``dipreps`` with the new chain selection before
    re-applying the representations.

    Parameters
    ----------
    b : change dictionary delivered by ``observe`` (unused)
    """
    global dipreps
    global mainreps
    global ChainList

    ChainList = [cb.description for cb in chainbox.children if cb.value]

    chaintxt = ' OR '.join(':' + chain for chain in ChainList)

    # If no chain is selected, set to a nonsense chain
    # so that none will be displayed.
    if not chaintxt:
        chaintxt = ':XXXXXXXXXX'

    def _with_new_chains(sele):
        # Keep every '('-delimited piece between the first and the last,
        # then put the new chain group where the last one was.
        pieces = sele.split('(')
        kept = ''.join('(' + piece for piece in pieces[1:-1])
        return kept + '(' + chaintxt + ')'

    # Without the viewer there is nothing to refresh
    if not NGL_DEF:
        return

    for rep in mainreps:
        rep['params']['sele'] = _with_new_chains(rep['params']['sele'])
    pdbview.set_representations(mainreps, component=0)

    if dipreps:
        dipreps[0]['params']['sele'] = _with_new_chains(dipreps[0]['params']['sele'])
        pdbview.set_representations(dipreps, component=1)
    
# Call pdbfetch_onclick whenever the "Fetch" button is pressed
pdbfetch.on_click(pdbfetch_onclick)

##################################################################
# selbox: Interface for selecting chains and residues
##################################################################

# Label for chain list
chainlbl = widgets.Label(value='Chain:')

# The children (one checkbox per chain) of chainbox are set in init_struc()
# each time a new structure is loaded.
chainbox = widgets.VBox([])

# Button that ticks every chain checkbox
selall = widgets.Button(
    description='All',
    disabled=False,
    tooltip='Click to select all chains',
    layout = widgets.Layout(width='1.5cm'),
)

# Button that unticks every chain checkbox
selnone = widgets.Button(
    description='None',
    disabled=False,
    tooltip='Click to deselect all chains',
    layout = widgets.Layout(width='1.5cm'),
)

def selall_onclick(b):
    """Tick every chain checkbox currently held in chainbox.

    b is the clicked button and is not used.
    """
    boxes = chainbox.children
    for pos in range(len(boxes)):
        boxes[pos].value = True
selall.on_click(selall_onclick)

def selnone_onclick(b):
    """Untick every chain checkbox currently held in chainbox.

    b is the clicked button and is not used.
    """
    boxes = chainbox.children
    for pos in range(len(boxes)):
        boxes[pos].value = False
selnone.on_click(selnone_onclick)

# selbox: the "Select" panel -- All/None buttons above the chain checkboxes
selbox = widgets.VBox([widgets.HBox([selall, selnone]), chainlbl, chainbox])


##################################################################
# symbox: Interface for generating biological multimers
##################################################################

# Starts empty; find_biomt() fills it with one option per biological unit.
# Layout has no `layout` trait, so the nested layout=Layout(width='3cm')
# argument formerly passed here was never applied and has been dropped.
symbox = widgets.Box([],
                      layout=Layout(flex_flow='column',
                                   align_items='flex-start'))


##################################################################
# repbox: Interface for repairing pigments
##################################################################

# Output file name; init_struc() replaces the default after each load
writetxt = widgets.Text(value='2DRE.pdb', description='File Name:', disabled=False, layout=widgets.Layout(width='5cm'))

reptext = widgets.HTML('Search for and \"sprout\" missing atoms.')

# Stays disabled until init_struc() has loaded a structure
repbt = widgets.Button(description='Go!', disabled=True)
repbt.on_click(run_repairs)

# Message area; init_struc() clears it on every load
repout = widgets.HTML('')

# As for symbox, the ineffective nested layout=Layout(width='3cm') is dropped
repbox = widgets.Box([writetxt, reptext, repbt, repout],
                      layout=Layout(flex_flow='column',
                                   align_items='center'))




#######################################################
# Main Box
#######################################################


def update_pigtypes(change):
    """Copy the type chosen in each pigment's dropdown onto the pigment.

    For every pigment in ``PigList`` whose dropdown (second child of
    ``pig.widget``) shows a name different from ``pig.species.name``, the
    first entry of ``pig.alist`` carrying that name becomes ``pig.species``.
    Pigments without a matching entry are left unchanged.

    Parameters
    ----------
    change : change dictionary delivered by ``observe`` (unused)
    """
    global PigList

    for pig in PigList:
        chosen = pig.widget.children[1].value

        # Nothing to do when the dropdown already agrees with the pigment
        if chosen == pig.species.name:
            continue

        match = next((typ for typ in pig.alist if typ.name == chosen), None)
        if match is not None:
            pig.species = match

def build_pigbox():
    """Rebuild the pigment list shown in ``pigbox``.

    For every chain in ``ChainList`` that contains at least one pigment, a
    column is created with a 'Chain X:' label followed by one row per
    pigment. Each row is stored on the pigment as ``pig.widget`` and holds a
    ticked checkbox labelled with residue name, chain and residue number,
    plus a dropdown listing the names in ``pig.alist`` (changes are reported
    to update_pigtypes()).
    """
    global pigbox
    global PigList

    pigbox.layout = Layout(display='flex',
                    flex_flow='row wrap',
                    align_items='stretch',
                    width='100%')

    columns = []
    for chain in ChainList:
        rows = []
        for pig in PigList:
            res = struc.residues[pig.idx]

            # Only pigments belonging to the chain of this column
            if res.chain != chain:
                continue

            type_menu = widgets.Dropdown(
                        options=[typ.name for typ in pig.alist],
                        value=pig.species.name,
                        description='',
                        disabled=False,
                        layout=Layout(width='2cm')
                    )
            type_menu.observe(update_pigtypes, 'value')

            row_label = res.name + ' ' + chain + str(res.number)
            include_cb = widgets.Checkbox(
                value=True,
                description=row_label,
                disabled=False,
                indent=False,
                layout=Layout(width='2.5cm')
            )

            # Note that pigment widget definition here differs from usage in main.ipynb.
            # Here pig.widget includes both the dropdown AND a select/exclude indicator
            pig.widget = widgets.Box([include_cb, type_menu],
                                  layout=Layout(flex_flow='row',
                                                align_items='flex-start',
                                                width='5.5cm'))
            rows.append(pig.widget)

        # Chains without pigments get no column
        if rows:
            heading = widgets.Label(value='Chain ' + chain + ":")
            columns.append(widgets.VBox([heading] + rows))

    pigbox.children = columns

# Accordion holding the four control panels shown next to the viewer.
# init_struc() opens panel 3 ('Run Repairs') after a structure is loaded.
mainacc = widgets.Accordion(children=[strucbox, selbox, symbox, repbox], layout=Layout(width='8cm'))
mainacc.set_title(0, 'Load')
mainacc.set_title(1, 'Select')
mainacc.set_title(2, 'Bio Unit')
mainacc.set_title(3, 'Run Repairs')

# Status line; pdbfetch_onclick() clears it after a successful fetch
stattxt = widgets.HTML(value='')

# pigbox is filled by build_pigbox() with one column of pigment widgets per chain
pigbox = widgets.HBox()
pigacc = widgets.Accordion(children=[pigbox])
pigacc.set_title(0, 'Pigment List')


# Top-level layout: viewer and control panels side by side, then the
# status line, then the pigment list
mainbox = widgets.VBox([widgets.HBox([pdbview, mainacc]), stattxt, pigacc])
display(mainbox)


VBox(children=(HBox(children=(NGLWidget(), Accordion(children=(VBox(children=(HBox(children=(Label(value='Ente…