# Removal of Middle Waters from United Atom Bilayers post gmx_solvate

Naveen Mohideen, Cornell University, Feigenson Lab    10.17.18

In [1]:
from __future__ import print_function
%matplotlib inline
import mdtraj as md
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon
#import scipy.cluster.hierarchy
import scipy.spatial

In [None]:
traj=md.load('ua_symRho=ptseven-gmxsolvated.xtc',top='ua_symRho=ptseven-gmxsolvated.gro')

In [None]:
name='ua_symRho=ptseven-gmxsolvated'

In [None]:
traj.n_frames

In [273]:
topology=traj.topology
allhead=topology.select('name P8 or name P11')
water=topology.select('name O')

In [274]:
resnames=[atom.residue.name for atom in topology.atoms]

# 1: Sort Lipids into Upper/Lower Leaflet

In [275]:
#x,y coordinates of all for finding neighbors to determine local midplane z-value
# here, best to use xy radius of neighbors, since Voronoi across 2 leaflets could have complications

allheadxy=list([] for _ in xrange(traj.n_frames))
allheadz=list([] for _ in xrange(traj.n_frames))
for nn in range(traj.n_frames):
    for i in range(len(allhead)):
        allheadi=allhead[i]
        allheadxy[nn].append(traj.xyz[nn][allheadi][0:2:1])
        allheadz[nn].append(traj.xyz[nn][allheadi][2])


In [276]:
# mdtraj can't handle neighbors for gro (rather than xtc) inputs, so do it myself
#cutoffsq = 2.4**2; #square of maximum xy distance for neighbors
cutoffsq=1.0**2
neigh=list([] for _ in xrange(traj.n_frames))
for nn in range(traj.n_frames):
    neigh[nn]=list([] for _ in xrange(len(allhead)))
    for i in range(len(allheadxy[nn])):
        xyi=allheadxy[nn][i]
        for j in range(len(allhead)-i-1): #not self, but will add for midplane finding
            xyj=allheadxy[nn][j+i+1]
            distsq=(xyi[0]-xyj[0])**2 + (xyi[1]-xyj[1])**2
            if (distsq < cutoffsq):
                neigh[nn][i].append(j+i+1)
                neigh[nn][j+i+1].append(i)
  

In [277]:
#new leaflet id method based on tilt angles; PO4-C4A or ROH-C1 (both +6 beads)
num_head=len(allhead)
tiltvectors=list([] for _ in xrange(traj.n_frames))

for nn in range(traj.n_frames):
    tiltvectors[nn]=list([] for _ in xrange(num_head)) #store vector roh-c1 or po4-c4a
    
    for i in range(num_head):
        tiltvectors[nn][i]=traj.xyz[nn][allhead[i]]-traj.xyz[nn][allhead[i]+6]


In [278]:
# must fix periodic boundary condition errors in tiltvectors, then calculate tiltangle
norms=list([] for _ in xrange(traj.n_frames))
for nn in range(traj.n_frames):
    norms[nn]=list([] for _ in xrange(len(allhead)))
    halfx=0.5*traj.unitcell_lengths[nn][0]
    halfy=0.5*traj.unitcell_lengths[nn][1]
    halfz=0.5*traj.unitcell_lengths[nn][2]
    for i in range(len(allhead)):
        norms[nn][i]=np.linalg.norm(tiltvectors[nn][i])
        if (norms[nn][i] > halfz):
            if (np.abs(tiltvectors[nn][i][0]) > halfx):
                if (tiltvectors[nn][i][0]>0): 
                    tiltvectors[nn][i][0]=tiltvectors[nn][i][0]-2*halfx
                else:
                    tiltvectors[nn][i][0]=tiltvectors[nn][i][0]+2*halfx
            if (np.abs(tiltvectors[nn][i][1]) > halfy):
                if (tiltvectors[nn][i][1]>0): 
                    tiltvectors[nn][i][1]=tiltvectors[nn][i][1]-2*halfy
                else:
                    tiltvectors[nn][i][1]=tiltvectors[nn][i][1]+2*halfy
            if (np.abs(tiltvectors[nn][i][2]) > halfz):
                if (tiltvectors[nn][i][2]>0): 
                    tiltvectors[nn][i][2]=tiltvectors[nn][i][2]-2*halfz
                else:
                    tiltvectors[nn][i][2]=tiltvectors[nn][i][2]+2*halfz

    for i in range(len(allhead)):
        norms[nn][i]=np.linalg.norm(tiltvectors[nn][i])


In [279]:
# use neigh to find local average tilt vector, outliers are not in a leaflet
# if pointing neg in z, flip in x,y,z for making average in outer leaflet
localvector=list([] for _ in xrange(traj.n_frames))
for nn in range(traj.n_frames):
    localvector[nn]=list([] for _ in xrange(len(allhead)))
    for i in range(len(allhead)):
        localvectorsx=[]
        localvectorsy=[]
        localvectorsz=[]
        sgn=np.sign(tiltvectors[nn][i][2])
        localvectorsx.append(sgn*tiltvectors[nn][i][0]) #include self
        localvectorsy.append(sgn*tiltvectors[nn][i][1]) #include self
        localvectorsz.append(sgn*tiltvectors[nn][i][2]) #include self
        for j in range(len(neigh[nn][i])):
            sgn=np.sign(tiltvectors[nn][neigh[nn][i][j]][2])
            localvectorsx.append(sgn*tiltvectors[nn][neigh[nn][i][j]][0])
            localvectorsy.append(sgn*tiltvectors[nn][neigh[nn][i][j]][1])
            localvectorsz.append(sgn*tiltvectors[nn][neigh[nn][i][j]][2])
        localvector[nn][i]=[np.mean(localvectorsx),np.mean(localvectorsy),np.mean(localvectorsz)]
       

In [280]:
# find angle between orientation vector and local average orientation vector for each lipid
# in range [0,180]
diffangle=list([] for _ in xrange(traj.n_frames))
for nn in range(traj.n_frames):
    diffangle[nn]=list([] for _ in xrange(len(allhead)))
    for i in range(len(allhead)):
        normlv=np.linalg.norm(localvector[nn][i])
        normtv=np.linalg.norm(tiltvectors[nn][i])
        cos=np.dot(localvector[nn][i],tiltvectors[nn][i])/(normlv*normtv)
        if (cos==0):
            diffangle[nn][i]=90
        elif (cos==1 and np.sign(localvector[nn][i][2])==np.sign(tiltvectors[nn][i][2])):
            diffangle[nn][i]=0
        elif (cos==1 and np.sign(localvector[nn][i][2])==np.sign(tiltvectors[nn][i][2])):
            diffangle[nn][i]=180
        else:
            diffangle[nn][i]=np.arccos(cos)*180./np.pi


In [281]:
#have array with values placing each head in one leaflet: 0=lower, 1=upper, 2=between
head_leaflet=list([] for _ in xrange(traj.n_frames))
for nn in range(traj.n_frames):
    head_leaflet[nn]=list([] for _ in xrange(len(allhead)))
    for i in range(len(allhead)):
        if (diffangle[nn][i]>120):
            head_leaflet[nn][i]=0
        elif (diffangle[nn][i]<60):
            head_leaflet[nn][i]=1
        else:
            head_leaflet[nn][i]=2

In [282]:
# want to identify all midplane chol and remove all "midplane" phospholipids
# tilt angle insufficient
# chol: find distance from ROH to nearest PO4; if beyond threshold, then in midplane; 
# else, copy leaflet ID (do for all phospholipids); only if copying non-midplane

for nn in range(traj.n_frames):
    fixedchol=0
    fixedphos=0
    midplanechol=0
    for i in range(len(allhead)):
        mindist=100.0
        if (head_leaflet[nn][i]==2 and (not resnames[allhead[i]]=='CHOL')): #phospholipids, must assign
            for j in range(len(neigh[nn][i])):
                if (not resnames[allhead[neigh[nn][i][j]]]=='CHOL'):
                    dist=np.linalg.norm(traj.xyz[nn][allhead[i]]-traj.xyz[nn][allhead[neigh[nn][i][j]]])
                    if (dist < mindist and (not head_leaflet[nn][neigh[nn][i][j]]==2)):
                        mindist=dist
                        correct_leaflet=head_leaflet[nn][neigh[nn][i][j]]
            head_leaflet[nn][i]=correct_leaflet
            fixedphos=fixedphos+1
    #must fix all phospholipids before all chols, since some may use fixed phospholipid leaflet id
    # so, must go through allhead twice
    for i in range(len(allhead)):
        mindist=100.0
        if (resnames[allhead[i]]=='CHOL'): #all chol, ignoring angle
            for j in range(len(neigh[nn][i])):
                if (not resnames[allhead[neigh[nn][i][j]]]=='CHOL'):
                    dist=np.linalg.norm(traj.xyz[nn][allhead[i]]-traj.xyz[nn][allhead[neigh[nn][i][j]]])
                    if (dist < mindist and (not head_leaflet[nn][neigh[nn][i][j]]==2)):
                        mindist=dist
                        correct_leaflet=head_leaflet[nn][neigh[nn][i][j]]
            if (mindist<1.4): #close to a headgroup, so in a leaflet
                head_leaflet[nn][i]=correct_leaflet
                fixedchol=fixedchol+1
            else:
                head_leaflet[nn][i]=2
                midplanechol=midplanechol+1

In [283]:
# only need upper leaflet heads for this situation
upperheads=list([] for _ in xrange(traj.n_frames))
lowerheads=list([] for _ in xrange(traj.n_frames))
middle=list([] for _ in xrange(traj.n_frames))
waterhead=list([] for _ in xrange(traj.n_frames))
for nn in range(traj.n_frames):
#    upperheads[nn]=[]
    for i in range(len(allhead)):
        if head_leaflet[nn][i]==1:
            upperheads[nn].append(allhead[i])
        if head_leaflet[nn][i]==0:
            lowerheads[nn].append(allhead[i])
        if head_leaflet[nn][i]==2:
            middle[nn].append(allhead[i])

for nn in range(traj.n_frames): 
    for i in range(len(water)): 
        waterhead[nn].append(water[i])

In [284]:
#find xyz coordinates of water

waterheadxy=list([] for _ in xrange(traj.n_frames))
for nn in range(traj.n_frames):
    for i in range(len(waterhead[nn])):
        waterheadi=waterhead[nn][i]
        waterheadxy[nn].append(traj.xyz[nn][waterheadi][0:3:1])


In [285]:
upperheadxy=list([] for _ in xrange(traj.n_frames)) #[frame][head]
for nn in range(traj.n_frames):
#    headxy[nn]=list([] for _ in xrange(len(upperheads[nn])))
    for i in range(len(upperheads[nn])):
        upperheadsi=upperheads[nn][i]
        upperheadxy[nn].append(traj.xyz[nn][upperheadsi][0:3:1])

lowerheadxy = list([] for _ in xrange(traj.n_frames))
for nn in range(traj.n_frames): 
    for i in range(len(lowerheads[nn])): 
        lowerheadsi=lowerheads[nn][i]
        lowerheadxy[nn].append(traj.xyz[nn][lowerheadsi][0:3:1])
        

# 2: Find Lipid Opposite Leaflet Neighbors

In [286]:
#find closest neighbors between upperheads and lowerheads 
#lipid_opp_distance = list([] for _ in xrange(traj.n_frames))
lipid_opp_neighbors = list([] for _ in xrange(traj.n_frames))

for nn in range(traj.n_frames): 
    for i in range(len(upperheadxy[nn])): 
        pairs=[]
        for j in range(len(lowerheadxy[nn])): 
            r=(upperheadxy[nn][i][0]-lowerheadxy[nn][j][0])**2 + (upperheadxy[nn][i][1]-lowerheadxy[nn][j][1])**2
            if r<5:
                pairs.append(lowerheads[nn][j])
        lipid_opp_neighbors[nn].append([i,pairs])
        

In [361]:
#find closest neighbors between upperheads and lowerheads 
#lipid_opp_distance = list([] for _ in xrange(traj.n_frames))
lipid_opp_neighbors_rev = list([] for _ in xrange(traj.n_frames))

for nn in range(traj.n_frames): 
    for i in range(len(lowerheadxy[nn])): 
        pairs=[]
        for j in range(len(upperheadxy[nn])): 
            r=(upperheadxy[nn][j][0]-lowerheadxy[nn][i][0])**2 + (upperheadxy[nn][j][1]-lowerheadxy[nn][i][1])**2
            if r<5:
                pairs.append(upperheads[nn][j])
        lipid_opp_neighbors_rev[nn].append([i,pairs])
        

In [365]:
#sort through the list of close neighbors and find closest lipid
lipid_pairs = list([] for _ in xrange(traj.n_frames))
lipid_distance= list([] for _ in xrange(traj.n_frames))

for nn in range(traj.n_frames): 
    for i in range(len(upperheads[nn])): 
        lipid_distance[nn].append(10000000)
        lipid_pairs[nn].append([i,0])
        
        
for nn in range(traj.n_frames): 
    for i in range(len(lipid_opp_neighbors[nn])):
        for j in range(len(lipid_opp_neighbors[nn][i][1])):
            upperindex=lipid_opp_neighbors[nn][i][0]
            lowerheadvalue=lipid_opp_neighbors[nn][i][1][j]
            lowerindex=lowerheads[nn].index(lowerheadvalue)
            d = (upperheadxy[nn][upperindex][0] - lowerheadxy[nn][lowerindex][0])**2 + (upperheadxy[nn][upperindex][1] - lowerheadxy[nn][lowerindex][1])**2 + (upperheadxy[nn][upperindex][2] - lowerheadxy[nn][lowerindex][2])**2
           
            if d<lipid_distance[nn][i]: 
                #print('true')
                lipid_distance[nn][i] = d 
                lipid_pairs[nn][i] = [upperindex,lowerindex]

In [384]:
#sort through the list of close neighbors and find closest lipid
lipid_pairs_rev = list([] for _ in xrange(traj.n_frames))
lipid_distance_rev= list([] for _ in xrange(traj.n_frames))

for nn in range(traj.n_frames): 
    for i in range(len(upperheads[nn])): 
        lipid_distance_rev[nn].append(10000000)
        lipid_pairs_rev[nn].append([i,0])
        
        
for nn in range(traj.n_frames): 
    for i in range(len(lipid_opp_neighbors_rev[nn])):
        for j in range(len(lipid_opp_neighbors_rev[nn][i][1])):
            lowerindex=lipid_opp_neighbors_rev[nn][i][0]
            upperheadvalue=lipid_opp_neighbors_rev[nn][i][1][j]
            upperindex=upperheads[nn].index(upperheadvalue)
            d = (lowerheadxy[nn][lowerindex][0] - upperheadxy[nn][upperindex][0])**2 + (lowerheadxy[nn][lowerindex][1] - upperheadxy[nn][upperindex][1])**2 + (lowerheadxy[nn][lowerindex][2] - upperheadxy[nn][upperindex][2])**2
            if d<lipid_distance_rev[nn][i]: 
                #print('true')
                lipid_distance_rev[nn][i] = d 
                lipid_pairs_rev[nn][i] = [lowerindex,upperindex]

# 3: Assign Water to closest Lipid Head

Read Me: Methodology for Reducing Simulation Time of Closest Neighbor Search
1. Put all lipid heads in ascending order. 
2.  Find closest value in headgroup 
3.  Use that closest value as starting point group
4.  Branch out left, and branch out right until
5.  You could find the closest value that is 5 greater and 5 less --> those become your end points
6. Only look for closest neighbors between -5,+5 bounds

In [288]:
#All lipid heads in ascending order
def make_ascend(frames,upperheadxy,unit_vector):
    lipidhead_ascending_index = list([] for _ in xrange(frames))
    lipidhead_ascending_value = list([] for _ in xrange(frames))

    for nn in range(frames): 
        for i in range(len(upperheadxy[nn])): 
            lipidhead_ascending_value[nn].append(upperheadxy[nn][i][int(unit_vector)]) #copy x upperhead values into array
    
    for nn in range(frames): 
        lipidhead_ascending_index[nn] = np.argsort(lipidhead_ascending_value[nn])#find array which takes sorted array and translates to upperhead 
        lipidhead_ascending_value[nn] = np.sort(lipidhead_ascending_value[nn]) #sort array
        
    return lipidhead_ascending_index,lipidhead_ascending_value

In [289]:
#find starting point, left min, right max 
def min_max_value(frames,waterheadxy,lipid_ascending_value,radius,index_array):
    lipid_closest_water = list([] for _ in xrange(frames))
    lipid_leftbound = list([] for _ in xrange(frames))
    lipid_rightbound = list([] for _ in xrange(frames))

    for nn in range(frames): 
        for i in range(len(waterheadxy[nn])): 
            indx = np.abs(lipid_ascending_value[nn]-waterheadxy[nn][i][0]).argmin() #find closest lipid head to waterhead[nn][i]]
            lipid_closest_water[nn].append(indx) #save that value

    for nn in range(frames): 
        for i in range(len(waterheadxy[nn])): 
            low_value=waterheadxy[nn][i][0] - int(radius) #find closest lipid that is -5 away from waterhead[nn][i]
            lipid_leftbound[nn].append(abs(lipid_ascending_value[nn]-low_value).argmin())
            high_value=waterheadxy[nn][i][0] + int(radius)
            lipid_rightbound[nn].append(abs(lipid_ascending_value[nn]-high_value).argmin())#find closest lipid that is +5 away from waterhead[nn][i] 
    return lipid_leftbound,lipid_closest_water,lipid_rightbound


In [290]:
def water_searchneigh(frames,waterheadxy,lipid_leftbound,lipid_rightbound,ascending_index):
    water_searchneigh = list([] for _ in xrange(frames))

    for nn in range(frames): 
        for i in range(len(waterheadxy[nn])): 
            leftbound=lipid_leftbound[nn][i]
            rightbound=lipid_rightbound[nn][i]
            index=leftbound
            local=[]
            while index<=rightbound:
                local.append(ascending_index[nn][index])
                index=index+1
            water_searchneigh[nn].append(local) #place upperheadlipids  which are close in an array. index corresponds to waterheadxy[nn]
    return water_searchneigh
    

In [291]:
upper_ascending_index,upper_ascending_value=make_ascend(traj.n_frames,upperheadxy,0)
lipid_leftbound,lipid_closest_water,lipid_rightbound=min_max_value(traj.n_frames,waterheadxy,upper_ascending_value,5,upper_ascending_index)
water_searchneigh_upper=water_searchneigh(traj.n_frames,waterheadxy,lipid_leftbound,lipid_rightbound,upper_ascending_index)

In [292]:
lower_ascending_index,lower_ascending_value=make_ascend(traj.n_frames,lowerheadxy,0)
lower_lipid_leftbound,lower_lipid_closest_water,lower_lipid_rightbound=min_max_value(traj.n_frames,waterheadxy,lower_ascending_value,5,upper_ascending_index)
water_searchneigh_lower=water_searchneigh(traj.n_frames,waterheadxy,lower_lipid_leftbound,lower_lipid_rightbound,lower_ascending_index)

In [293]:
def water_closest_lipid(frames,water_searchneigh,upperheadxy,waterheadxy,ascending_index,upperheads):
    water_distance = list([] for _ in xrange(frames))
    waters_closest=list([] for _ in xrange(frames))
    
    for nn in range(frames):
        for i in range(len(water_searchneigh[nn])): 
            d=1000
            for j in range(len(water_searchneigh[nn][i])):
                index=ascending_index[nn][water_searchneigh[nn][i][j]]
                a=upperheadxy[nn][index]
                b=waterheadxy[nn][i]
                if d>(np.linalg.norm(b-a)):
                    d=np.linalg.norm(b-a)
                    head_closest = upperheads[nn][index]
            waters_closest[nn].append([waterhead[nn][i],head_closest,d])
    return waters_closest[nn]
        

In [294]:
waters_upper_closest=water_closest_lipid(traj.n_frames,water_searchneigh_upper,upperheadxy,waterheadxy,upper_ascending_index,upperheads)
waters_lower_closest=water_closest_lipid(traj.n_frames,water_searchneigh_lower,lowerheadxy,waterheadxy,lower_ascending_index,lowerheads)

In [487]:
water_grouped = []
for i in range(len(waters_upper_closest)): 
    if waters_upper_closest[i][2]<waters_lower_closest[i][2]: #water is closest to upper leaflet 
        upper_value=upperheads[0].index(waters_upper_closest[i][1])
        water_value=waterhead[0].index(waters_upper_closest[i][0])
        water_grouped.append([water_value,lipid_pairs[0][upper_value][0],lipid_pairs[0][upper_value][1]])
        
    if waters_upper_closest[i][2]>waters_lower_closest[i][2]: #water is closest to lower leaflet 
        lower_value=lowerheads[0].index(waters_lower_closest[i][1])
        #print(lipid_pairs_rev[0][lower_value][0])
        water_value=waterhead[0].index(waters_lower_closest[i][0])
        water_grouped.append([water_value,lipid_pairs_rev[0][lower_value][1],lipid_pairs_rev[0][lower_value][0]])

# 4: Compare Z Values and Produce Index File

In [559]:
middle_waters=[]
outer_waters=[]

for i in range(len(water_grouped)): 
    water_z=waterheadxy[0][water_grouped[i][0]][2]
    upper_z=upperheadxy[0][water_grouped[i][1]][2]
    lower_z=lowerheadxy[0][water_grouped[i][2]][2]
    #print(water_z,upper_z,lower_z)
    if water_z<upper_z and water_z>lower_z: #in the middle 
        middle_waters.append(waterhead[0][water_grouped[i][0]])
    else: outer_waters.append(waterhead[0][water_grouped[i][0]])


In [571]:
file=open(str(name)+'_waters.ndx',"w")
file.write("[Middle_Waters]\n")
for i in range(len(middle_waters)): 
    if i%100==0: 
        file.write("\n")
    file.write(str(middle_waters[i] + 1))
    HW1 = str(middle_waters[i]+2)
    HW2 = str(middle_waters[i]+3)
    file.write(" ")
    file.write(HW1) 
    file.write(" ")
    file.write(HW2) 
    file.write(" ")

file.write("\n\n\n[Outside_Waters]\n")
for i in range(len(outer_waters)): 
    if i%100==0: 
        file.write("\n")
    file.write(str(outer_waters[i]+1))
    if str(outer_waters[i]) == '857859': print('yes')
    HW1 = str(outer_waters[i] +2)
    HW2 = str(outer_waters[i] +3)
    file.write(" ")
    file.write(HW1) 
    file.write(" ")
    file.write(HW2) 
    file.write(" ")
    
file.write("\n")

    

yes


In [None]:
topology=traj.topology
everyatom=topology.select())
#water=topology.select('name O')