In [1]:
import numpy as np
import pandas as pd
import os
import pdb
from random import random
import math
import matplotlib.pyplot as plta
import pylab
import matplotlib.animation as animation
from scipy.spatial import distance
from scipy.spatial.distance import euclidean
from mpl_toolkits.mplot3d import Axes3D
def distance(p0, p1):
    """Return the straight-line distance between two 3-D points.

    Only the components at indices 0, 1 and 2 of ``p0`` and ``p1`` are read.

    Note: defining this function rebinds the name ``distance``, which the
    import cell also binds via ``from scipy.spatial import distance``.
    """
    dx = p0[0] - p1[0]
    dy = p0[1] - p1[1]
    dz = p0[2] - p1[2]
    squared_length = dx**2 + dy**2 + dz**2
    return math.sqrt(squared_length)
def rotate(origin, point, angle):
    """
    Turn a 2-D point about a pivot.

    ``origin`` and ``point`` are each unpacked as an (x, y) pair; ``angle`` is
    in radians, with positive values rotating counterclockwise.

    Returns the rotated coordinates as a ``(qx, qy)`` tuple.
    """
    (ox, oy), (px, py) = origin, point
    cos_a, sin_a = math.cos(angle), math.sin(angle)
    # Offset of the point relative to the pivot.
    dx, dy = px - ox, py - oy

    qx = ox + cos_a * dx - sin_a * dy
    qy = oy + sin_a * dx + cos_a * dy
    return qx, qy

In [2]:
# Fixed-width layout used to parse the .pdb files in this notebook.
# Each entry pairs a column label with its (start, stop) character span; the
# spans are passed to ``pd.read_fwf(colspecs=...)``, which treats them as
# half-open intervals.
_pdb_fields = [
    ('ATOM', (0, 6)),
    ('serial', (6, 11)),
    ('name', (12, 16)),
    ('altloc', (16, 17)),
    ('resname', (17, 20)),
    ('chainid', (21, 22)),
    ('resseq', (22, 26)),
    ('icode', (26, 27)),
    ('x', (30, 38)),
    ('y', (38, 46)),
    ('z', (46, 54)),
    ('occupancy', (54, 60)),
    ('tempfactor', (60, 66)),
    ('element', (76, 78)),
    ('charge', (78, 80)),
]

colspecs_pdb = [span for _, span in _pdb_fields]

names_pdb = [label for label, _ in _pdb_fields]

# Load all peak coordinates

In [3]:
# Parse the peak file as fixed-width text with the shared column layout;
# the first four lines of the file are skipped.
pdb_path = 'files/peaks_SOL_rms2.69_7_13_22.pdb'
peaks = pd.read_fwf(
    pdb_path,
    colspecs=colspecs_pdb,
    names=names_pdb,
    skiprows=4,
)
peaks


Unnamed: 0,ATOM,serial,name,altloc,resname,chainid,resseq,icode,x,y,z,occupancy,tempfactor,element,charge
0,ATOM,2327.0,O,,HOH,X,1.0,,81.653,108.127,20.046,10.81,10.81,O,
1,ATOM,6062.0,O,,HOH,X,2.0,,84.839,43.055,55.808,10.62,10.62,O,
2,ATOM,6398.0,O,,HOH,X,3.0,,93.976,20.540,58.748,10.54,10.54,O,
3,ATOM,5787.0,O,,HOH,X,4.0,,80.996,33.221,53.062,10.50,10.50,O,
4,ATOM,4618.0,O,,HOH,X,5.0,,28.257,175.076,39.768,10.49,10.49,O,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8282,ATOM,3153.0,O,,HOH,X,8283.0,,6.840,218.940,26.383,2.69,2.69,O,
8283,ATOM,2979.0,O,,HOH,X,8284.0,,97.727,143.706,25.266,2.69,2.69,O,
8284,ATOM,979.0,O,,HOH,X,8285.0,,67.252,163.799,7.752,2.69,2.69,O,
8285,ATOM,2746.0,O,,HOH,X,8286.0,,62.696,159.396,23.082,2.69,2.69,O,


# Load all OEC coordinates (O1 atom of each OEX residue)

In [5]:
# Parse the full structure file with the shared fixed-width column layout.
pdb_path = 'files/start_45ns.pdb'
pdb1 = pd.read_fwf(
    pdb_path,
    colspecs=colspecs_pdb,
    names=names_pdb,
)
# Keep only rows whose residue name is OEX and whose atom name is O1.
pdb_OEC = pdb1.loc[pdb1['resname'].eq('OEX') & pdb1['name'].eq('O1')]
pdb_OEC

Unnamed: 0,ATOM,serial,name,altloc,resname,chainid,resseq,icode,x,y,z,occupancy,tempfactor,element,charge
330437,ATOM,30433,O1,,OEX,,2456,,84.605,40.17,58.555,1.0,0.0,,
330447,ATOM,30443,O1,,OEX,,2457,,28.775,207.095,23.235,1.0,0.0,,
330457,ATOM,30453,O1,,OEX,,2458,,92.41,182.795,212.365,1.0,0.0,,
330467,ATOM,30463,O1,,OEX,,2459,,30.265,15.815,177.12,1.0,0.0,,
330477,ATOM,30473,O1,,OEX,,2460,,33.28,150.905,97.47,1.0,0.0,,
330487,ATOM,30483,O1,,OEX,,2461,,88.79,94.935,132.715,1.0,0.0,,
330497,ATOM,30493,O1,,OEX,,2462,,25.32,72.0,251.43,1.0,0.0,,
330507,ATOM,30503,O1,,OEX,,2463,,87.21,128.025,286.645,1.0,0.0,,


# Load peaks from bulk solvent 

In [8]:
# Numeric suffixes of the 'files/Matches_XTALwater_Rand_<id>' pickles to
# combine, in the order they are concatenated.
bulk_rand_ids = [247, 1685, 962, 1566, 536, 218]

# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only load these files if they come from a trusted source.
outsidechannel = (
    pd.concat(
        [pd.read_pickle('files/Matches_XTALwater_Rand_%d' % rand_id)
         for rand_id in bulk_rand_ids]
    )
    # Keep the first row seen for each 'MD ID', then renumber rows from 0.
    .drop_duplicates('MD ID')
    .reset_index(drop=True)
)
# Rename 'MD ID' to 'resseq' so the column name matches the 'resseq' column
# of the fixed-width PDB tables.
outsidechannel_renamed = outsidechannel.rename(columns={'MD ID': 'resseq'})
outsidechannel_renamed

Unnamed: 0,resseq,Closest XTAL Partner,Dist,Thresh
0,539.0,266.0,0.131244,8.66
1,220.0,13.0,0.143224,9.37
2,639.0,405.0,0.193197,8.44
3,141.0,159.0,0.221077,9.57
4,439.0,228.0,0.235232,8.87
...,...,...,...,...
1316,3514.0,972.0,7.689330,4.86
1317,2082.0,1260.0,7.723985,6.28
1318,3853.0,1376.0,7.891045,4.61
1319,4078.0,1376.0,8.691524,4.46


# Perform merge, then check whether any waters are < 30 Angstrom away from the OEC

In [12]:
# Join the bulk-solvent matches to the peak table on their shared 'resseq'
# column (pandas' default join type is used).
merged = outsidechannel_renamed.merge(peaks, on='resseq')

In [13]:
# Report every (water, OEC O1) pair closer than the cutoff.
# The recorded output of this cell shows a single hit: water 4849.0 at
# ~28.68 Angstrom from OEC residue 2456.
oec_cutoff = 30  # distance threshold, in the same units as the x/y/z columns

# Build the OEC reference points once; they do not depend on the water row.
oec_points = [
    (oec.resseq, [float(oec.x), float(oec.y), float(oec.z)])
    for oec in pdb_OEC.itertuples(index=False)
]

for water in merged.itertuples(index=False):
    water_point = [float(water.x), float(water.y), float(water.z)]
    for oec_resseq, oec_point in oec_points:
        # Compute the distance once and reuse it for both the test and the message.
        separation = distance(water_point, oec_point)
        if separation < oec_cutoff:
            print('%s is %s from %s' % (water.resseq, separation, oec_resseq))

4849.0 is 28.683259246466392 from 2456
