In [1]:
import re
import nglview as nv
import numpy as np
import pytraj as pt 
import matplotlib.pyplot as plt
import seaborn as sns

from hilbertcurve.hilbertcurve import HilbertCurve
from matplotlib import cm
import imageio



In [2]:
def centerStructure(topFile, coorFile, output_prefix):
    traj1 = pt.load(coorFile, top=topFile)
    traj1.top.set_reference(traj1[0])
    traj1 = pt.autoimage(traj1, mask=":LIG<:5&!:LIG")
    pt.write_traj(output_prefix+".nc", traj1, overwrite=True)
    pt.write_traj(output_prefix+".pdb", traj1, frame_indices=[0], overwrite=True)
    traj1=None
def cleanpdb(filename, outfilename, watcode):
    print('Cleaning the PDB file: '+filename);
    file1=open(filename, 'r'); 
    file2=open(outfilename, 'w');
    
    HETATMdic = {
        'CL':'CL-','NA':'NA+', 'K':'K+ ','CL-':'CL-','NA+':'NA+', 'K+':'K+ ', 
        'MDL':'LIG','LIG':'LIG',
        'SOL':watcode, 'T3P':watcode, 'SPC':watcode, 'HOH':watcode, 'WAT':watcode,'T4P':watcode, 'T3E':watcode
    }

    for i in file1:
        # 1. Uniform the name of the ligand
        # 2. Some pdb generation program do not write HETATM
        if "TER"  in i:
            continue
        if i[17:20].strip().upper() in HETATMdic.keys():
            i='HETATM'+i[6:17]+HETATMdic[i[17:20].strip().upper()]+i[20:];
        # Two special occasions ;
        # 1. Atom name by 4 letters e.g. 1HD2 -> HD21; Ligand should be excluded; This error only occurres in hydrogen atom
        #     In case CAMPARI naming does not associate with the Charmm forcefield naming
        # 2. Correct the terminal hydrogen atoms (not include Ligand)
        if (len(i[12:16].strip()) == 4) and (i[17:20]!='MDL') and (re.match(r' [0-9]H.[0-9] ', i[12:16])):
            i=i[:12]+i[13:16]+i[12]+i[16:];
        elif (i[0:4]=='ATOM') and ((i[12:16].strip()=='H1') or (i[12:16].strip()=='H2') or (i[12:16].strip()=='H3')):
            i=i[:12]+' '+'HT'+i[14]+i[16:];
        if ('FOR' in i) and (i[13:16].strip() == 'HN'):
            i=i[:13]+'H  '+i[16:];
        file2.write(i);
    file1.close();file2.close();

centerStructure("./C003XiF0sMZh/C003XiF0sMZh_PDB.pdb", "./C003XiF0sMZh/C003XiF0sMZh_TRJ.nc", "testout")
cleanpdb("testout.pdb", "test2out.pdb", "T3P")


Cleaning the PDB file: testout.pdb


In [3]:
class DummyPDB:
    def __init__(self):
        self.pdbline = ""
        self.temp = "ATOM      1  Du  TMP     1       0.000   0.000   0.000  1.00  0.00";
    
    def addCoor(self, vecX, vecY, vecZ, elems):
        newtxt = self.convertToPDB(vecX, vecY, vecZ, elems); 
        self.pdbline += newtxt;
        
    def convertToPDB(self, vecX, vecY, vecZ, elems):
        if (len(vecX) != len(vecY)) or (len(vecX) != len(vecZ)):
            print("Lengths of each dimension are not equil")
        vecX = np.round(vecX, decimals=4)
        vecY = np.round(vecY, decimals=4)
        vecZ = np.round(vecZ, decimals=4)
        pdbline = "";
        
        for i in range(len(vecX)):
            tmpcopy=self.temp;
            indlen = len(str(i));
            atmInd = "{:>5}".format(i+1);
            X = "{:>8}".format(vecX[i]);
            Y = "{:>8}".format(vecY[i]);
            Z = "{:>8}".format(vecZ[i]);
            try:
                Elem = elems[i];
            except:
                Elem = "Du";
            Elem = "{:>4}".format(Elem);
            tmpcopy = tmpcopy[:12]+Elem+tmpcopy[16:]; 
            tmpcopy = tmpcopy[:6]+atmInd+tmpcopy[11:]; 
            tmpcopy = tmpcopy[:30]+X+tmpcopy[38:]; 
            tmpcopy = tmpcopy[:38]+Y+tmpcopy[46:]; 
            tmpcopy = tmpcopy[:46]+Z+tmpcopy[54:];
            pdbline += tmpcopy+"\n"
        
        return pdbline
    
    def writeFile(self, outname):
        self.outname = outname;
        with open(outname, 'w') as outfile:
            outfile.write(self.pdbline)
            
    def show(self):
        viewer = nglview.show_file(self.outname)
        return viewer


In [13]:
import re
from numpy import rot90  # FOR rotation of the matrix

# read the file of trajectory output PDB file (It includes the Ligand atoms)

class hilbertize:
    def __init__(self, pdbname, iterNr, dimNr):
#         defaultCutoff = 12
        self.pdbname = pdbname;
        self.trajpdb = pt.load(self.pdbname)
        self.trajpdb.top.set_reference(self.trajpdb[0])
        self.ligNamelst = ["LIG", "MDL", "TRA"]  # remember to use str.upper() function to check ligand name
        self.ligResName = [i.name for i in self.trajpdb.top.residues if i.name in self.ligNamelst][0]
        print("The ligand name is : ", self.ligResName)
        self.from_pdb(self.pdbname);
        self.setCutoff(12); 
        
        
        self.iterNr = iterNr;
        self.dimNr = dimNr; 
        self.len3D = int(2 ** self.iterNr);   # 16 for hilbert(4,3)
        self.len2D = int(np.sqrt(2**(self.iterNr*self.dimNr)));   # 64 for hilbert(4,3)
        self.distances = np.array(range(2**(self.iterNr*self.dimNr)));
        self.curve = HilbertCurve(self.iterNr, self.dimNr); 
        self.points = np.array(self.curve.points_from_distances(self.distances)); 
        
        self.curve2D = HilbertCurve( (self.iterNr*self.dimNr)/2, 2);
        self.points2D = np.array(self.curve2D.points_from_distances(self.distances)); 
        
        
        ########### Not sure if necessary or not ########
        self.ismol2 = False
        self.ispdb = False
        #################################################
        
        # Decline all the results dataset 
        self.ELMDIST = [];
        self.ATNDIST = []; 
        self.RIDDIST = [];
        self.ELMDIST2D = [];
        self.ATNDIST2D = []; 
        self.RIDDIST2D = [];
    
    def from_mol2(self, filename):
        with open(filename,"r") as file2:
            self.mol2file = [i.strip('\n') for i in file2.readlines()]
        inatom = False
        self.mol2x=[];self.mol2y=[];self.mol2z=[];
        for i in self.mol2file:
            if ("<TRIPOS>ATOM" in i):
                inatom = True;
                continue
            elif ("<TRIPOS>BOND" in i):
                inatom = False
            if (inatom):
                self.mol2x.append(float(i.split()[2]))
                self.mol2y.append(float(i.split()[3]))
                self.mol2z.append(float(i.split()[4]))
        self.center = np.array([np.mean(self.mol2x), np.mean(self.mol2y),np.mean(self.mol2z)])
        self.ismol2 = True;
        
    def from_pdb(self, filename):
        print("Loading the PDB file: ", filename)
        with open(filename,"r") as file1:
            self.pdbfile = [i.strip('\n') for i in  file1.readlines()] 
        self.pdbx = []; self.pdby = []; self.pdbz = []; self.ligx=[]; self.ligy=[]; self.ligz=[];
        for i in self.pdbfile:
            if (("ATOM" in i) or ("HETATM" in i)) and (i[17:20] not in self.ligNamelst):
                self.pdbx.append(float(i[30:38].strip())); 
                self.pdby.append(float(i[38:46].strip())); 
                self.pdbz.append(float(i[46:54].strip())); 
            elif ("HETATM" in i) and (i[17:20] in self.ligNamelst):
                self.ligx.append(float(i[30:38].strip())); 
                self.ligy.append(float(i[38:46].strip())); 
                self.ligz.append(float(i[46:54].strip())); 
        self.pdbx = np.array(self.pdbx); self.pdby = np.array(self.pdby); self.pdbz = np.array(self.pdbz); 
        self.ligx = np.array(self.ligx); self.ligy = np.array(self.ligy); self.ligz = np.array(self.ligz);
        self.center = np.array([np.mean(self.pdbx), np.mean(self.pdby), np.mean(self.pdbz)])
        self.ligcenter = np.array([np.mean(self.ligx), np.mean(self.ligy), np.mean(self.ligz)])

        try:
            self.center = np.array([i for i in pt.center_of_geometry(self.trajpdb)[0]])
        except:
            self.center = np.array([np.mean(self.pdbx), np.mean(self.pdby), np.mean(self.pdbz)])
                
        try: 
            AtomLst = [i.name for i in self.trajpdb.top.atoms]
            c=0;
            for i in self.pdbfile:
                if ("ATOM" in i) or ("HETATM" in i):
                    self.elems.append(AtomLst[c])
                    c += 1
        except:
            self.elems = []; 
            for i in self.pdbfile:
                if ("ATOM" in i) or ("HETATM" in i):
                    self.elems.append(i[12:16].strip())
        self.elems = np.array(self.elems)
        self.ispdb = True;
    
    def addTraj(self, trajfilelst):
        AtomArray = []; 
        # For the loading of multiple trajectory files
        for i in trajfilelst:
            print("Adding the trajectory", i)
            tmptraj = pt.load(i, top=self.pdbname)
            tmptraj = pt.autoimage(tmptraj, mask=":LIG, MDL, TRA")
            if len(AtomArray) == 0:
                AtomArray = tmptraj.xyz
            else:
                AtomArray = np.concatenate((ProWatArray, tmptraj.xyz[:,sel1]))
        self.atomxyz = AtomArray; 
        print("Shape protein-water atoms:", self.atomxyz.shape)
    
    def setCutoff(self, cutoff):
        print("Chainging the cutoff value and updating the selected atoms")
        self.cutoff = cutoff;
        cutoff = str(cutoff); 
        self.atomlist = self.trajpdb.top.select(":LIG<:"+cutoff+" &! :LIG, MDL, TRA"); 
        self.atomlist_nowat = self.trajpdb.top.select(":LIG<:"+cutoff+" &! :LIG, MDL, TRA,T3P, HOH,WAT,T4P"); 
        print("Updating cutoff: atom number within cutoff: ", len(self.atomlist))
        print("Updating cutoff: atom number within cutoff not water: ",len(self.atomlist_nowat))
        self.mask = "@"+",".join([str(i) for i in self.atomlist])
        self.mask_nowat = "@"+",".join([str(i) for i in self.atomlist_nowat])
    
    def align(self, refCenter):
        diff = np.array(this.center) - np.array(refCenter); 
        self.points = self.points + diff
    
    def scaleTo(self, refLength):
        LenSc = refLength / self.len3D;
        self.points = self.points * LenSc; 
        
    def scale(self, scaleFactor):
        self.points = self.points * scaleFactor; 
    
    def shift(self, shift):
        self.points = self.points + np.array(shift)
        
    def T(self, direction):   
        # up, down, left, right
        # two up, two down, two left, two right
        pass
    
    def updateResult(self):
        self.res3D = [self.ELMDIST, self.ATNDIST, self.RIDDIST]
        self.res2D = [self.ELMDIST2D, self.ATNDIST2D, self.RIDDIST2D]
    
    
    def assignElem(self, cutoff, framelist, classNr):
        # Read the information about [Element, atomic coordinate, residue ID]
        # TODO: only 10 Frames as a small test. 
        # print(self.atomxyz.shape) (5001, 19903, 3)
        if (classNr == 0):
            # Mask all residues other than ligand
            selection = self.trajpdb.top.select("!:"+self.ligResName);
        elif (classNr == 1):
            # Exclude ligand, water and ions 
            selection = self.trajpdb.top.select("!:WAT,CL-,K+,K,CL,HOH,T3P,SPC,T4P,T3E,"+self.ligResName);
        elif (classNr == 2):
            # only ligand
            selection = self.trajpdb.top.select(":"+self.ligResName); 
        elif (classNr == 3):
            pass
        
        elemlst = np.array([i.name for i in self.trajpdb.top.atoms])[selection]
        atomNrlst = np.array([i.atomic_number for i in self.trajpdb.top.atoms])[selection]
        resIDlst = np.array([i.resid for i in self.trajpdb.top.atoms])[selection]
        
        c=0
        sqVDW = 1.75 ** 2
        sqSAS = 3.75 ** 2
        sdist = lambda p, xyz: np.sum((p - xyz)**2, axis = 1)
        self.framelst = []
        for i in range(*framelist):
            self.framelst.append(i)
            theframe = self.atomxyz[i]
            cutoffAtoms = theframe[selection];
            
            # TODO: why need this? 
            c1=0; c2=0; c3=0; 
            ELMframe = []; ATNframe = []; RIDframe = [];
            
            # Iterate through all points in the 3D hilbert curve. 
            for j in range(len(self.points)):
                dists = sdist(self.points[j], cutoffAtoms);
                distmin = np.min(dists);
                idx = np.squeeze(np.where(dists == distmin));
                
                if (distmin < sqVDW):
                    c1 += 1
                    ELMframe.append(elemlst[idx])
                    ATNframe.append(atomNrlst[idx])
                    RIDframe.append(resIDlst[idx])
                elif (distmin > sqVDW and distmin < sqSAS):
                    c2 += 1
                    ELMframe.append("H")
                    ATNframe.append(0)
                    RIDframe.append(0)
                else :
                    c3 += 1
                    ELMframe.append("Du")
                    ATNframe.append(0)
                    RIDframe.append(0)
            # finished the collection of one frame. 
            self.ELMDIST.append(ELMframe)
            self.ATNDIST.append(ATNframe)
            self.RIDDIST.append(RIDframe)
            
            c+=1
            if (c % 20 == 0):
                print("Assigned 3D hilbert cells: {:d} frames; Found {:d} | {:d} | {:d}".format(c, c1, c2, c3))
        self.ELMDIST = np.array(self.ELMDIST)
        self.ATNDIST = np.array(self.ATNDIST)
        self.RIDDIST = np.array(self.RIDDIST)
        self.updateResult(); 

    def gen2D(self):
        # Only generating the 2D map for the time being
        for ind in range(len(self.res3D)):
            print("Mapping the 3D dataset {}".format(ind))
            c=0;
            frames=[]
            distances = self.res3D[ind]     # the category of hilbert maps
            for frame in distances:
                thisframe = []; 
                for i in range(int(self.len2D)):
                    tmprow = []
                    for j in range(int(self.len2D)):
                        res = np.where(np.all(self.points2D == np.array([i,j]), axis=1) == True)
                        res = np.squeeze(res)
                        tmprow.append(frame[res])
                    thisframe.append(tmprow)
                frames.append(thisframe)
                c+=1
                if (c % 20 == 0):
                    print("Mapping the 3D hilbert curve to 2D for the frame ", c)
            if ind == 0:
                self.ELMDIST2D = np.array(frames)
            elif ind == 1:
                self.ATNDIST2D = np.array(frames) 
            elif ind == 2:
                self.RIDDIST2D = np.array(frames)
        self.res2D = np.array([ self.ELMDIST2D, self.ATNDIST2D, self.RIDDIST2D ])
        print(self.res2D.shape)
        self.updateResult()
        

    def show3D(self, indice, addProtein=False):
        cube = self.cube = DummyPDB();
        pdbstr = cube.convertToPDB(self.points[:,0],self.points[:,1],
                                   self.points[:,2], self.ELMDIST[indice])
        
        with open("/tmp/tmpbpose.pdb","w") as outfile:
            outfile.write(pdbstr)
        viewer = nv.show_file("/tmp/tmpbpose.pdb")
        viewer[0].clear_representations()
        if addProtein:
            nearAtomSel = "@"+",".join([str(i) for i in self.trajpdb.top.select(":LIG<:5 & !:LIG, T3P, HOH, WAT, SPC &!@H*")])
            viewer.add_component(self.pdbname)
            viewer[1].clear_representations()
            viewer[1].add_licorice(selection = nearAtomSel, opacity=0.7)
            viewer[1].add_cartoon(selection = "protein")
            viewer[1].add_hyperball(selection = "LIG")
            
        # Add basic viewer representations
        viewer[0].add_line(selection="all", opacity=0.7)
        viewer[0].add_surface(selection="all", opacity=0.3, surfaceType="sas", 
                              probeRadius=0.001, contour=True, smooth=5)
        viewer.background = "gray"
        return viewer
    
    def show2D(self, indice):
        plane = self.plane = DummyPDB();
        print(len(self.points2D[:,0]))
        pdbstr = plane.convertToPDB(self.points2D[:,0], self.points2D[:,1], 
                                    np.ones(4096), self.res2D[0][indice].reshape(-1))
        with open("/tmp/tmpbpose.pdb","w") as outfile:
            outfile.write(pdbstr)
        viewer = nv.show_file("/tmp/tmpbpose.pdb")
        viewer.clear_representations()
        viewer[0].add_ball_and_stick(selection="all")
        viewer.background = "gray"
        return viewer
    
    def plot2D(self, indice, dataindice):
        sns.heatmap(data=self.res2D[dataindice][indice], cmap="YlGn")
    
    def interactive2D(self, DATA):
        %matplotlib
        limMax = DATA.max(); 
        limMin = DATA.min();
        cbar = False; 
        figlst=[];
        plt.ion();
        plt.clf();
        for i in range(len(DATA)):
            # if the distances are identical, use inshow rather than pcolormesh. 
            plt.imshow(DATA[i], cmap="hot")
            if not cbar:
                cbar = plt.colorbar()
            plt.clim(limMin, limMax)
            plt.xlabel("Frame {:d}".format(self.framelst[i]+1))
            plt.pause(0.5)
            figname = "/tmp/tmpfig_"+str(i)+".png"
            plt.savefig(figname, format="png")
            figlst.append(figname)
            
        plt.ioff()
        plt.show()
        with imageio.get_writer("/tmp/tmpgif.gif", mode="I") as gif1:
            for i in figlst:
                image = imageio.imread(i)
                gif1.append_data(image)
        
    
    def save(self, filename):
        with open(filename, "wb") as tmpfile:
            pickle.dump(self, tmpfile, protocol=pickle.HIGHEST_PROTOCOL)
    
    def load(self, filename):
        with open(filename, 'rb') as handle:
            b = pickle.load(handle)
    

PDBFILE = "test2out.pdb"
TRAJFILES = ["testout.nc"]
CUTOFF = 12
LOADNR = 100

# Main function, construct the hilbert fingerprint tensor of a trajectory topology file 
# 1. Construct a hilbert object with designated PDB topology file and iteration/dimension
HBC_pro = hilbertize(PDBFILE, 4,3); 

# 2. Add trajectory
HBC_pro.addTraj(TRAJFILES)

# 3. Transform the 3D hilbert curve
# print("Ligand center is ", HBC_pro.ligcenter)
HBC_pro.scaleTo(CUTOFF)
HBC_pro.shift(HBC_pro.ligcenter-(CUTOFF/2))

# 3. Assign element 
HBC_pro.assignElem(CUTOFF, [0,5000,50], 1)

# 4. Reduce the dimension of the frame
# get the 2D frame of self.elem2Dframes
HBC_pro.gen2D()

print("reached the end of this block")

The ligand name is :  LIG
Loading the PDB file:  test2out.pdb
Chainging the cutoff value and updating the selected atoms
Updating cutoff: atom number within cutoff:  1900
Updating cutoff: atom number within cutoff not water:  1120
Adding the trajectory testout.nc
Shape protein-water atoms: (5001, 19903, 3)
Assigned 3D hilbert cells: 20 frames; Found 2538 | 1419 | 139
Assigned 3D hilbert cells: 40 frames; Found 2453 | 1562 | 81
Assigned 3D hilbert cells: 60 frames; Found 2356 | 1485 | 255
Assigned 3D hilbert cells: 80 frames; Found 2190 | 1718 | 188
Assigned 3D hilbert cells: 100 frames; Found 2513 | 1569 | 14
Mapping the 3D dataset 0
Mapping the 3D hilbert curve to 2D for the frame  20
Mapping the 3D hilbert curve to 2D for the frame  40
Mapping the 3D hilbert curve to 2D for the frame  60
Mapping the 3D hilbert curve to 2D for the frame  80
Mapping the 3D hilbert curve to 2D for the frame  100
Mapping the 3D dataset 1
Mapping the 3D hilbert curve to 2D for the frame  20
Mapping the 3D

In [14]:
# show the 3D overlap between hilbert 3D and the protein structure
HBC_pro.show3D(0,True)

NGLWidget(background='gray')

In [15]:
HBC_pro.show2D(50)


4096


NGLWidget(background='gray')

In [16]:
HBC_pro.interactive2D(HBC_pro.res2D[1])


Using matplotlib backend: Qt5Agg


In [12]:
print(len(HBC_pro.res2D))
HBC_pro.interactive2D(HBC_pro.res2D[2])
# print(HBC_pro.ATNDIST2D)


# HBC_pro.show2D(1, 0)
# HBC_pro.plot2D(1, 1)
# print([i for i in HBC_pro.res2D[1].max(axis=(-1,-2))])
# print(HBC_pro.res2D[1].min())

# print(HBC_pro.ELMDIST2D)


3
Using matplotlib backend: Qt5Agg


KeyboardInterrupt: 

KeyboardInterrupt: 

KeyboardInterrupt: 

In [35]:
print(HBC_pro.points[:1])
# print(HBC_pro.elemdistance)
print(len(HBC_pro.points))

x=np.array(HBC_pro.elemdistance)
print(HBC_pro.points.shape)
print(x.shape)

[[23.5651 23.0471 23.3919]]
4096
(4096, 3)
(2, 4096)


In [130]:
tmp = pt.load("test2out.pdb")
tmp.top.set_reference(tmp[0])
print(dir(tmp))
# print([i for i in tmp.top.atom_indices])
a=tmp.top.select(":CYS")
print(a)
print(len(tmp.xyz[:,a]))

['__add__', '__call__', '__class__', '__del__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_allocate', '_append_unitcells', '_boxes', '_estimated_GB', '_frame_holder', '_handle_setting_box_force_velocity', '_iterframe_indices', '_life_holder', '_top', '_xyz', 'align_principal_axis', 'append', 'append_xyz', 'autoimage', 'center', 'copy', 'crdinfo', 'forces', 'from_iterable', 'iterframe', 'load', 'n_atoms', 'n_frames', 'rmsfit', 'rotate', 'save', 'scale', 'shape', 'strip', 'superpose', 'time', 'top', 'topology', 'transform', 'translate', 'unitcells', 'velocities', 'view', 'visualize', 'xyz']
[1950 1951 1952 ... 2601 2602 2603]
33


In [39]:
a=range(10)
print(a)
x = lambda val: for i in val: return i*5+11
print(x(a))


SyntaxError: invalid syntax (1020261499.py, line 3)

In [163]:
np.fft.fft([1,2,3,4,5])

b=[i for i in range(100)]

a=np.fft.fft(range(100))

print(len(a))

c=np.fft.ifft(a)

print(b)
print(b==c)
# print(round(c[0]))
d = np.ones((2,3))
e = np.fft.fft(d)

print(e)

plt.clf()
plt.plot(b,b-c)
plt.show()

100
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
[False False False ... False False False]
[[3.+0.j 0.+0.j 0.+0.j]
 [3.+0.j 0.+0.j 0.+0.j]]


  return np.asarray(x, float)


In [35]:
testtraj = pt.load("./C003XiF0sMZh/C003XiF0sMZh_TRJ.nc", top="./C003XiF0sMZh/C003XiF0sMZh_PDB.pdb")

In [186]:
from xml.dom import minidom
import xml.etree.ElementTree as ET

amberxml = "/home/miemie/Win/MLproject/BetaPose/Forcefield/ff14SB.xml"
charmmxml = "/home/miemie/Win/MLproject/BetaPose/Forcefield/charmm36_nowaters.xml"

ff14SB = minidom.parse(amberxml)
charmm36 = minidom.parse(charmmxml)

root = ET.Element("Forcefield")
print(dir(ff14SB))

reslist = ['ALA','ARG','ASN','ASP','CYS','GLU','GLN','GLY','ILE','LEU',
           'LYS','MET','PHE','PRO','SER','THR','TRP','TYR','VAL']
hislist = {'HID':"HSD",'HIE':"HSE",'HIP':"HSP"}

print("There are ", len(reslist)," residues")

atomlist = ['C','CA','CB','CD','CD1','CD2','CE','CE1','CE2','CE3','CG','CG1',
            'CG2','CH2','CZ','CZ2','CZ3','HA','HA1','HA2','HB','HB1','HB2',
            'HB3','HD1','HD11','HD12','HD13','HD2','HD21','HD22','HD23','HD3',
            'HE','HE1','HE2','HE21','HE22','HE3','HG','HG1','HG11','HG12',
            'HG13','HG2','HG21','HG22','HG23','HH','HH11','HH12','HH2','HH21',
            'HH22','HN','HT1','HT2','HT3','HZ','HZ1','HZ2','HZ3','N','ND1',
            'ND2','NE','NE1','NE2','NH1','NH2','NZ','O','OD1','OD2','OE1',
            'OE2','OG','OG1','OH','SD', "H","SG","HG"]
atomNameMap = {'H': 'HN', 'HG3':'HG1', 'HB3':'HB1', 'HD3':'HD1', "HG": "HG1", "HA3":"HA1", 'HE3':'HE1', 
               'HG13': 'HG11', 'CD1':'CD', 'HD12': 'HD2', 'HD11':'HD1', 'HD13': 'HD3'}

residues  = charmm36.getElementsByTagName("Residue");
residues2 = ff14SB.getElementsByTagName("Residue");

for resname in reslist:
    tmpresidue = ET.SubElement(root, "Residue")
    tmpresidue.set("name", resname)
    tmpres  = [res for res in ff14SB.getElementsByTagName("Residue") if res.attributes["name"].value == resname]; 
    tmpres2 = [res for res in charmm36.getElementsByTagName("Residue") if res.attributes["name"].value == resname];
    
    if (len(tmpres) > 0):
        tmpres = tmpres[0]
    if (len(tmpres2) > 0):
        tmpres2 = tmpres2[0]
        
    a = set([ j.attributes["name"].value for j in tmpres.getElementsByTagName("Atom") ])
    b = set([ j.attributes["name"].value for j in tmpres2.getElementsByTagName("Atom") ])
    intersections = a.intersection(b)
    for j in intersections:
        tmpatom1 = [i for i in tmpres.getElementsByTagName("Atom") if i.attributes["name"].value == j][0]
        tmpatom2 = [i for i in tmpres2.getElementsByTagName("Atom") if i.attributes["name"].value == j][0]
        addatom = ET.SubElement(tmpresidue, "Atom")
        addatom.set('name',    tmpatom1.attributes["name"].value)
        addatom.set('type',    tmpatom1.attributes["type"].value)
        addatom.set('charge',  tmpatom1.attributes["charge"].value)
        addatom.set('charge2', tmpatom2.attributes["charge"].value)
    diffa = a.difference(b)
    diffb = b.difference(a)
    print(diffa,diffb)
    for j in diffa:
        charmmatom = atomNameMap[j]
        tmpatom1 = [i for i in tmpres.getElementsByTagName("Atom") if i.attributes["name"].value == j][0]
        tmpatom2 = [i for i in tmpres2.getElementsByTagName("Atom") if i.attributes["name"].value == charmmatom][0]

        addatom = ET.SubElement(tmpresidue, "Atom")
        addatom.set('name',    tmpatom1.attributes["name"].value)
        addatom.set('type1',   tmpatom1.attributes["type"].value)
        addatom.set('type2',   tmpatom2.attributes["type"].value)
        addatom.set('charge',  tmpatom1.attributes["charge"].value)
        addatom.set('charge2', tmpatom2.attributes["charge"].value)

for resname in hislist.keys():
    amberNM = resname
    charmNM = hislist[amberNM]
    print(amberNM, charmNM)
    charmres = [res for res in charmm36.getElementsByTagName("Residue") if res.attributes["name"].value == charmNM][0];
    amberres = [res for res in ff14SB.getElementsByTagName("Residue") if res.attributes["name"].value == amberNM][0]; 
    
    tmpcharmres = ET.SubElement(root, "Residue")
    tmpcharmres.set("name", charmNM)
    print(len([i for i in charmres.getElementsByTagName("Atom")]))
    for i in [i for i in charmres.getElementsByTagName("Atom")]:
        addatomC = ET.SubElement(tmpcharmres, "Atom")
        addatomC.set('name',   i.attributes["name"].value)
        addatomC.set('type',   i.attributes["type"].value)
        addatomC.set('charge', i.attributes["charge"].value)
    
    tmpamber = ET.SubElement(root, "Residue")
    tmpamber.set("name", amberNM)
    print(len([i for i in amberres.getElementsByTagName("Atom")]))
    for i in [i for i in amberres.getElementsByTagName("Atom")]:
        addatomA = ET.SubElement(tmpamber, "Atom")
        addatomA.set('name',   i.attributes["name"].value)
        addatomA.set('type',   i.attributes["type"].value)
        addatomA.set('charge', i.attributes["charge"].value)


tmpstring = minidom.parseString(ET.tostring(root, encoding="unicode")).toprettyxml()
# print(tmpstring)
with open("/tmp/testcompile.xml", "w") as file1:
    file1.write(tmpstring)


['ATTRIBUTE_NODE', 'CDATA_SECTION_NODE', 'COMMENT_NODE', 'DOCUMENT_FRAGMENT_NODE', 'DOCUMENT_NODE', 'DOCUMENT_TYPE_NODE', 'ELEMENT_NODE', 'ENTITY_NODE', 'ENTITY_REFERENCE_NODE', 'NOTATION_NODE', 'PROCESSING_INSTRUCTION_NODE', 'TEXT_NODE', '__bool__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_call_user_data_handler', '_child_node_types', '_create_entity', '_create_notation', '_elem_info', '_get_actualEncoding', '_get_async', '_get_childNodes', '_get_doctype', '_get_documentElement', '_get_documentURI', '_get_elem_info', '_get_encoding', '_get_errorHandler', '_get_firstChild', '_get_lastChild', '_get_localName', '_get_standalone', '_get_strictErrorChec

In [43]:
import xml.etree.ElementTree as ET
watff = "/home/miemie/Win/MLproject/BetaPose/Forcefield/test_wat.xml"

a = ET.parse(watff)
print(dir(a))
FF = ET.Element('ForceField')
residue1 = ET.SubElement(FF, "Residue")

residue1.set('name','T3P')
residue1.text = "T3P"
print(dir(residue1))

mydata = ET.tostring(FF)
print(mydata)
with open("/tmp/residue.xml", "bw") as file1:
    file1.write(mydata)




watxml=minidom.parse(watff)
residues = watxml.getElementsByTagName("Residue")
for i in residues:
    print(i.attributes["name"].value, i.attributes["source"].value)


# print(dir(watxml))
# watxml.saveXML(open("/tmp/test.xml","w"))


['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_root', '_setroot', 'find', 'findall', 'findtext', 'getiterator', 'getroot', 'iter', 'iterfind', 'parse', 'write', 'write_c14n']
['__class__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', 'append', 'attrib', 'clear', 'extend', 'find', 'findall', 'findtext', 'get', 'getchildren', 'getiterator'

In [48]:
data = ET.Element('data')
items = ET.SubElement(data, 'items')
item1 = ET.SubElement(items, 'item')
item2 = ET.SubElement(items, 'item')
item1.set('name','item1')
item2.set('name','item2')
item1.text = 'item1abc'
item2.text = 'item2abc'

# create a new XML file with the results
mydata = ET.tostring(data , encoding="unicode")
print(mydata)
myfile = open("/tmp/items2.xml", "w")
myfile.write(mydata)

<data><items><item name="item1">item1abc</item><item name="item2">item2abc</item></items></data>


96

In [159]:
from datetime import datetime
import time
time.perf_counter()
reslist = ['ALA','ARG','ASN','ASP','CYS','GLU','GLN','GLY','ILE','LEU',
           'LYS','MET','PHE','PRO','SER','THR','TRP','TYR','VAL',
           'HIS','HID','HIE','HIP', "HSD", "HSE", "HSP"]
print(len(reslist))

26


In [190]:
a=np.arange(5000)
print(a[1:50:5])

[ 1  6 11 16 21 26 31 36 41 46]


In [178]:
atomNameMap = {'H': 'HN', 'HG3':'HG1', 'HB3':'HB1', 'HD3':'HD1', "HG": "HG1", "HA3":"HA1", 'HE3':'HE1', 
               'HG13': 'HG11', 'CD1':'CD', 'HD12': 'HD2', 'HD11':'HD1', 'HD13': 'HD3'}

print(atomNameMap.keys())




dict_keys(['H', 'HG3', 'HB3', 'HD3', 'HG', 'HA3', 'HE3', 'HG13', 'CD1', 'HD12', 'HD11', 'HD13'])
