Major refactoring and getting ready for conda-forge #91

Open · wants to merge 2 commits into base: develop
3 changes: 3 additions & 0 deletions .gitignore
@@ -57,3 +57,6 @@ pycontact.egg-info/

#pytest
.pytest_cache/

*.lock
.vscode/
15 changes: 0 additions & 15 deletions .travis.yml

This file was deleted.

59 changes: 40 additions & 19 deletions PyContact/core/Biochemistry.py
@@ -2,8 +2,31 @@

import numpy as np

from ..db.DbReader import read_residue_db

residue_infos = {
'gly': {'name': 'gly', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'ala': {'name': 'ala', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'leu': {'name': 'leu', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'ile': {'name': 'ile', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'val': {'name': 'val', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'phe': {'name': 'phe', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'met': {'name': 'met', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'pro': {'name': 'pro', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'asp': {'name': 'asp', 'scpolarity': 'negative', 'hbondtype': 'acc'},
'glu': {'name': 'glu', 'scpolarity': 'negative', 'hbondtype': 'acc'},
'lys': {'name': 'lys', 'scpolarity': 'positive', 'hbondtype': 'don'},
'arg': {'name': 'arg', 'scpolarity': 'positive', 'hbondtype': 'don'},
'hsp': {'name': 'hsp', 'scpolarity': 'positive', 'hbondtype': 'don'},
'asn': {'name': 'asn', 'scpolarity': 'polar', 'hbondtype': 'both'},
'gln': {'name': 'gln', 'scpolarity': 'polar', 'hbondtype': 'both'},
'his': {'name': 'his', 'scpolarity': 'polar', 'hbondtype': 'both'},
'hsd': {'name': 'hsd', 'scpolarity': 'polar', 'hbondtype': 'both'},
'hse': {'name': 'hse', 'scpolarity': 'polar', 'hbondtype': 'both'},
'cys': {'name': 'cys', 'scpolarity': 'polar', 'hbondtype': 'don'},
'ser': {'name': 'ser', 'scpolarity': 'polar', 'hbondtype': 'both'},
'thr': {'name': 'thr', 'scpolarity': 'polar', 'hbondtype': 'both'},
'tyr': {'name': 'tyr', 'scpolarity': 'nonpolar', 'hbondtype': 'don'},
'trp': {'name': 'trp', 'scpolarity': 'nonpolar', 'hbondtype': 'don'},
}

compare = lambda x, y: collections.Counter(x) == collections.Counter(y)

@@ -159,10 +182,13 @@ def hbond_percentage(self):

def total_time(self, ns_per_frame, threshold):
"""Returns the total time, the contact score is above the given threshold value."""
time = 0
for score in self.scoreArray:
if score > threshold:
time += ns_per_frame
# time = 0
# for score in self.scoreArray:
# if score > threshold:
# time += ns_per_frame
sa = np.array(self.scoreArray)
time = np.sum(np.where(sa > threshold, ns_per_frame, 0))
# assert time == time2
self.ttime = time
return self.ttime
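
For reference, a minimal standalone sketch of the vectorized total_time used above; the values below are illustrative and not taken from this PR:

import numpy as np

score_array = [0.1, 0.7, 0.4, 0.9, 0.6]   # hypothetical per-frame contact scores
ns_per_frame = 0.02                       # hypothetical frame spacing in ns
threshold = 0.5

# loop form (previous implementation)
time_loop = 0.0
for score in score_array:
    if score > threshold:
        time_loop += ns_per_frame

# vectorized form (new implementation): sum ns_per_frame over frames above threshold
sa = np.array(score_array)
time_vec = np.sum(np.where(sa > threshold, ns_per_frame, 0))
assert np.isclose(time_loop, time_vec)    # both give 0.06 ns here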

@@ -178,18 +204,13 @@ def median_life_time(self, ns_per_frame, threshold):

def mean_score(self):
"""Returns the mean score of the scoreArray."""
mean = 0
for score in self.scoreArray:
mean += score
mean /= len(self.scoreArray)
self.meanScore = mean
return mean
self.meanScore = np.mean(self.scoreArray)
return self.meanScore

def median_score(self):
"""Returns the median score of the scoreArray."""
med = np.median(self.scoreArray)
self.medianScore = med
return med
self.medianScore = np.median(self.scoreArray)
return self.medianScore

def life_time(self, ns_per_frame, threshold):
"""Computes the life time of a contact in ns, with the given threshold."""
@@ -239,15 +260,15 @@ def determine_ctype(self):
# return ContactType.other
self.determineBackboneSidechainType()
try:
sc1 = str(read_residue_db("scpolarity", "name", r1)[0]["scpolarity"])
sc1 = residue_infos[r1]["scpolarity"]
scpol1 = SideChainPolarity.mapping[sc1]
except IndexError:
except (IndexError, KeyError):
scpol1 = SideChainPolarity.other

try:
sc2 = str(read_residue_db("scpolarity", "name", r2)[0]["scpolarity"])
sc2 = residue_infos[r2]["scpolarity"]
scpol2 = SideChainPolarity.mapping[sc2]
except IndexError:
except (IndexError, KeyError):
scpol2 = SideChainPolarity.other

# hydrogen bonds: donor, acceptor, both
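For reference, a minimal sketch of how the new module-level residue_infos table replaces the read_residue_db call, together with the KeyError fallback added in determine_ctype; the helper name and residue subset below are illustrative only:

# Sketch only: residue_infos is a trimmed copy of the table defined at the top of
# Biochemistry.py, and side_chain_polarity is a hypothetical helper.
residue_infos = {
    'asp': {'name': 'asp', 'scpolarity': 'negative', 'hbondtype': 'acc'},
    'lys': {'name': 'lys', 'scpolarity': 'positive', 'hbondtype': 'don'},
}

def side_chain_polarity(resname):
    """Return the side-chain polarity for a residue name, or 'other' if unknown."""
    try:
        return residue_infos[resname.lower()]['scpolarity']
    except KeyError:
        return 'other'

print(side_chain_polarity('ASP'))   # negative
print(side_chain_polarity('XYZ'))   # other
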
124 changes: 40 additions & 84 deletions PyContact/core/ContactAnalyzer.py
@@ -2,18 +2,13 @@
import time
from .multi_trajectory import run_load_parallel
from .LogPool import *
from copy import deepcopy

import MDAnalysis
import numpy as np
from PyQt5.QtCore import pyqtSignal, QObject
from scipy.spatial import KDTree

# TODO: fix aroundPatch with gridsearch in C code using cython
from .Biochemistry import (AccumulatedContact, AtomContact, AccumulationMapIndex, HydrogenBond, TempContactAccumulate, HydrogenBondAtoms)
from ..cy_modules.cy_gridsearch import cy_find_contacts

# MDAnalysis.core.flags['use_periodic_selections'] = False
# MDAnalysis.core.flags['use_KDTree_routines'] = True


class Analyzer(QObject):
@@ -61,13 +56,13 @@ def runFrameScan(self, nproc):
except:
raise Exception

def runContactAnalysis(self, map1, map2, nproc):
def runContactAnalysis(self, map1, map2):
"""Performs a contadt analysis using nproc threads."""
self.finalAccumulatedContacts = self.analyze_contactResultsWithMaps(self.contactResults, map1, map2)

self.lastMap1 = map1
self.lastMap2 = map2
return deepcopy(self.finalAccumulatedContacts)
return self.finalAccumulatedContacts.copy()
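
Note that dict.copy() is shallow, so the returned dictionary shares its accumulated-contact objects with the analyzer, whereas the previous deepcopy duplicated them; a minimal sketch of the difference, with a plain list standing in for the contact objects:

from copy import deepcopy

accumulated = {'22 ILE': [0.8, 0.6]}   # placeholder for finalAccumulatedContacts

shallow = accumulated.copy()
deep = deepcopy(accumulated)

accumulated['22 ILE'].append(0.9)
print(shallow['22 ILE'])   # [0.8, 0.6, 0.9] -- inner object is shared
print(deep['22 ILE'])      # [0.8, 0.6]      -- fully independent copy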

def setTrajectoryData(
self, resname_array, resid_array, name_array,
@@ -119,42 +114,21 @@ def makeKeyArraysFromMaps(self, map1, map2, contact):
keys2=["none","none","none","22", "ILE, "none"]

"""
properties = [
lambda x: x,
lambda x: self.name_array[x],
lambda x: self.resid_array[x],
lambda x: self.resname_array[x],
lambda x: self.segids[x],
]
idx1 = contact.idx1
idx2 = contact.idx2
counter = 0
keys1 = []
for val in map1:
if val == 1:
if counter == AccumulationMapIndex.index:
keys1.append(idx1)
elif counter == AccumulationMapIndex.name:
keys1.append(self.name_array[idx1])
elif counter == AccumulationMapIndex.resid:
keys1.append(self.resid_array[idx1])
elif counter == AccumulationMapIndex.resname:
keys1.append(self.resname_array[idx1])
elif counter == AccumulationMapIndex.segid:
keys1.append(self.segids[idx1])
else:
keys1.append("none")
counter += 1
counter = 0
keys2 = []
for val in map2:
if val == 1:
if counter == AccumulationMapIndex.index:
keys2.append(idx2)
elif counter == AccumulationMapIndex.name:
keys2.append(self.name_array[idx2])
elif counter == AccumulationMapIndex.resid:
keys2.append(self.resid_array[idx2])
elif counter == AccumulationMapIndex.resname:
keys2.append(self.resname_array[idx2])
elif counter == AccumulationMapIndex.segid:
keys2.append(self.segids[idx2])
else:
keys2.append("none")
counter += 1
keys1 = [
properties[ii](idx1) if x == 1 else "none" for ii, x in enumerate(map1)
]
keys2 = [
properties[ii](idx2) if x == 1 else "none" for ii, x in enumerate(map2)
]
return [keys1, keys2]

def makeKeyArraysFromKey(self, key):
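
For reference, a minimal standalone sketch of the property-lambda pattern introduced in makeKeyArraysFromMaps above; the arrays below stand in for self.name_array, self.resid_array, self.resname_array and self.segids and are illustrative only:

name_array = ['N', 'CA', 'C', 'O']
resid_array = [22, 22, 22, 22]
resname_array = ['ILE', 'ILE', 'ILE', 'ILE']
segids = ['A', 'A', 'A', 'A']

properties = [
    lambda x: x,                  # AccumulationMapIndex.index
    lambda x: name_array[x],      # AccumulationMapIndex.name
    lambda x: resid_array[x],     # AccumulationMapIndex.resid
    lambda x: resname_array[x],   # AccumulationMapIndex.resname
    lambda x: segids[x],          # AccumulationMapIndex.segid
]

map1 = [0, 0, 1, 1, 0]            # accumulate by resid and resname only
idx1 = 2                          # atom index of the contact partner
keys1 = [properties[ii](idx1) if x == 1 else "none" for ii, x in enumerate(map1)]
print(keys1)                      # ['none', 'none', 22, 'ILE', 'none']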
@@ -334,21 +308,8 @@ def analyze_psf_dcd_grid(self, psf, dcd, cutoff, hbondcutoff, hbondcutangle, sel
frame = ts.frame
self.currentFrameNumber = ts.frame

# pass positions and distance to cython
natoms1 = len(sel1.atoms)
natoms2 = len(sel2.atoms)
pos1 = np.array(np.reshape(sel1.positions, (1, natoms1 * 3)), dtype=np.float64)
pos2 = np.array(np.reshape(sel2.positions, (1, natoms2 * 3)), dtype=np.float64)
xyz1 = np.array(pos1, dtype=np.float32)
xyz2 = np.array(pos2, dtype=np.float32)
# 2d array with index of atom1 being the index of the first dimension
# individual lists contain atom2 indices
nbList1 = cy_find_contacts(xyz1, natoms1, xyz2, natoms2, cutoff)

# we only need the 1st list
# nbList1 = res[:natoms1]
# nbList2 = res[natoms1:]

tree = KDTree(sel2.positions)
nbList1 = tree.query_ball_point(sel1.positions, r=cutoff)

idx1 = 0
for atom1sNeighbors in nbList1:
@@ -366,7 +327,8 @@ def analyze_psf_dcd_grid(self, psf, dcd, cutoff, hbondcutoff, hbondcutangle, sel
continue
# distance = distarray[idx1, idx2]
# weight = self.weight_function(distance)
dvec = pos1[0][3*idx1:3*idx1+3] - pos2[0][3*idx2:3*idx2+3]
# dvec = pos1[0][3*idx1:3*idx1+3] - pos2[0][3*idx2:3*idx2+3]
dvec = sel1.positions[idx1] - sel2.positions[idx2]
distance = np.sqrt(dvec.dot(dvec))
# print(dvec, distance, dvec.dtype)
# if (distance - distarray[idx1, idx2]) > 0.001:
@@ -436,7 +398,9 @@ def analyze_psf_dcd_grid(self, psf, dcd, cutoff, hbondcutoff, hbondcutangle, sel
# if (typeHeavy == AtomHBondType.acc or typeHeavy == AtomHBondType.both) and (distarray[conv_hatom, idx2] <= hbondcutoff):
# dist = distarray[conv_hatom, idx2]
# dist = np.linalg.norm(sel1.positions[conv_hatom] - sel2.positions[idx2])
dist = np.linalg.norm(pos1[0][3*conv_hatom:3*conv_hatom+3] - pos2[0][3*idx2:3*idx2+3])
dist = np.linalg.norm(
sel1.positions[conv_hatom] - sel2.positions[idx2]
)
if (dist <= hbondcutoff):
donorPosition = sel1.positions[idx1]
hydrogenPosition = np.array(sel1.positions[conv_hatom], dtype=np.float64)
@@ -465,7 +429,10 @@ def analyze_psf_dcd_grid(self, psf, dcd, cutoff, hbondcutoff, hbondcutangle, sel
# if (distarray[conv_hatom, idx2] <= hbondcutoff):
# dist = distarray[idx1, conv_hatom]
# dist = np.linalg.norm(sel1.positions[idx1] - sel2.positions[conv_hatom])
dist = np.linalg.norm(pos1[0][3*idx1:3*idx1+3] - pos2[0][3*conv_hatom:3*conv_hatom+3])
# dist = np.linalg.norm(pos1[0][3*idx1:3*idx1+3] - pos2[0][3*conv_hatom:3*conv_hatom+3])
dist = np.linalg.norm(
sel1.positions[idx1] - sel2.positions[conv_hatom]
)
if (dist <= hbondcutoff):
donorPosition = sel2.positions[idx2]
hydrogenPosition = np.array(sel2.positions[conv_hatom], dtype=np.float64)
@@ -498,7 +465,7 @@ def analyze_psf_dcd_grid(self, psf, dcd, cutoff, hbondcutoff, hbondcutangle, sel
stop = time.time()
print("grid:",stop-start)
return contactResults

def analyze_contactResultsWithMaps(self, contactResults, map1, map2):
"""Analyzes contactsResults with the given maps."""

@@ -529,37 +496,26 @@ def analyze_contactResultsWithMaps(self, contactResults, map1, map2):
for cont in frame:
key1, key2 = self.makeKeyArraysFromMaps(map1, map2, cont)
key = self.makeKeyFromKeyArrays(key1, key2)
if key in currentFrameAcc:
currentFrameAcc[key].fscore += cont.weight
currentFrameAcc[key].contributingAtomContacts.append(cont)
if cont.idx1 in self.backbone:
currentFrameAcc[key].bb1score += cont.weight
else:
currentFrameAcc[key].sc1score += cont.weight
if cont.idx2 in self.backbone:
currentFrameAcc[key].bb2score += cont.weight
else:
currentFrameAcc[key].sc2score += cont.weight
else:
if key not in currentFrameAcc:
currentFrameAcc[key] = TempContactAccumulate(key1, key2)
currentFrameAcc[key].fscore += cont.weight
currentFrameAcc[key].contributingAtomContacts.append(cont)
if cont.idx1 in self.backbone:
currentFrameAcc[key].bb1score += cont.weight
else:
currentFrameAcc[key].sc1score += cont.weight
if cont.idx2 in self.backbone:
currentFrameAcc[key].bb2score += cont.weight
else:
currentFrameAcc[key].sc2score += cont.weight
currentFrameAcc[key].fscore += cont.weight
currentFrameAcc[key].contributingAtomContacts.append(cont)
if cont.idx1 in self.backbone:
currentFrameAcc[key].bb1score += cont.weight
else:
currentFrameAcc[key].sc1score += cont.weight
if cont.idx2 in self.backbone:
currentFrameAcc[key].bb2score += cont.weight
else:
currentFrameAcc[key].sc2score += cont.weight
if key not in allkeys:
allkeys.append(key)
frame_contacts_accumulated.append(currentFrameAcc)
self.frameUpdate.emit(float(counter) / float(total))
counter += 1
accumulatedContactsDict = {}
stop = time.time()
# print(stop - start)
# print("time", stop - start)
# accumulatedContactsDict (dict)
# ---> key vs. list of TempContactAccumulated
#
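For reference, a minimal standalone sketch of the scipy KDTree neighbour search that replaces cy_find_contacts in analyze_psf_dcd_grid; the random coordinates below stand in for sel1.positions and sel2.positions. Note this form does not account for periodic boundaries; if that matters, KDTree's boxsize argument covers orthorhombic cells.

import numpy as np
from scipy.spatial import KDTree

rng = np.random.default_rng(0)
pos1 = rng.uniform(0.0, 20.0, size=(50, 3))   # selection 1 coordinates (Angstrom)
pos2 = rng.uniform(0.0, 20.0, size=(60, 3))   # selection 2 coordinates (Angstrom)
cutoff = 5.0

# one list of sel2 indices per sel1 atom, each within the cutoff
tree = KDTree(pos2)
nb_list1 = tree.query_ball_point(pos1, r=cutoff)

for idx1, neighbors in enumerate(nb_list1):
    for idx2 in neighbors:
        dvec = pos1[idx1] - pos2[idx2]
        distance = np.sqrt(dvec.dot(dvec))
        assert distance <= cutoff
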
8 changes: 4 additions & 4 deletions PyContact/core/ContactFilters.py
@@ -107,7 +107,7 @@ def filterByRange(self, contacts, residRangeA, residRangeB, mapindex):
bRanges = []
for ran in splitB:
r = ran.split(u"-")
print(r)
# print(r)
if len(r) == 1:
bRanges.append(range(int(r[0]), int(r[0])+1))
else:
@@ -148,7 +148,7 @@ def __init__(self, name, operator, value):
self.value = value

def filterContacts(self, contacts):
pass
raise NotImplementedError


class OnlyFilter(object):
@@ -193,7 +193,7 @@ def filterContacts(self, contacts):
for c in contacts:
if op.compare(c.total_time(1, 0), self.value, self.operator):
filtered.append(c)
print(unicode(len(filtered)))
# print(unicode(len(filtered)))
return filtered


@@ -219,7 +219,7 @@ def filterContacts(self, contacts):
elif self.ftype == u"HB %":
for c in contacts:
med = c.hbond_percentage()
print(med)
# print(med)
if op.compare(med, self.value, self.operator):
filtered.append(c)
return filtered
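For reference, a minimal sketch of the base-class change above: raising NotImplementedError makes a missing filterContacts override fail loudly instead of silently returning None. The subclass below is illustrative and not part of PyContact:

class BaseFilter(object):
    def filterContacts(self, contacts):
        raise NotImplementedError

class MeanScoreFilter(BaseFilter):
    """Hypothetical filter keeping contacts whose mean score exceeds a threshold."""
    def __init__(self, threshold):
        self.threshold = threshold

    def filterContacts(self, contacts):
        return [c for c in contacts if c.mean_score() > self.threshold]
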
3 changes: 2 additions & 1 deletion PyContact/core/Scripting.py
@@ -27,10 +27,11 @@ def __init__(self, topo, traj, name, configuration):

def runJob(self, ncores=1):
"""Runs contact analysis and accumulation with ncores threads."""
ncores = 1
print("Running job: " + self.name + " on " + str(ncores) + " cores.")
self.analyzer = Analyzer(self.topo,self.traj, self.configuration.cutoff, self.configuration.hbondcutoff, self.configuration.hbondcutangle, self.configuration.sel1, self.configuration.sel2)
self.analyzer.runFrameScan(ncores)
self.analyzer.runContactAnalysis(self.configuration.map1, self.configuration.map2, ncores)
self.analyzer.runContactAnalysis(self.configuration.map1, self.configuration.map2)

def writeSessionToFile(self, fname=""):
"""Writes the current analysis session to a file with either self.name + .session or fname as output filename"""