Major refactoring and getting ready for conda-forge #91

Open · wants to merge 2 commits into base: develop
3 changes: 3 additions & 0 deletions .gitignore
@@ -57,3 +57,6 @@ pycontact.egg-info/

#pytest
.pytest_cache/

*.lock
.vscode/
15 changes: 0 additions & 15 deletions .travis.yml

This file was deleted.

59 changes: 40 additions & 19 deletions PyContact/core/Biochemistry.py
@@ -2,8 +2,31 @@

import numpy as np

from ..db.DbReader import read_residue_db

residue_infos = {
'gly': {'name': 'gly', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'ala': {'name': 'ala', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'leu': {'name': 'leu', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'ile': {'name': 'ile', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'val': {'name': 'val', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'phe': {'name': 'phe', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'met': {'name': 'met', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'pro': {'name': 'pro', 'scpolarity': 'nonpolar', 'hbondtype': 'none'},
'asp': {'name': 'asp', 'scpolarity': 'negative', 'hbondtype': 'acc'},
'glu': {'name': 'glu', 'scpolarity': 'negative', 'hbondtype': 'acc'},
'lys': {'name': 'lys', 'scpolarity': 'positive', 'hbondtype': 'don'},
'arg': {'name': 'arg', 'scpolarity': 'positive', 'hbondtype': 'don'},
'hsp': {'name': 'hsp', 'scpolarity': 'positive', 'hbondtype': 'don'},
'asn': {'name': 'asn', 'scpolarity': 'polar', 'hbondtype': 'both'},
'gln': {'name': 'gln', 'scpolarity': 'polar', 'hbondtype': 'both'},
'his': {'name': 'his', 'scpolarity': 'polar', 'hbondtype': 'both'},
'hsd': {'name': 'hsd', 'scpolarity': 'polar', 'hbondtype': 'both'},
'hse': {'name': 'hse', 'scpolarity': 'polar', 'hbondtype': 'both'},
'cys': {'name': 'cys', 'scpolarity': 'polar', 'hbondtype': 'don'},
'ser': {'name': 'ser', 'scpolarity': 'polar', 'hbondtype': 'both'},
'thr': {'name': 'thr', 'scpolarity': 'polar', 'hbondtype': 'both'},
'tyr': {'name': 'tyr', 'scpolarity': 'nonpolar', 'hbondtype': 'don'},
'trp': {'name': 'trp', 'scpolarity': 'nonpolar', 'hbondtype': 'don'},
}

compare = lambda x, y: collections.Counter(x) == collections.Counter(y)

@@ -159,10 +182,13 @@ def hbond_percentage(self):

def total_time(self, ns_per_frame, threshold):
"""Returns the total time, the contact score is above the given threshold value."""
time = 0
for score in self.scoreArray:
if score > threshold:
time += ns_per_frame
# time = 0
# for score in self.scoreArray:
# if score > threshold:
# time += ns_per_frame
sa = np.array(self.scoreArray)
time = np.sum(np.where(sa > threshold, ns_per_frame, 0))
# assert time == time2
self.ttime = time
return self.ttime
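
For reference, a minimal standalone sketch of the vectorized total_time used above; the values below are illustrative and not taken from this PR:

import numpy as np

score_array = [0.1, 0.7, 0.4, 0.9, 0.6]   # hypothetical per-frame contact scores
ns_per_frame = 0.02                       # hypothetical frame spacing in ns
threshold = 0.5

# loop form (previous implementation)
time_loop = 0.0
for score in score_array:
    if score > threshold:
        time_loop += ns_per_frame

# vectorized form (new implementation): sum ns_per_frame over frames above threshold
sa = np.array(score_array)
time_vec = np.sum(np.where(sa > threshold, ns_per_frame, 0))
assert np.isclose(time_loop, time_vec)    # both give 0.06 ns here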

@@ -178,18 +204,13 @@ def median_life_time(self, ns_per_frame, threshold):

def mean_score(self):
"""Returns the mean score of the scoreArray."""
mean = 0
for score in self.scoreArray:
mean += score
mean /= len(self.scoreArray)
self.meanScore = mean
return mean
self.meanScore = np.mean(self.scoreArray)
return self.meanScore

def median_score(self):
"""Returns the median score of the scoreArray."""
med = np.median(self.scoreArray)
self.medianScore = med
return med
self.medianScore = np.median(self.scoreArray)
return self.medianScore

def life_time(self, ns_per_frame, threshold):
"""Computes the life time of a contact in ns, with the given threshold."""
@@ -239,15 +260,15 @@ def determine_ctype(self):
# return ContactType.other
self.determineBackboneSidechainType()
try:
sc1 = str(read_residue_db("scpolarity", "name", r1)[0]["scpolarity"])
sc1 = residue_infos[r1]["scpolarity"]
scpol1 = SideChainPolarity.mapping[sc1]
except IndexError:
except (IndexError, KeyError):
scpol1 = SideChainPolarity.other

try:
sc2 = str(read_residue_db("scpolarity", "name", r2)[0]["scpolarity"])
sc2 = residue_infos[r2]["scpolarity"]
scpol2 = SideChainPolarity.mapping[sc2]
except IndexError:
except (IndexError, KeyError):
scpol2 = SideChainPolarity.other

# hydrogen bonds: donor, acceptor, both
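For reference, a minimal sketch of how the new module-level residue_infos table replaces the read_residue_db call, together with the KeyError fallback added in determine_ctype; the helper name and residue subset below are illustrative only:

# Sketch only: residue_infos is a trimmed copy of the table defined at the top of
# Biochemistry.py, and side_chain_polarity is a hypothetical helper.
residue_infos = {
    'asp': {'name': 'asp', 'scpolarity': 'negative', 'hbondtype': 'acc'},
    'lys': {'name': 'lys', 'scpolarity': 'positive', 'hbondtype': 'don'},
}

def side_chain_polarity(resname):
    """Return the side-chain polarity for a residue name, or 'other' if unknown."""
    try:
        return residue_infos[resname.lower()]['scpolarity']
    except KeyError:
        return 'other'

print(side_chain_polarity('ASP'))   # negative
print(side_chain_polarity('XYZ'))   # other
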
124 changes: 40 additions & 84 deletions PyContact/core/ContactAnalyzer.py
@@ -2,18 +2,13 @@
import time
from .multi_trajectory import run_load_parallel
from .LogPool import *
from copy import deepcopy

import MDAnalysis
import numpy as np
from PyQt5.QtCore import pyqtSignal, QObject
from scipy.spatial import KDTree

# TODO: fix aroundPatch with gridsearch in C code using cython
from .Biochemistry import (AccumulatedContact, AtomContact, AccumulationMapIndex, HydrogenBond, TempContactAccumulate, HydrogenBondAtoms)
from ..cy_modules.cy_gridsearch import cy_find_contacts

# MDAnalysis.core.flags['use_periodic_selections'] = False
# MDAnalysis.core.flags['use_KDTree_routines'] = True


class Analyzer(QObject):
@@ -61,13 +56,13 @@ def runFrameScan(self, nproc):
except:
raise Exception

def runContactAnalysis(self, map1, map2, nproc):
def runContactAnalysis(self, map1, map2):
"""Performs a contadt analysis using nproc threads."""
self.finalAccumulatedContacts = self.analyze_contactResultsWithMaps(self.contactResults, map1, map2)

self.lastMap1 = map1
self.lastMap2 = map2
return deepcopy(self.finalAccumulatedContacts)
return self.finalAccumulatedContacts.copy()
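
Note that dict.copy() is shallow, so the returned dictionary shares its accumulated-contact objects with the analyzer, whereas the previous deepcopy duplicated them; a minimal sketch of the difference, with a plain list standing in for the contact objects:

from copy import deepcopy

accumulated = {'22 ILE': [0.8, 0.6]}   # placeholder for finalAccumulatedContacts

shallow = accumulated.copy()
deep = deepcopy(accumulated)

accumulated['22 ILE'].append(0.9)
print(shallow['22 ILE'])   # [0.8, 0.6, 0.9] -- inner object is shared
print(deep['22 ILE'])      # [0.8, 0.6]      -- fully independent copy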

def setTrajectoryData(
self, resname_array, resid_array, name_array,
@@ -119,42 +114,21 @@ def makeKeyArraysFromMaps(self, map1, map2, contact):
keys2=["none","none","none","22", "ILE, "none"]

"""
properties = [
lambda x: x,
lambda x: self.name_array[x],
lambda x: self.resid_array[x],
lambda x: self.resname_array[x],
lambda x: self.segids[x],
]
idx1 = contact.idx1
idx2 = contact.idx2
counter = 0
keys1 = []
for val in map1:
if val == 1:
if counter == AccumulationMapIndex.index:
keys1.append(idx1)
elif counter == AccumulationMapIndex.name:
keys1.append(self.name_array[idx1])
elif counter == AccumulationMapIndex.resid:
keys1.append(self.resid_array[idx1])
elif counter == AccumulationMapIndex.resname:
keys1.append(self.resname_array[idx1])
elif counter == AccumulationMapIndex.segid:
keys1.append(self.segids[idx1])
else:
keys1.append("none")
counter += 1
counter = 0
keys2 = []
for val in map2:
if val == 1:
if counter == AccumulationMapIndex.index:
keys2.append(idx2)
elif counter == AccumulationMapIndex.name:
keys2.append(self.name_array[idx2])
elif counter == AccumulationMapIndex.resid:
keys2.append(self.resid_array[idx2])
elif counter == AccumulationMapIndex.resname:
keys2.append(self.resname_array[idx2])
elif counter == AccumulationMapIndex.segid:
keys2.append(self.segids[idx2])
else:
keys2.append("none")
counter += 1
keys1 = [
properties[ii](idx1) if x == 1 else "none" for ii, x in enumerate(map1)
]
keys2 = [
properties[ii](idx2) if x == 1 else "none" for ii, x in enumerate(map2)
]
return [keys1, keys2]

def makeKeyArraysFromKey(self, key):
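
For reference, a minimal standalone sketch of the property-lambda pattern introduced in makeKeyArraysFromMaps above; the arrays below stand in for self.name_array, self.resid_array, self.resname_array and self.segids and are illustrative only:

name_array = ['N', 'CA', 'C', 'O']
resid_array = [22, 22, 22, 22]
resname_array = ['ILE', 'ILE', 'ILE', 'ILE']
segids = ['A', 'A', 'A', 'A']

properties = [
    lambda x: x,                  # AccumulationMapIndex.index
    lambda x: name_array[x],      # AccumulationMapIndex.name
    lambda x: resid_array[x],     # AccumulationMapIndex.resid
    lambda x: resname_array[x],   # AccumulationMapIndex.resname
    lambda x: segids[x],          # AccumulationMapIndex.segid
]

map1 = [0, 0, 1, 1, 0]            # accumulate by resid and resname only
idx1 = 2                          # atom index of the contact partner
keys1 = [properties[ii](idx1) if x == 1 else "none" for ii, x in enumerate(map1)]
print(keys1)                      # ['none', 'none', 22, 'ILE', 'none']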
@@ -334,21 +308,8 @@ def analyze_psf_dcd_grid(self, psf, dcd, cutoff, hbondcutoff, hbondcutangle, sel
frame = ts.frame
self.currentFrameNumber = ts.frame

# pass positions and distance to cython
natoms1 = len(sel1.atoms)
natoms2 = len(sel2.atoms)
pos1 = np.array(np.reshape(sel1.positions, (1, natoms1 * 3)), dtype=np.float64)
pos2 = np.array(np.reshape(sel2.positions, (1, natoms2 * 3)), dtype=np.float64)
xyz1 = np.array(pos1, dtype=np.float32)
xyz2 = np.array(pos2, dtype=np.float32)
# 2d array with index of atom1 being the index of the first dimension
# individual lists contain atom2 indices
nbList1 = cy_find_contacts(xyz1, natoms1, xyz2, natoms2, cutoff)

# we only need the 1st list
# nbList1 = res[:natoms1]
# nbList2 = res[natoms1:]

tree = KDTree(sel2.positions)
nbList1 = tree.query_ball_point(sel1.positions, r=cutoff)

idx1 = 0
for atom1sNeighbors in nbList1:
@@ -366,7 +327,8 @@ def analyze_psf_dcd_grid(self, psf, dcd, cutoff, hbondcutoff, hbondcutangle, sel
continue
# distance = distarray[idx1, idx2]
# weight = self.weight_function(distance)
dvec = pos1[0][3*idx1:3*idx1+3] - pos2[0][3*idx2:3*idx2+3]
# dvec = pos1[0][3*idx1:3*idx1+3] - pos2[0][3*idx2:3*idx2+3]
dvec = sel1.positions[idx1] - sel2.positions[idx2]
distance = np.sqrt(dvec.dot(dvec))
# print(dvec, distance, dvec.dtype)
# if (distance - distarray[idx1, idx2]) > 0.001:
@@ -436,7 +398,9 @@ def analyze_psf_dcd_grid(self, psf, dcd, cutoff, hbondcutoff, hbondcutangle, sel
# if (typeHeavy == AtomHBondType.acc or typeHeavy == AtomHBondType.both) and (distarray[conv_hatom, idx2] <= hbondcutoff):
# dist = distarray[conv_hatom, idx2]
# dist = np.linalg.norm(sel1.positions[conv_hatom] - sel2.positions[idx2])
dist = np.linalg.norm(pos1[0][3*conv_hatom:3*conv_hatom+3] - pos2[0][3*idx2:3*idx2+3])
dist = np.linalg.norm(
sel1.positions[conv_hatom] - sel2.positions[idx2]
)
if (dist <= hbondcutoff):
donorPosition = sel1.positions[idx1]
hydrogenPosition = np.array(sel1.positions[conv_hatom], dtype=np.float64)
@@ -465,7 +429,10 @@ def analyze_psf_dcd_grid(self, psf, dcd, cutoff, hbondcutoff, hbondcutangle, sel
# if (distarray[conv_hatom, idx2] <= hbondcutoff):
# dist = distarray[idx1, conv_hatom]
# dist = np.linalg.norm(sel1.positions[idx1] - sel2.positions[conv_hatom])
dist = np.linalg.norm(pos1[0][3*idx1:3*idx1+3] - pos2[0][3*conv_hatom:3*conv_hatom+3])
# dist = np.linalg.norm(pos1[0][3*idx1:3*idx1+3] - pos2[0][3*conv_hatom:3*conv_hatom+3])
dist = np.linalg.norm(
sel1.positions[idx1] - sel2.positions[conv_hatom]
)
if (dist <= hbondcutoff):
donorPosition = sel2.positions[idx2]
hydrogenPosition = np.array(sel2.positions[conv_hatom], dtype=np.float64)
@@ -498,7 +465,7 @@ def analyze_psf_dcd_grid(self, psf, dcd, cutoff, hbondcutoff, hbondcutangle, sel
stop = time.time()
print("grid:",stop-start)
return contactResults

def analyze_contactResultsWithMaps(self, contactResults, map1, map2):
"""Analyzes contactsResults with the given maps."""

@@ -529,37 +496,26 @@ def analyze_contactResultsWithMaps(self, contactResults, map1, map2):
for cont in frame:
key1, key2 = self.makeKeyArraysFromMaps(map1, map2, cont)
key = self.makeKeyFromKeyArrays(key1, key2)
if key in currentFrameAcc:
currentFrameAcc[key].fscore += cont.weight
currentFrameAcc[key].contributingAtomContacts.append(cont)
if cont.idx1 in self.backbone:
currentFrameAcc[key].bb1score += cont.weight
else:
currentFrameAcc[key].sc1score += cont.weight
if cont.idx2 in self.backbone:
currentFrameAcc[key].bb2score += cont.weight
else:
currentFrameAcc[key].sc2score += cont.weight
else:
if key not in currentFrameAcc:
currentFrameAcc[key] = TempContactAccumulate(key1, key2)
currentFrameAcc[key].fscore += cont.weight
currentFrameAcc[key].contributingAtomContacts.append(cont)
if cont.idx1 in self.backbone:
currentFrameAcc[key].bb1score += cont.weight
else:
currentFrameAcc[key].sc1score += cont.weight
if cont.idx2 in self.backbone:
currentFrameAcc[key].bb2score += cont.weight
else:
currentFrameAcc[key].sc2score += cont.weight
currentFrameAcc[key].fscore += cont.weight
currentFrameAcc[key].contributingAtomContacts.append(cont)
if cont.idx1 in self.backbone:
currentFrameAcc[key].bb1score += cont.weight
else:
currentFrameAcc[key].sc1score += cont.weight
if cont.idx2 in self.backbone:
currentFrameAcc[key].bb2score += cont.weight
else:
currentFrameAcc[key].sc2score += cont.weight
if key not in allkeys:
allkeys.append(key)
frame_contacts_accumulated.append(currentFrameAcc)
self.frameUpdate.emit(float(counter) / float(total))
counter += 1
accumulatedContactsDict = {}
stop = time.time()
# print(stop - start)
# print("time", stop - start)
# accumulatedContactsDict (dict)
# ---> key vs. list of TempContactAccumulated
#
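For reference, a minimal standalone sketch of the scipy KDTree neighbour search that replaces cy_find_contacts in analyze_psf_dcd_grid; the random coordinates below stand in for sel1.positions and sel2.positions. Note this form does not account for periodic boundaries; if that matters, KDTree's boxsize argument covers orthorhombic cells.

import numpy as np
from scipy.spatial import KDTree

rng = np.random.default_rng(0)
pos1 = rng.uniform(0.0, 20.0, size=(50, 3))   # selection 1 coordinates (Angstrom)
pos2 = rng.uniform(0.0, 20.0, size=(60, 3))   # selection 2 coordinates (Angstrom)
cutoff = 5.0

# one list of sel2 indices per sel1 atom, each within the cutoff
tree = KDTree(pos2)
nb_list1 = tree.query_ball_point(pos1, r=cutoff)

for idx1, neighbors in enumerate(nb_list1):
    for idx2 in neighbors:
        dvec = pos1[idx1] - pos2[idx2]
        distance = np.sqrt(dvec.dot(dvec))
        assert distance <= cutoff
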
8 changes: 4 additions & 4 deletions PyContact/core/ContactFilters.py
@@ -107,7 +107,7 @@ def filterByRange(self, contacts, residRangeA, residRangeB, mapindex):
bRanges = []
for ran in splitB:
r = ran.split(u"-")
print(r)
# print(r)
if len(r) == 1:
bRanges.append(range(int(r[0]), int(r[0])+1))
else:
@@ -148,7 +148,7 @@ def __init__(self, name, operator, value):
self.value = value

def filterContacts(self, contacts):
pass
raise NotImplementedError


class OnlyFilter(object):
@@ -193,7 +193,7 @@ def filterContacts(self, contacts):
for c in contacts:
if op.compare(c.total_time(1, 0), self.value, self.operator):
filtered.append(c)
print(unicode(len(filtered)))
# print(unicode(len(filtered)))
return filtered


@@ -219,7 +219,7 @@ def filterContacts(self, contacts):
elif self.ftype == u"HB %":
for c in contacts:
med = c.hbond_percentage()
print(med)
# print(med)
if op.compare(med, self.value, self.operator):
filtered.append(c)
return filtered
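For reference, a minimal sketch of the base-class change above: raising NotImplementedError makes a missing filterContacts override fail loudly instead of silently returning None. The subclass below is illustrative and not part of PyContact:

class BaseFilter(object):
    def filterContacts(self, contacts):
        raise NotImplementedError

class MeanScoreFilter(BaseFilter):
    """Hypothetical filter keeping contacts whose mean score exceeds a threshold."""
    def __init__(self, threshold):
        self.threshold = threshold

    def filterContacts(self, contacts):
        return [c for c in contacts if c.mean_score() > self.threshold]
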
3 changes: 2 additions & 1 deletion PyContact/core/Scripting.py
@@ -27,10 +27,11 @@ def __init__(self, topo, traj, name, configuration):

def runJob(self, ncores=1):
"""Runs contact analysis and accumulation with ncores threads."""
ncores = 1
print("Running job: " + self.name + " on " + str(ncores) + " cores.")
self.analyzer = Analyzer(self.topo,self.traj, self.configuration.cutoff, self.configuration.hbondcutoff, self.configuration.hbondcutangle, self.configuration.sel1, self.configuration.sel2)
self.analyzer.runFrameScan(ncores)
self.analyzer.runContactAnalysis(self.configuration.map1, self.configuration.map2, ncores)
self.analyzer.runContactAnalysis(self.configuration.map1, self.configuration.map2)

def writeSessionToFile(self, fname=""):
"""Writes the current analysis session to a file with either self.name + .session or fname as output filename"""