Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conformor prevent clash #23

Merged
merged 12 commits into from
Dec 2, 2019
63 changes: 63 additions & 0 deletions alphas/clash_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
from scipy.spatial import distance
from scipy.spatial import KDTree
from numpy import array
import math

class ClashValidator:

def __init__(self):
self.clashes = 0
# VDW = {"N":1.55, "CA":1.7, "C":1.7, "O":1.52}
VDW = np.array([1.55, 1.7, 1.7, 1.52])
self.allowed_distances = np.add.outer(VDW,VDW)

def clash_found_vectorized(self, new_chain, last_chain):
"""
This method calculates every possible distance between
each atom within new_chain and last_chain using
numpy.
"""

# concatenate every residue containing four atoms into one numpy array
last_chain_vector = np.array([atoms_array for residue in last_chain[:-2] for atoms_array in residue.values()])
new_chain_vector = np.array([atoms_array for residue in new_chain for atoms_array in residue.values()])

# get the euclidean distance between each atom
distances = distance.cdist(last_chain_vector, new_chain_vector, 'euclidean')

x, y = distances.shape
#the extra slicing is to allow us to include the last 3 atoms
allowed_distances = np.tile(self.allowed_distances, (math.ceil(x/4), math.ceil(y/4)))[:,:y]

clashes = (distances < allowed_distances)

if np.any(clashes):
self.clashes += 1
return True
return False

def clash_found_kdtree(self, new_chain,last_chain ):
"""
Abstract and pretend all molecules are one. This will make things a lot faster
"""
# concatenate every residue containing four atoms into one numpy array
last_chain_vector = np.array([atoms_array for residue in last_chain[:-2] for atoms_array in residue.values()])
new_chain_vector = np.array([atoms_array for residue in new_chain for atoms_array in residue.values()])

# init kdtrees
last_chain_kdtree = KDTree(last_chain_vector)
new_chain_kdtree = KDTree(new_chain_vector)

clashes = last_chain_kdtree.sparse_distance_matrix(new_chain_kdtree, 3.4) #maximum value to consider
for indices, distance in zip(clashes.keys(),clashes.values()):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

quick note: this can be written as clashes.items().

if distance <= 3.04:
# we don't care what the atoms are, this is a clash
return True

row = indices[0] % 4
column = indices[1] % 4

if distance <= self.allowed_distances[row, column]:
return True
return False
59 changes: 40 additions & 19 deletions alphas/conformer_builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/python3
from collections import defaultdict, deque
import copy
import logging
Expand All @@ -9,9 +8,9 @@
import pprint
import random
import sys

import numpy as np
from numpy import array
from clash_validator import ClashValidator


log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -319,6 +318,7 @@ def __init__(self, conformer, sequence, basedb, angledb):
self.register = StateHolder()
self.basedb = basedb
self.angledb = angledb
self.clash_validator = ClashValidator()

@property
def seq(self):
Expand Down Expand Up @@ -346,14 +346,15 @@ def is_bb_complete(self):
"""
bbatoms = self._bbatoms

is_bb_complete = [
len(conformer) == len(self.seq),
is_bb_complete_ = [
len(self.conformer.coords) == len(self.seq),
all(
bbatoms.issubset(res_coords.keys())
for res_coords in self.conformer.coords
),
len(self.seq) * 4 + 1 == sum(1 for res in self.conformer.coords for atom in res.keys())
]
return all(is_bb_complete)
return all(is_bb_complete_)

def get_residue_type(self, pos):
"""
Expand Down Expand Up @@ -475,19 +476,13 @@ def add_COO(self):

def build_bb(self):
n = 0
while not self.is_bb_complete() and n < 100_000:
while not self.is_bb_complete() and n < 100_0:
n += 1

# when KDTrees are implemented than we can use
# a .load() to retrieve the last state
self.register.save(self.conformer)

build_angles = self.angledb.get_fragment()

# Python now gives you ordered values =)
# order of addition is remembered
for residue_angles in build_angles.values():

try:
self.add_coord(
residue_angles,
Expand All @@ -496,8 +491,8 @@ def build_bb(self):
)
except SequenceIndexError:
log.info('Reached the end of the sequence.')
log.info(f'Is sequence complete?: {self.is_bb_complete()}')
self.add_COO()
log.info(f'Is sequence complete?: {self.is_bb_complete()}')
break

self.add_coord(
Expand All @@ -516,10 +511,36 @@ def build_bb(self):
residue_angles,
Catom,
)

try:
# last state
last_conformer = self.register.load()
except IndexError:
# register is empty
self.register.save(self.conformer)
continue

try:
clash_found = self.clash_validator.clash_found_vectorized(self.conformer.coords[len(last_conformer):], last_conformer.coords)
except ValueError:
# added COO only
clash_found = self.clash_validator.clash_found_vectorized(self.conformer.coords[-1:], last_conformer.coords[:-1])

if not clash_found:
# no clashes, save this loop
self.register.save(self.conformer)
else:
# Dont save the last_conformer if it clashes on COO only
# Another issue: last_conformer could be set up to always
# find a clash on any new loop.
# These two issues will be resolved with the same solution
self.conformer = last_conformer
self.register.save(last_conformer)

else:
if n > 100_000:
raise StopIteration('run above 100k cycles!!!!')

def save(self, filename='conformer_gen.pdb'):
self.conformer.save(filename)

Expand Down Expand Up @@ -981,11 +1002,11 @@ def get_kdtree( UDICT, start, stop ):

if __name__ == '__main__':

loop_pickle = LoopDataBase('LOOPS.pickle4')
rosetta_db = read_rosetta_db('l-caa')
loop_pickle = LoopDataBase('/Users/alaashamandy/Desktop/UNIWORK/CSC495/IDPCalcPDBDownloader/alphas/LOOPS.pickle4')
rosetta_db = read_rosetta_db('/Users/alaashamandy/Desktop/UNIWORK/CSC495/IDPCalcPDBDownloader/alphas/l-caa')

#input_sequence = "MDEYSPKRHDVAQLKFLCESLYDEGIATLGDSHHGWVNDPTSAVNLQLNDLIEHIASFVMSFKIKYPDDGDLSELVEEYLDDTYTLFSSYGINDPELQRWQKTKERLFRLFSGEYISTLMKT"
input_sequence = "PDEDRLSPLHSVAAA"
input_sequence = "MDEYSPKRHDVAQLKFLCESLYDEGIATLGDSHHGWVNDPTSAVNLQLNDLIEHIASFVMSFKIKYPDDGDLSELVEEYLDDTYTLFSSYGINDPELQRWQKTKERLFRLFSGEYISTLMKT"
# input_sequence = "PDEDRLSPLHSVAAA"
#input_sequence = "PDEDRLSPLHSV"
#input_sequence = "D"

Expand Down