Skip to content

Commit

Permalink
Merge 5486bcd into 5c6d00b
Browse files Browse the repository at this point in the history
  • Loading branch information
Felix Simkovic committed Mar 31, 2018
2 parents 5c6d00b + 5486bcd commit 1a14245
Show file tree
Hide file tree
Showing 15 changed files with 339 additions and 183 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ Changed
- Typos corrected in documentation
- ``THREE_TO_ONE`` and ``ONE_TO_THREE`` dictionaries modified to ``Enum`` objects
- ``SequeneFile.neff`` renamed to ``SequenceFile.meff``
- ``ContactMapChordFigure.get_radius_around_circle`` moved to ``conkit.plot.tools.radius_around_circle``
- ``AmiseBW.curvature`` renamed to ``AmiseBW.gauss_curvature``
Fixed
~~~~~
- ``A3mParser`` keyword argument mismatch sorted
Expand Down
2 changes: 1 addition & 1 deletion conkit/core/tests/test_contactmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def test_calculate_contact_density_3(self):
for c in [Contact(3, 5, 0.4), Contact(2, 4, 0.1), Contact(3, 4, 0.4)]:
contact_map1.add(c)
density = contact_map1.calculate_contact_density()
self.assertEqual([0.2282135747320191, 0.6337372073257503, 0.2282135747320191], density)
self.assertEqual([0.22821357473201903, 0.6337372073257503, 0.22821357473201903], density)

def test_find_1(self):
contact_map1 = ContactMap('1')
Expand Down
81 changes: 48 additions & 33 deletions conkit/misc/bandwidth.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,17 @@
from __future__ import print_function

__author__ = "Felix Simkovic"
__date__ = "02 Aug 2017"
__version__ = "1.0"
__date__ = "31 Mar 2018"
__version__ = "1.0.1"

import abc
import numpy as np

ABC = abc.ABCMeta('ABC', (object,), {})

SQRT_PI = np.sqrt(np.pi)
SQRT_2PI = np.sqrt(2. * np.pi)


class BandwidthBase(ABC):
"""Abstract class for bandwidth calculations"""
Expand All @@ -50,7 +53,6 @@ class BandwidthBase(ABC):
def bandwidth(self):
return 0.0

# REM: abstractproperty requires us to re-declare it in every child class
@property
def bw(self):
return self.bandwidth
Expand All @@ -75,52 +77,63 @@ def __init__(self, data, niterations=25, eps=1e-3):
def bandwidth(self):
data = self._data
x0 = BowmanBW(data).bandwidth
y0 = AmiseBW.optimal_bandwidth_equation(data, x0)
y0 = AmiseBW.optimal_bandwidth(data, x0)
x = 0.8 * x0
y = AmiseBW.optimal_bandwidth_equation(data, x)
y = AmiseBW.optimal_bandwidth(data, x)
for _ in np.arange(self._niterations):
x -= y * (x0 - x) / (y0 - y)
y = AmiseBW.optimal_bandwidth_equation(data, x)
y = AmiseBW.optimal_bandwidth(data, x)
if abs(y) < (self._eps * y0):
break
return x

@staticmethod
def curvature(p, x, w):
z = (x - p) / w
y = (1 * (z ** 2 - 1.0) * np.exp(-0.5 * z * z) / (w * np.sqrt(2. * np.pi)) / w ** 2).sum()
return y / p.shape[0]
def curvature(data, x, w):
"""
See Also
--------
gauss_curvature
"""
import warnings
warnings.warn("This function will be removed in a future release! Use gauss_curvature() instead")
return AmiseBW.gauss_curvature(data, x, w)

@staticmethod
def gauss_curvature(data, x, w):
M, N = data.shape
z = ((x - data) / w)**2
return (N * (z - 1.0) * (np.exp(-0.5 * z) / (w * SQRT_2PI)) / w**2).sum() / M

@staticmethod
def extended_range(mn, mx, bw, ext=3):
return mn - ext * bw, mx + ext * bw
def extended_range(min_, max_, bandwidth, ext=3):
d = bandwidth * ext
return min_ - d, max_ + d

@staticmethod
def optimal_bandwidth_equation(p, default_bw):
alpha = 1. / (2. * np.sqrt(np.pi))
def optimal_bandwidth(data, bandwidth):
M, N = data.shape
alpha = 1. / (2. * SQRT_PI)
sigma = 1.0
n = p.shape[0]
q = AmiseBW.stiffness_integral(p, default_bw)
return default_bw - ((n * q * sigma ** 4) / alpha) ** (-1.0 / (p.shape[1] + 4))
integral = AmiseBW.stiffness_integral(data, bandwidth)
return bandwidth - ((M * integral * sigma ** 4) / alpha) ** (-1.0 / (N + 4))

@staticmethod
def stiffness_integral(p, default_bw, eps=1e-4):
mn, mx = AmiseBW.extended_range(p.min(), p.max(), default_bw, ext=3)
n = 1
dx = (mx - mn) / n
yy = 0.5 * dx * (AmiseBW.curvature(p, mn, default_bw) ** 2 +
AmiseBW.curvature(p, mx, default_bw) ** 2)
# The trapezoidal rule guarantees a relative error of better than eps
# for some number of steps less than maxn.
maxn = (mx - mn) / np.sqrt(eps)
# Cap the total computation spent
maxn = 2048 if maxn > 2048 else maxn
def stiffness_integral(data, bandwidth, eps=1e-4):
min_, max_ = AmiseBW.extended_range(data.min(), data.max(), bandwidth, ext=3)
dx = 1.0 * (max_ - min_)
maxn = dx / np.sqrt(eps)
if maxn > 2048:
maxn = 2048
yy = 0.5 * dx * (
AmiseBW.gauss_curvature(data, min_, bandwidth)**2
+ AmiseBW.gauss_curvature(data, max_, bandwidth)**2
)
n = 2
while n <= maxn:
dx /= 2.
y = 0
for i in np.arange(1, n, 2):
y += AmiseBW.curvature(p, mn + i * dx, default_bw) ** 2
y += AmiseBW.gauss_curvature(data, min_ + i * dx, bandwidth) ** 2
yy = 0.5 * yy + y * dx
if n > 8 and abs(y * dx - 0.5 * yy) < eps * yy:
break
Expand All @@ -145,8 +158,8 @@ def __init__(self, data):
@property
def bandwidth(self):
data = self._data
sigma = np.sqrt((data ** 2).sum() / data.shape[0] - (data.sum() / data.shape[0]) ** 2)
return sigma * ((((data.shape[1] + 2) * data.shape[0]) / 4.) ** (-1. / (data.shape[1] + 4)))
M, N = data.shape
return np.sqrt((data ** 2).sum() / M - (data.sum() / M) ** 2) * ((((N + 2) * M) / 4.) ** (-1. / (N + 4)))


class LinearBW(BandwidthBase):
Expand Down Expand Up @@ -185,8 +198,9 @@ def __init__(self, data):
@property
def bandwidth(self):
data = self._data
M, N = data.shape
sigma = np.minimum(np.std(data, axis=0, ddof=1), (np.percentile(data, 75) - np.percentile(data, 25)) / 1.349)[0]
return 1.059 * sigma * data.shape[0] ** (-1. / (data.shape[1] + 4))
return 1.059 * sigma * M ** (-1. / (N + 4))


class SilvermanBW(BandwidthBase):
Expand All @@ -206,8 +220,9 @@ def __init__(self, data):
@property
def bandwidth(self):
data = self._data
M, N = data.shape
sigma = np.minimum(np.std(data, axis=0, ddof=1), (np.percentile(data, 75) - np.percentile(data, 25)) / 1.349)[0]
return 0.9 * sigma * (data.shape[0] * (data.shape[1] + 2) / 4.) ** (-1. / (data.shape[1] + 4))
return 0.9 * sigma * (M * (N + 2) / 4.) ** (-1. / (N + 4))


def bandwidth_factory(method):
Expand Down
80 changes: 50 additions & 30 deletions conkit/misc/tests/test_bandwidth.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,88 +10,108 @@


class TestAmiseBW(unittest.TestCase):
def test_1(self):
def test_extended_range_1(self):
min_, max_ = bandwidth.AmiseBW.extended_range(1, 5, 1.5, 3)
self.assertEqual(min_, -3.5)
self.assertEqual(max_, 9.5)

def test_gauss_curvature_1(self):
data = np.array([1, 2, 3, 4, 5, 3, 2, 3, 4], dtype=np.int64)[:, np.newaxis]
curvature = bandwidth.AmiseBW.gauss_curvature(data, -1.5, 0.5)
self.assertEqual(round(curvature, 7), 3.17e-5)

def test_stiffness_integral_1(self):
data = np.array([1, 2, 3, 4, 5, 3, 2, 3, 4], dtype=np.int64)[:, np.newaxis]
integral = bandwidth.AmiseBW.stiffness_integral(data, 2.0, 1e-4)
self.assertEqual(round(integral, 7), 0.0031009)

def test_optimal_bandwidth_1(self):
data = np.array([1, 2, 3, 4, 5, 3, 2, 3, 4], dtype=np.int64)[:, np.newaxis]
optimal = bandwidth.AmiseBW.optimal_bandwidth(data, 2.0)
self.assertEqual(round(optimal, 7), 0.4116948)

def test_bandwidth_1(self):
xy = np.array([(1, 5), (3, 3), (2, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.AmiseBW(x).bw, 7), 1.1455243)
self.assertEqual(round(bandwidth.AmiseBW(x).bandwidth, 7), 1.1455243)

def test_2(self):
def test_bandwidth_2(self):
xy = np.array([(1, 5), (3, 3), (2, 4), (1, 10), (4, 9)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.AmiseBW(x).bw, 7), 1.5310027)
self.assertEqual(round(bandwidth.AmiseBW(x).bandwidth, 7), 1.5310027)

def test_3(self):
def test_bandwidth_3(self):
xy = np.array([(3, 5), (2, 4), (3, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.AmiseBW(x).bw, 7), 0.3758801)
self.assertEqual(round(bandwidth.AmiseBW(x).bandwidth, 7), 0.3758801)


class TestBowmanBW(unittest.TestCase):
def test_1(self):
def test_bandwidth_1(self):
xy = np.array([(1, 5), (3, 3), (2, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.BowmanBW(x).bw, 7), 0.7881495)
self.assertEqual(round(bandwidth.BowmanBW(x).bandwidth, 7), 0.7881495)

def test_2(self):
def test_bandwidth_2(self):
xy = np.array([(1, 5), (3, 3), (2, 4), (1, 10), (4, 9)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.BowmanBW(x).bw, 7), 1.4223373)
self.assertEqual(round(bandwidth.BowmanBW(x).bandwidth, 7), 1.4223373)

def test_3(self):
def test_bandwidth_3(self):
xy = np.array([(3, 5), (2, 4), (3, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.BowmanBW(x).bw, 7), 0.6052020)
self.assertEqual(round(bandwidth.BowmanBW(x).bandwidth, 7), 0.6052020)


class TestLinearBW(unittest.TestCase):
def test_1(self):
def test_bandwidth_1(self):
xy = np.array([(1, 5), (3, 3), (2, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(bandwidth.LinearBW(x, threshold=8).bw, 0.625)
self.assertEqual(bandwidth.LinearBW(x, threshold=8).bandwidth, 0.625)

def test_2(self):
def test_bandwidth_2(self):
xy = np.array([(1, 5), (3, 3), (2, 4), (1, 10), (4, 9)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(bandwidth.LinearBW(x, threshold=10).bw, 1.0)
self.assertEqual(bandwidth.LinearBW(x, threshold=10).bandwidth, 1.0)

def test_3(self):
def test_bandwidth_3(self):
xy = np.array([(3, 5), (2, 4), (3, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.LinearBW(x, threshold=15).bw, 7), 0.3333333)
self.assertEqual(round(bandwidth.LinearBW(x, threshold=15).bandwidth, 7), 0.3333333)


class TestScottBW(unittest.TestCase):
def test_1(self):
def test_bandwidth_1(self):
xy = np.array([(1, 5), (3, 3), (2, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.ScottBW(x).bw, 7), 0.8357821)
self.assertEqual(round(bandwidth.ScottBW(x).bandwidth, 7), 0.8357821)

def test_2(self):
def test_bandwidth_2(self):
xy = np.array([(1, 5), (3, 3), (2, 4), (1, 10), (4, 9)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.ScottBW(x).bw, 7), 1.4513602)
self.assertEqual(round(bandwidth.ScottBW(x).bandwidth, 7), 1.4513602)

def test_3(self):
def test_bandwidth_3(self):
xy = np.array([(3, 5), (2, 4), (3, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.ScottBW(x).bw, 7), 0.5179240)
self.assertEqual(round(bandwidth.ScottBW(x).bandwidth, 7), 0.5179240)


class TestSilvermanBW(unittest.TestCase):
def test_1(self):
def test_bandwidth_1(self):
xy = np.array([(1, 5), (3, 3), (2, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.SilvermanBW(x).bw, 7), 0.7523629)
self.assertEqual(round(bandwidth.SilvermanBW(x).bandwidth, 7), 0.7523629)

def test_2(self):
def test_bandwidth_2(self):
xy = np.array([(1, 5), (3, 3), (2, 4), (1, 10), (4, 9)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.SilvermanBW(x).bw, 7), 1.3065002)
self.assertEqual(round(bandwidth.SilvermanBW(x).bandwidth, 7), 1.3065002)

def test_3(self):
def test_bandwidth_3(self):
xy = np.array([(3, 5), (2, 4), (3, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.SilvermanBW(x).bw, 7), 0.4662301)
self.assertEqual(round(bandwidth.SilvermanBW(x).bandwidth, 7), 0.4662301)


if __name__ == "__main__":
Expand Down
51 changes: 25 additions & 26 deletions conkit/plot/contactdensity.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@
import warnings

from conkit.plot.figure import Figure
from conkit.plot.tools import ColorDefinitions, _isinstance
from conkit.plot.tools import ColorDefinitions
from conkit.plot.tools import find_minima
from conkit.plot.tools import _isinstance


class ContactDensityFigure(Figure):
Expand Down Expand Up @@ -100,6 +102,7 @@ def bw_method(self):
For a full list of options, please refer to
:func:`calculate_contact_density() <conkit.core.contactmap.ContactMap.calculate_contact_density>`
"""
return self._bw_method

Expand All @@ -120,7 +123,7 @@ def hierarchy(self, hierarchy):
Raises
------
TypeError
The hierarchy is not an contact map
The hierarchy is not a :obj:`ContactMap <conkit.core.contactmap.ContactMap>`
"""
if hierarchy and _isinstance(hierarchy, "ContactMap"):
Expand All @@ -131,32 +134,28 @@ def hierarchy(self, hierarchy):
def redraw(self):
import warnings
warnings.warn("This method has been deprecated, use draw() instead")
draw()
self.draw()

def draw(self):
"""Draw the actual plot"""
dens = np.asarray(self.hierarchy.calculate_contact_density(self.bw_method))

residues = np.asarray(
list(set(sorted([c.res1_seq for c in self.hierarchy] + [c.res2_seq for c in self.hierarchy]))))
x = np.arange(residues.min(), residues.max())
self.ax.plot(
x, dens, linestyle="solid", color=ColorDefinitions.GENERAL, label="Kernel Density Estimate", zorder=2)

try:
import scipy.signal
line_kwargs = dict(linestyle="--", linewidth=1.0, alpha=0.5, color=ColorDefinitions.MISMATCH, zorder=1)
for minimum in scipy.signal.argrelmin(dens, order=1)[0]:
self.ax.axvline(x[minimum], **line_kwargs)
self.ax.axvline(0, ymin=0, ymax=0, label="Domain Boundary", **line_kwargs)
except ImportError:
warnings.warn("SciPy not installed - cannot determine local minima")

x, y = self.get_xy_data()
self.ax.plot(x, y, linestyle="solid", color=ColorDefinitions.GENERAL, label="Contact Density", zorder=2)
line_kwargs = dict(linestyle="--", linewidth=1.0, alpha=0.5, color=ColorDefinitions.MISMATCH, zorder=1)
for minimum in find_minima(y, order=1):
self.ax.axvline(x[minimum], **line_kwargs)
self.ax.axvline(0, ymin=0, ymax=0, label="Domain Boundary", **line_kwargs)
self.ax.set_xlim(x.min(), x.max())
self.ax.set_ylim(0., dens.max())
self.ax.set_ylim(0., y.max())
self.ax.set_xlabel('Residue number')
self.ax.set_ylabel('Kernel Density Estimate')

self.ax.set_ylabel('Density Estimate')
if self.legend:
self.ax.legend(
bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode="expand", borderaxespad=0., scatterpoints=1)
self.ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3, mode="expand", borderaxespad=0.,
scatterpoints=1)
# TODO: deprecate this in 0.10
if self._file_name:
self.savefig(self._file_name, dpi=self._dpi)

def get_xy_data(self):
residues = np.asarray(self.hierarchy.as_list()).flatten()
x = np.arange(residues.min(), residues.max())
y = np.asarray(self.hierarchy.calculate_contact_density(self.bw_method))
return x, y
Loading

0 comments on commit 1a14245

Please sign in to comment.