Skip to content

Commit

Permalink
Merge pull request #36 from fsimkovic/master
Browse files Browse the repository at this point in the history
Minor changes, fixes and added test cases
  • Loading branch information
Felix Simkovic committed Apr 5, 2018
2 parents 5c6d00b + d070f90 commit 843a455
Show file tree
Hide file tree
Showing 15 changed files with 399 additions and 183 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ Changed
- Typos corrected in documentation
- ``THREE_TO_ONE`` and ``ONE_TO_THREE`` dictionaries modified to ``Enum`` objects
- ``SequeneFile.neff`` renamed to ``SequenceFile.meff``
- ``ContactMapChordFigure.get_radius_around_circle`` moved to ``conkit.plot.tools.radius_around_circle``
- ``AmiseBW.curvature`` renamed to ``AmiseBW.gauss_curvature``
Fixed
~~~~~
- ``A3mParser`` keyword argument mismatch sorted
Expand Down
2 changes: 1 addition & 1 deletion conkit/core/tests/test_contactmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def test_calculate_contact_density_3(self):
for c in [Contact(3, 5, 0.4), Contact(2, 4, 0.1), Contact(3, 4, 0.4)]:
contact_map1.add(c)
density = contact_map1.calculate_contact_density()
self.assertEqual([0.2282135747320191, 0.6337372073257503, 0.2282135747320191], density)
self.assertEqual([0.22821357473201903, 0.6337372073257503, 0.22821357473201903], density)

def test_find_1(self):
contact_map1 = ContactMap('1')
Expand Down
81 changes: 48 additions & 33 deletions conkit/misc/bandwidth.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,17 @@
from __future__ import print_function

__author__ = "Felix Simkovic"
__date__ = "02 Aug 2017"
__version__ = "1.0"
__date__ = "31 Mar 2018"
__version__ = "1.0.1"

import abc
import numpy as np

ABC = abc.ABCMeta('ABC', (object,), {})

SQRT_PI = np.sqrt(np.pi)
SQRT_2PI = np.sqrt(2. * np.pi)


class BandwidthBase(ABC):
"""Abstract class for bandwidth calculations"""
Expand All @@ -50,7 +53,6 @@ class BandwidthBase(ABC):
def bandwidth(self):
return 0.0

# REM: abstractproperty requires us to re-declare it in every child class
@property
def bw(self):
return self.bandwidth
Expand All @@ -75,52 +77,63 @@ def __init__(self, data, niterations=25, eps=1e-3):
def bandwidth(self):
data = self._data
x0 = BowmanBW(data).bandwidth
y0 = AmiseBW.optimal_bandwidth_equation(data, x0)
y0 = AmiseBW.optimal_bandwidth(data, x0)
x = 0.8 * x0
y = AmiseBW.optimal_bandwidth_equation(data, x)
y = AmiseBW.optimal_bandwidth(data, x)
for _ in np.arange(self._niterations):
x -= y * (x0 - x) / (y0 - y)
y = AmiseBW.optimal_bandwidth_equation(data, x)
y = AmiseBW.optimal_bandwidth(data, x)
if abs(y) < (self._eps * y0):
break
return x

@staticmethod
def curvature(p, x, w):
z = (x - p) / w
y = (1 * (z ** 2 - 1.0) * np.exp(-0.5 * z * z) / (w * np.sqrt(2. * np.pi)) / w ** 2).sum()
return y / p.shape[0]
def curvature(data, x, w):
"""
See Also
--------
gauss_curvature
"""
import warnings
warnings.warn("This function will be removed in a future release! Use gauss_curvature() instead")
return AmiseBW.gauss_curvature(data, x, w)

@staticmethod
def gauss_curvature(data, x, w):
M, N = data.shape
z = ((x - data) / w)**2
return (N * (z - 1.0) * (np.exp(-0.5 * z) / (w * SQRT_2PI)) / w**2).sum() / M

@staticmethod
def extended_range(mn, mx, bw, ext=3):
return mn - ext * bw, mx + ext * bw
def extended_range(min_, max_, bandwidth, ext=3):
d = bandwidth * ext
return min_ - d, max_ + d

@staticmethod
def optimal_bandwidth_equation(p, default_bw):
alpha = 1. / (2. * np.sqrt(np.pi))
def optimal_bandwidth(data, bandwidth):
M, N = data.shape
alpha = 1. / (2. * SQRT_PI)
sigma = 1.0
n = p.shape[0]
q = AmiseBW.stiffness_integral(p, default_bw)
return default_bw - ((n * q * sigma ** 4) / alpha) ** (-1.0 / (p.shape[1] + 4))
integral = AmiseBW.stiffness_integral(data, bandwidth)
return bandwidth - ((M * integral * sigma ** 4) / alpha) ** (-1.0 / (N + 4))

@staticmethod
def stiffness_integral(p, default_bw, eps=1e-4):
mn, mx = AmiseBW.extended_range(p.min(), p.max(), default_bw, ext=3)
n = 1
dx = (mx - mn) / n
yy = 0.5 * dx * (AmiseBW.curvature(p, mn, default_bw) ** 2 +
AmiseBW.curvature(p, mx, default_bw) ** 2)
# The trapezoidal rule guarantees a relative error of better than eps
# for some number of steps less than maxn.
maxn = (mx - mn) / np.sqrt(eps)
# Cap the total computation spent
maxn = 2048 if maxn > 2048 else maxn
def stiffness_integral(data, bandwidth, eps=1e-4):
min_, max_ = AmiseBW.extended_range(data.min(), data.max(), bandwidth, ext=3)
dx = 1.0 * (max_ - min_)
maxn = dx / np.sqrt(eps)
if maxn > 2048:
maxn = 2048
yy = 0.5 * dx * (
AmiseBW.gauss_curvature(data, min_, bandwidth)**2
+ AmiseBW.gauss_curvature(data, max_, bandwidth)**2
)
n = 2
while n <= maxn:
dx /= 2.
y = 0
for i in np.arange(1, n, 2):
y += AmiseBW.curvature(p, mn + i * dx, default_bw) ** 2
y += AmiseBW.gauss_curvature(data, min_ + i * dx, bandwidth) ** 2
yy = 0.5 * yy + y * dx
if n > 8 and abs(y * dx - 0.5 * yy) < eps * yy:
break
Expand All @@ -145,8 +158,8 @@ def __init__(self, data):
@property
def bandwidth(self):
data = self._data
sigma = np.sqrt((data ** 2).sum() / data.shape[0] - (data.sum() / data.shape[0]) ** 2)
return sigma * ((((data.shape[1] + 2) * data.shape[0]) / 4.) ** (-1. / (data.shape[1] + 4)))
M, N = data.shape
return np.sqrt((data ** 2).sum() / M - (data.sum() / M) ** 2) * ((((N + 2) * M) / 4.) ** (-1. / (N + 4)))


class LinearBW(BandwidthBase):
Expand Down Expand Up @@ -185,8 +198,9 @@ def __init__(self, data):
@property
def bandwidth(self):
data = self._data
M, N = data.shape
sigma = np.minimum(np.std(data, axis=0, ddof=1), (np.percentile(data, 75) - np.percentile(data, 25)) / 1.349)[0]
return 1.059 * sigma * data.shape[0] ** (-1. / (data.shape[1] + 4))
return 1.059 * sigma * M ** (-1. / (N + 4))


class SilvermanBW(BandwidthBase):
Expand All @@ -206,8 +220,9 @@ def __init__(self, data):
@property
def bandwidth(self):
data = self._data
M, N = data.shape
sigma = np.minimum(np.std(data, axis=0, ddof=1), (np.percentile(data, 75) - np.percentile(data, 25)) / 1.349)[0]
return 0.9 * sigma * (data.shape[0] * (data.shape[1] + 2) / 4.) ** (-1. / (data.shape[1] + 4))
return 0.9 * sigma * (M * (N + 2) / 4.) ** (-1. / (N + 4))


def bandwidth_factory(method):
Expand Down
124 changes: 93 additions & 31 deletions conkit/misc/tests/test_bandwidth.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,93 +5,155 @@

import numpy as np
import unittest

from conkit.misc import bandwidth


class TestAmiseBW(unittest.TestCase):
def test_1(self):
def test_extended_range_1(self):
min_, max_ = bandwidth.AmiseBW.extended_range(1, 5, 1.5, 3)
self.assertEqual(min_, -3.5)
self.assertEqual(max_, 9.5)

def test_curvature_1(self):
data = np.array([1, 2, 3, 4, 5, 3, 2, 3, 4], dtype=np.int64)[:, np.newaxis]
curvature = bandwidth.AmiseBW.curvature(data, -1.5, 0.5)
self.assertEqual(round(curvature, 7), 3.17e-5)

def test_gauss_curvature_1(self):
data = np.array([1, 2, 3, 4, 5, 3, 2, 3, 4], dtype=np.int64)[:, np.newaxis]
curvature = bandwidth.AmiseBW.gauss_curvature(data, -1.5, 0.5)
self.assertEqual(round(curvature, 7), 3.17e-5)

def test_stiffness_integral_1(self):
data = np.array([1, 2, 3, 4, 5, 3, 2, 3, 4], dtype=np.int64)[:, np.newaxis]
integral = bandwidth.AmiseBW.stiffness_integral(data, 2.0, 1e-4)
self.assertEqual(round(integral, 7), 0.0031009)

def test_optimal_bandwidth_1(self):
data = np.array([1, 2, 3, 4, 5, 3, 2, 3, 4], dtype=np.int64)[:, np.newaxis]
optimal = bandwidth.AmiseBW.optimal_bandwidth(data, 2.0)
self.assertEqual(round(optimal, 7), 0.4116948)

def test_bandwidth_1(self):
xy = np.array([(1, 5), (3, 3), (2, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.AmiseBW(x).bw, 7), 1.1455243)
self.assertEqual(round(bandwidth.AmiseBW(x).bandwidth, 7), 1.1455243)

def test_2(self):
def test_bandwidth_2(self):
xy = np.array([(1, 5), (3, 3), (2, 4), (1, 10), (4, 9)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.AmiseBW(x).bw, 7), 1.5310027)
self.assertEqual(round(bandwidth.AmiseBW(x).bandwidth, 7), 1.5310027)

def test_3(self):
def test_bandwidth_3(self):
xy = np.array([(3, 5), (2, 4), (3, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.AmiseBW(x).bw, 7), 0.3758801)
self.assertEqual(round(bandwidth.AmiseBW(x).bandwidth, 7), 0.3758801)


class TestBowmanBW(unittest.TestCase):
def test_1(self):
def test_bandwidth_1(self):
xy = np.array([(1, 5), (3, 3), (2, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.BowmanBW(x).bw, 7), 0.7881495)
self.assertEqual(round(bandwidth.BowmanBW(x).bandwidth, 7), 0.7881495)

def test_2(self):
def test_bandwidth_2(self):
xy = np.array([(1, 5), (3, 3), (2, 4), (1, 10), (4, 9)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.BowmanBW(x).bw, 7), 1.4223373)
self.assertEqual(round(bandwidth.BowmanBW(x).bandwidth, 7), 1.4223373)

def test_3(self):
def test_bandwidth_3(self):
xy = np.array([(3, 5), (2, 4), (3, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.BowmanBW(x).bw, 7), 0.6052020)
self.assertEqual(round(bandwidth.BowmanBW(x).bandwidth, 7), 0.6052020)


class TestLinearBW(unittest.TestCase):
def test_1(self):
def test_bandwidth_1(self):
xy = np.array([(1, 5), (3, 3), (2, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(bandwidth.LinearBW(x, threshold=8).bw, 0.625)
self.assertEqual(bandwidth.LinearBW(x, threshold=8).bandwidth, 0.625)

def test_2(self):
def test_bandwidth_2(self):
xy = np.array([(1, 5), (3, 3), (2, 4), (1, 10), (4, 9)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(bandwidth.LinearBW(x, threshold=10).bw, 1.0)
self.assertEqual(bandwidth.LinearBW(x, threshold=10).bandwidth, 1.0)

def test_3(self):
def test_bandwidth_3(self):
xy = np.array([(3, 5), (2, 4), (3, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.LinearBW(x, threshold=15).bw, 7), 0.3333333)
self.assertEqual(round(bandwidth.LinearBW(x, threshold=15).bandwidth, 7), 0.3333333)


class TestScottBW(unittest.TestCase):
def test_1(self):
def test_bandwidth_1(self):
xy = np.array([(1, 5), (3, 3), (2, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.ScottBW(x).bw, 7), 0.8357821)
self.assertEqual(round(bandwidth.ScottBW(x).bandwidth, 7), 0.8357821)

def test_2(self):
def test_bandwidth_2(self):
xy = np.array([(1, 5), (3, 3), (2, 4), (1, 10), (4, 9)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.ScottBW(x).bw, 7), 1.4513602)
self.assertEqual(round(bandwidth.ScottBW(x).bandwidth, 7), 1.4513602)

def test_3(self):
def test_bandwidth_3(self):
xy = np.array([(3, 5), (2, 4), (3, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.ScottBW(x).bw, 7), 0.5179240)
self.assertEqual(round(bandwidth.ScottBW(x).bandwidth, 7), 0.5179240)


class TestSilvermanBW(unittest.TestCase):
def test_1(self):
def test_bandwidth_1(self):
xy = np.array([(1, 5), (3, 3), (2, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.SilvermanBW(x).bw, 7), 0.7523629)
self.assertEqual(round(bandwidth.SilvermanBW(x).bandwidth, 7), 0.7523629)

def test_2(self):
def test_bandwidth_2(self):
xy = np.array([(1, 5), (3, 3), (2, 4), (1, 10), (4, 9)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.SilvermanBW(x).bw, 7), 1.3065002)
self.assertEqual(round(bandwidth.SilvermanBW(x).bandwidth, 7), 1.3065002)

def test_3(self):
def test_bandwidth_3(self):
xy = np.array([(3, 5), (2, 4), (3, 4)])
x = np.asarray([i for (x, y) in xy for i in np.arange(x, y + 1)])[:, np.newaxis]
self.assertEqual(round(bandwidth.SilvermanBW(x).bw, 7), 0.4662301)
self.assertEqual(round(bandwidth.SilvermanBW(x).bandwidth, 7), 0.4662301)


class Test(unittest.TestCase):
def test_bandwidth_factory_1(self):
obj = bandwidth.bandwidth_factory("amise")
self.assertEqual(str(obj), "<class 'conkit.misc.bandwidth.AmiseBW'>")

def test_bandwidth_factory_2(self):
obj = bandwidth.bandwidth_factory("bowman")
self.assertEqual(str(obj), "<class 'conkit.misc.bandwidth.BowmanBW'>")

def test_bandwidth_factory_3(self):
obj = bandwidth.bandwidth_factory("linear")
self.assertEqual(str(obj), "<class 'conkit.misc.bandwidth.LinearBW'>")

def test_bandwidth_factory_4(self):
obj = bandwidth.bandwidth_factory("scott")
self.assertEqual(str(obj), "<class 'conkit.misc.bandwidth.ScottBW'>")

def test_bandwidth_factory_5(self):
obj = bandwidth.bandwidth_factory("silverman")
self.assertEqual(str(obj), "<class 'conkit.misc.bandwidth.SilvermanBW'>")

def test_bandwidth_factory_6(self):
with self.assertRaises(ValueError):
bandwidth.bandwidth_factory("SILVERMAN")

def test_bandwidth_factory_7(self):
with self.assertRaises(ValueError):
bandwidth.bandwidth_factory("Silverman")

def test_bandwidth_factory_8(self):
with self.assertRaises(ValueError):
bandwidth.bandwidth_factory("silvermn")

def test_bandwidth_factory_9(self):
with self.assertRaises(ValueError):
bandwidth.bandwidth_factory("garbage")


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 843a455

Please sign in to comment.