Finish fluctmatch-3.3

tclick · May 15, 2018 · e27ccd7 · e27ccd7
2 parents ea0d593 + 56739c8
commit e27ccd7
Show file tree

Hide file tree

Showing 70 changed files with 2,128 additions and 1,905 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 3.2.12
+current_version = 3.3.0
 commit = True
 tag = True
 

diff --git a/README.rst b/README.rst
@@ -44,9 +44,9 @@ Overview
     :alt: PyPI Package latest release
     :target: https://pypi.python.org/pypi/fluctmatch
 
-.. |commits-since| image:: https://img.shields.io/github/commits-since/tclick/python-fluctmatch/v3.2.12.svg
+.. |commits-since| image:: https://img.shields.io/github/commits-since/tclick/python-fluctmatch/v3.3.0.svg
     :alt: Commits since latest release
-    :target: https://github.com/tclick/python-fluctmatch/compare/v3.2.12...master
+    :target: https://github.com/tclick/python-fluctmatch/compare/v3.3.0...master
 
 .. |wheel| image:: https://img.shields.io/pypi/wheel/fluctmatch.svg
     :alt: PyPI Wheel

diff --git a/docs/conf.py b/docs/conf.py
@@ -26,7 +26,7 @@
 year = u'2014-2017'
 author = u'Timothy Click'
 copyright = '{0}, {1}'.format(year, author)
-version = release = u'3.2.12'
+version = release = u'3.3.0'
 
 pygments_style = 'trac'
 templates_path = ['.']

diff --git a/setup.py b/setup.py
@@ -45,7 +45,7 @@ def read(*names, **kwargs):
 
 setup(
     name="fluctmatch",
-    version="3.2.12",
+    version="3.3.0",
     license="BSD license",
     description="Elastic network model using fluctuation matching.",
     long_description="%s\n%s" % (

diff --git a/src/fluctmatch/__init__.py b/src/fluctmatch/__init__.py
@@ -20,10 +20,14 @@
     print_function,
     unicode_literals,
 )
-
 from future.builtins import dict
 
-__version__ = "3.2.12"
+import logging
+
+__version__ = "3.3.0"
 
 _MODELS = dict()
 _DESCRIBE = dict()
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
diff --git a/src/fluctmatch/__main__.py b/src/fluctmatch/__main__.py
@@ -30,6 +30,7 @@
     print_function,
     unicode_literals,
 )
+
 from fluctmatch.cli import main
 
 if __name__ == "__main__":

diff --git a/src/fluctmatch/analysis/__init__.py b/src/fluctmatch/analysis/__init__.py
@@ -21,5 +21,10 @@
     unicode_literals,
 )
 
+import logging
+
 from fluctmatch.intcor import IC
 from fluctmatch.parameter import PRM
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
diff --git a/src/fluctmatch/analysis/entropy.py b/src/fluctmatch/analysis/entropy.py
@@ -31,6 +31,7 @@
 class Entropy(object):
     """Calculate various entropic contributions from the coupling strengths.
     """
+
     def __init__(self, filename, ressep=3):
         """
         Parameters
@@ -125,8 +126,7 @@ def windiff_entropy(self, bins=100):
 
         table = self._table._separate(self._table.table)
         hist, edges = np.histogram(
-            table, range=(1e-4, table.values.max()), bins=bins
-        )
+            table, range=(1e-4, table.values.max()), bins=bins)
         hist = (hist / table.size).astype(dtype=np.float)
         xaxis = (edges[:-1] + edges[1:]) / 2
         try:
@@ -150,8 +150,7 @@ def windiff_entropy(self, bins=100):
         S_P.fillna(0., inplace=True)
         S_Q.fillna(0., inplace=True)
         entropy = -(
-            S_P.groupby(level=header).sum() + S_Q.groupby(level=header).sum()
-        )
+            S_P.groupby(level=header).sum() + S_Q.groupby(level=header).sum())
         entropy[entropy == -0.0] = entropy[entropy == -0.0].abs()
 
         return entropy
diff --git a/src/fluctmatch/analysis/fluctsca.py b/src/fluctmatch/analysis/fluctsca.py
@@ -20,6 +20,7 @@
     print_function,
     unicode_literals,
 )
+from future.builtins import range
 
 import functools
 import multiprocessing as mp
@@ -28,9 +29,9 @@
 
 import numpy as np
 import pandas as pd
-from future.builtins import range
 from scipy import linalg
 from scipy.stats import (scoreatpercentile, t)
+from sklearn.utils import extmath
 
 
 def _rand(avg_kb, std_kb, x):
@@ -68,6 +69,31 @@ def randomize(table, ntrials=100):
     return np.array(Lrand)
 
 
+def svd(kb):
+    """Calculate the singular value decomposition with an appropriate sign flip.
+
+    Parameters
+    ----------
+    kb : (M, N) array_like
+        Matrix to decompose.
+
+    Returns
+    -------
+    U : ndarray
+        Unitary matrix having left singular vectors as columns.
+        Of shape ``(M, K)``, because `full_matrices` is fixed to False here.
+    s : ndarray
+        The singular values, sorted in non-increasing order.
+        Of shape (K,), with ``K = min(M, N)``.
+    Vh : ndarray
+        Unitary matrix having right singular vectors as rows.
+        Of shape ``(K, N)``, because `full_matrices` is fixed to False here.
+    """
+    U, W, Vt = linalg.svd(kb, full_matrices=False)
+    U, Vt = extmath.svd_flip(U, Vt, u_based_decision=True)
+    return U, W, Vt
+
+
 def correlate(Usca, Lsca, kmax=6):
     """Calculate the correlation matrix of *Usca* with *Lsca* eigenvalues.
 
@@ -85,10 +111,7 @@ def correlate(Usca, Lsca, kmax=6):
     Correlation matrix
     """
     S = np.power(Lsca, 2)
-    Ucorr = [
-        np.outer(Usca[:, _].dot(S[_]), Usca.T[_])
-        for _ in range(kmax)
-    ]
+    Ucorr = [np.outer(Usca[:, _].dot(S[_]), Usca.T[_]) for _ in range(kmax)]
     return Ucorr
 
 
@@ -112,7 +135,15 @@ def chooseKpos(Lsca, Lrand, stddev=2):
     return Lsca[Lsca > value].shape[0]
 
 
-def figUnits(v1, v2, v3, units, filename, fig_path=os.getcwd(),marker='o', dotsize=9, notinunits=1):
+def figUnits(v1,
+             v2,
+             v3,
+             units,
+             filename,
+             fig_path=os.getcwd(),
+             marker='o',
+             dotsize=9,
+             notinunits=1):
     ''' 3d scatter plot specified by 'units', which must be a list of elements
     in the class Unit_. See figColors_ for the color code. Admissible color codes are in [0 1]
     (light/dark gray can also be obtained by using -1/+1).
@@ -147,11 +178,23 @@ def figUnits(v1, v2, v3, units, filename, fig_path=os.getcwd(),marker='o', dotsi
     ax.axes
 
     if notinunits == 1:
-        ax.plot(v1, v2, v3, marker, markersize=dotsize, markerfacecolor='w',
-                markeredgecolor='k')
+        ax.plot(
+            v1,
+            v2,
+            v3,
+            marker,
+            markersize=dotsize,
+            markerfacecolor='w',
+            markeredgecolor='k')
     elif len(notinunits) == 3:
-        ax.plot(notinunits[0], notinunits[1], notinunits[2], marker, markersize=dotsize,
-                markerfacecolor='w', markeredgecolor='k')
+        ax.plot(
+            notinunits[0],
+            notinunits[1],
+            notinunits[2],
+            marker,
+            markersize=dotsize,
+            markerfacecolor='w',
+            markeredgecolor='k')
 
     # Plot items in the units with colors:
     for u in units:
@@ -162,8 +205,14 @@ def figUnits(v1, v2, v3, units, filename, fig_path=os.getcwd(),marker='o', dotsi
             bgr = [.3, .3, .3]
         if u.col < 0:
             bgr = [.7, .7, .7]
-        ax.plot(v1[np.ix_(items_list)], v2[np.ix_(items_list)], v3[np.ix_(items_list)],
-                marker, markersize=dotsize, markerfacecolor=bgr, markeredgecolor='k')
+        ax.plot(
+            v1[np.ix_(items_list)],
+            v2[np.ix_(items_list)],
+            v3[np.ix_(items_list)],
+            marker,
+            markersize=dotsize,
+            markerfacecolor=bgr,
+            markeredgecolor='k')
 
     ax.set_xlabel('IC{:d}'.format(1))
     ax.set_ylabel('IC{:d}'.format(2))
@@ -183,6 +232,7 @@ class Unit(object):
             -  `vect`  = an additional vector describing the member items (ex: a list of sequence weights)
 
     """
+
     def __init__(self):
         self.name = ""
         self.items = set()
@@ -204,34 +254,37 @@ def icList(Vpica, kpos, Csca, p_cut=0.95):
     scaled_pdf = []
     all_fits = []
     for k in range(kpos):
-        pd = t.fit(Vpica[:,k])
+        pd = t.fit(Vpica[:, k])
         all_fits.append(pd)
-        iqr = scoreatpercentile(Vpica[:,k],75) - scoreatpercentile(Vpica[:,k],25)
-        binwidth=2*iqr*(len(Vpica[:,k])**(-0.33))
-        nbins=round((max(Vpica[:,k])-min(Vpica[:,k]))/binwidth)
-        h_params = np.histogram(Vpica[:,k], nbins.astype(np.int))
+        iqr = scoreatpercentile(Vpica[:, k], 75) - scoreatpercentile(
+            Vpica[:, k], 25)
+        binwidth = 2 * iqr * (len(Vpica[:, k])**(-0.33))
+        nbins = round((max(Vpica[:, k]) - min(Vpica[:, k])) / binwidth)
+        h_params = np.histogram(Vpica[:, k], nbins.astype(np.int))
         x_dist = np.linspace(min(h_params[1]), max(h_params[1]), num=100)
-        area_hist=Npos*(h_params[1][2]-h_params[1][1]);
-        scaled_pdf.append(area_hist*(t.pdf(x_dist,pd[0],pd[1],pd[2])))
-        cd = t.cdf(x_dist,pd[0],pd[1],pd[2])
+        area_hist = Npos * (h_params[1][2] - h_params[1][1])
+        scaled_pdf.append(area_hist * (t.pdf(x_dist, pd[0], pd[1], pd[2])))
+        cd = t.cdf(x_dist, pd[0], pd[1], pd[2])
         tmp = scaled_pdf[k].argmax()
-        if abs(max(Vpica[:,k])) > abs(min(Vpica[:,k])):
+        if abs(max(Vpica[:, k])) > abs(min(Vpica[:, k])):
             tail = cd[tmp:len(cd)]
         else:
             cd = 1 - cd
             tail = cd[0:tmp]
-        diff = abs(tail - p_cut);
+        diff = abs(tail - p_cut)
         x_pos = diff.argmin()
-        cutoff.append(x_dist[x_pos+tmp])
+        cutoff.append(x_dist[x_pos + tmp])
     #select the positions with significant contributions to each IC
     ic_init = []
-    for k in range(kpos): ic_init.append([i for i in range(Npos) if Vpica[i,k]> cutoff[k]])
+    for k in range(kpos):
+        ic_init.append([i for i in range(Npos) if Vpica[i, k] > cutoff[k]])
     #construct the sorted, non-redundant iclist
     sortedpos = []
     icsize = []
     ics = []
     Csca_nodiag = Csca.copy()
-    for i in range(Npos): Csca_nodiag[i,i]=0
+    for i in range(Npos):
+        Csca_nodiag[i, i] = 0
     for k in range(kpos):
         icpos_tmp = list(ic_init[k])
         for kprime in [kp for kp in range(kpos) if (kp != k)]:
@@ -240,12 +293,12 @@ def icList(Vpica, kpos, Csca, p_cut=0.95):
                 remsec = np.linalg.norm(Csca_nodiag[i,ic_init[k]]) \
                          < np.linalg.norm(Csca_nodiag[i,ic_init[kprime]])
                 if remsec: icpos_tmp.remove(i)
-        sortedpos += sorted(icpos_tmp, key=lambda i: -Vpica[i,k])
+        sortedpos += sorted(icpos_tmp, key=lambda i: -Vpica[i, k])
         icsize.append(len(icpos_tmp))
         s = Unit()
-        s.items = sorted(icpos_tmp, key=lambda i: -Vpica[i,k])
-        s.col = k/kpos
-        s.vect = -Vpica[s.items,k]
+        s.items = sorted(icpos_tmp, key=lambda i: -Vpica[i, k])
+        s.col = k / kpos
+        s.vect = -Vpica[s.items, k]
         ics.append(s)
     return ics, icsize, sortedpos, cutoff, scaled_pdf, all_fits
 
@@ -274,8 +327,9 @@ def basicICA(x, r, Niter):
     for _ in range(Niter):
         w_old = np.copy(w)
         u = w.dot(x)
-        w += r*(M*np.eye(L)+(1-2*(1./(1+np.exp(-u)))).dot(u.T)).dot(w)
-        delta = (w-w_old).ravel()
+        w += r * (M * np.eye(L) + (1 - 2 * (1. /
+                                            (1 + np.exp(-u)))).dot(u.T)).dot(w)
+        delta = (w - w_old).ravel()
         change.append(delta.dot(delta.T))
     return [w, change]
 
@@ -287,10 +341,11 @@ def rotICA(V, kmax=6, learnrate=.0001, iterations=10000):
     :Example:
        >>> Vica, W = rotICA(V, kmax=6, learnrate=.0001, iterations=10000)
     """
-    V1 = V[:,:kmax].T
+    V1 = V[:, :kmax].T
     [W, changes_s] = basicICA(V1, learnrate, iterations)
     Vica = (W.dot(V1)).T
     for n in range(kmax):
-        imax = abs(Vica[:,n]).argmax()
-        Vica[:,n] = np.sign(Vica[imax,n])*Vica[:,n]/np.linalg.norm(Vica[:,n])
+        imax = abs(Vica[:, n]).argmax()
+        Vica[:, n] = np.sign(Vica[imax, n]) * Vica[:, n] / np.linalg.norm(
+            Vica[:, n])
     return Vica, W
diff --git a/src/fluctmatch/analysis/paramstats.py b/src/fluctmatch/analysis/paramstats.py
@@ -28,6 +28,7 @@
 class ParamStats(object):
     """Calculate parameter statistics from a parameter table.
     """
+
     def __init__(self, table):
         """
 
@@ -55,10 +56,7 @@ def table_hist(self):
         A `pandas.Series` histogram
         """
         hist, bin_edges = np.histogram(
-            self._table.table,
-            bins=100,
-            density=True
-        )
+            self._table.table, bins=100, density=True)
         edges = (bin_edges[1:] + bin_edges[:-1]) / 2
         return pd.Series(hist, index=edges)
 
@@ -80,10 +78,7 @@ def interaction_hist(self):
         A `pandas.Series` histogram
         """
         hist, bin_edges = np.histogram(
-            self._table.interactions,
-            bins="auto",
-            density=True
-        )
+            self._table.interactions, bins="auto", density=True)
         edges = (bin_edges[1:] + bin_edges[:-1]) / 2
         return pd.Series(hist, index=edges)
 
@@ -105,9 +100,6 @@ def residue_hist(self):
         A `pandas.Series` histogram
         """
         hist, bin_edges = np.histogram(
-            self._table.per_residue,
-            bins="auto",
-            density=True
-        )
+            self._table.per_residue, bins="auto", density=True)
         edges = (bin_edges[1:] + bin_edges[:-1]) / 2
         return pd.Series(hist, index=edges)