ENH: scipy.special information theory functions #3382

Merged
merged 2 commits into scipy:master from argriffing:special-information-theory

4 participants

@argriffing
Collaborator

Add three information theory functions to scipy.special. These are atoms in cvxpy, they simplify some scipy.stats code, and they would presumably be useful for sklearn, statsmodels, etc. The function names are abbreviated in accordance with the scipy.special naming convention and with the corresponding cvx and cvxpy atom names, and to reduce the possibility of confusion with functions like entropy and kl_divergence, which are reductions (sum-along-axis) of these functions.

The pure Python implementation handles 0 correctly, and perhaps the functions could eventually be replaced with Cython implementations that also handle inf and negative inputs in a way that respects limits and preserves convexity.
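For illustration (not part of the PR itself), a minimal sketch of the zero-handling described above, assuming a scipy build that includes this branch:

from scipy.special import entr, rel_entr, kl_div

assert entr(0.0) == 0.0          # -0*log(0) -> 0 by convention
assert rel_entr(0.0, 0.5) == 0.0
assert kl_div(0.0, 0.5) == 0.5   # x*log(x/y) - x + y reduces to y at x == 0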

This PR replaces #3370.

@coveralls

Coverage Status

Changes Unknown when pulling fd012b2 on argriffing:special-information-theory into scipy:master.

@pv pv removed the PR label
@pv pv merged commit fd012b2 into scipy:master from argriffing:special-information-theory
@pv
Owner
pv commented

Merged in fd012b2

@pv pv added this to the 0.15.0 milestone
12 scipy/special/__init__.py
@@ -230,6 +230,18 @@
boxcox -- Compute the Box-Cox transformation.
boxcox1p -- Compute the Box-Cox transformation of 1 + x.
+
+Information Theory Functions
+----------------------------
+
+.. autosummary::
+ :toctree: generated/
+
+ entr -- entr(x) = -x*log(x)
+ rel_entr -- rel_entr(x, y) = x*log(x/y)
+ kl_div -- kl_div(x, y) = x*log(x/y) - x + y
+
+
Gamma and Related Functions
---------------------------
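As a quick numeric sanity check of the formulas listed above (illustrative only, not part of the diff):

import numpy as np
from scipy.special import entr, rel_entr, kl_div

x, y = 0.3, 0.7
assert np.isclose(entr(x), -x * np.log(x))
assert np.isclose(rel_entr(x, y), x * np.log(x / y))
assert np.isclose(kl_div(x, y), x * np.log(x / y) - x + y)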
53 scipy/special/basic.py
@@ -6,11 +6,11 @@
import numpy as np
from scipy.lib.six import xrange
-from numpy import pi, asarray, floor, isscalar, iscomplex, real, imag, sqrt, \
- where, mgrid, cos, sin, exp, place, seterr, issubdtype, extract, \
- less, vectorize, inexact, nan, zeros, sometrue, atleast_1d, sinc
+from numpy import (pi, asarray, floor, isscalar, iscomplex, real, imag, sqrt,
+ where, mgrid, cos, sin, exp, place, seterr, issubdtype, extract,
+ less, vectorize, inexact, nan, zeros, sometrue, atleast_1d, sinc)
from ._ufuncs import ellipkm1, mathieu_a, mathieu_b, iv, jv, gamma, psi, zeta, \
- hankel1, hankel2, yv, kv, gammaln, ndtri, errprint, poch, binom
+ hankel1, hankel2, yv, kv, gammaln, ndtri, errprint, poch, binom, xlogy
from . import _ufuncs
import types
from . import specfun
@@ -33,6 +33,7 @@
'sinc', 'sph_harm', 'sph_in', 'sph_inkn',
'sph_jn', 'sph_jnyn', 'sph_kn', 'sph_yn', 'y0_zeros', 'y1_zeros',
'y1p_zeros', 'yn_zeros', 'ynp_zeros', 'yv', 'yvp', 'zeta',
+ 'entr', 'rel_entr', 'kl_div',
'SpecialFunctionWarning']
@@ -1404,3 +1405,47 @@ def factorialk(n,k,exact=True):
return val
else:
raise NotImplementedError
+
+
+def entr(x):
+ """
+ entr(x) = -x*log(x)
+
+ Elementwise function for computing entropy.
+
+ Notes
+ -----
+ This function is concave.
+
+ """
+ return -xlogy(x, x)
+
+
+def rel_entr(x, y):
+ """
+ rel_entr(x, y) = x*log(x/y)
+
+ Elementwise function for computing relative entropy.
+
+ Notes
+ -----
+ This function is jointly convex in (x, y).
+
+ """
+ return xlogy(x, x) - xlogy(x, y)
+
+
+def kl_div(x, y):
+ """
+ kl_div(x, y) = x*log(x/y) - x + y
+
+ Elementwise function for computing Kullback-Leibler divergence.
+
+ Notes
+ -----
+ This function is non-negative and is jointly convex in (x, y).
+ It is zero exactly when x==y.
+
+ """
+ return xlogy(x, x) - xlogy(x, y) - x + y
+
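By way of illustration (not from the PR itself), the new elementwise functions compose with a plain sum along an axis to give the familiar scalar quantities; a minimal sketch for normalized probability vectors:

import numpy as np
from scipy.special import entr, rel_entr, kl_div

p = np.array([0.2, 0.3, 0.5])
q = np.array([0.1, 0.4, 0.5])

H = np.sum(entr(p))       # Shannon entropy: -sum(p * log(p))
D = np.sum(kl_div(p, q))  # KL divergence; equals np.sum(rel_entr(p, q))
                          # because the -p + q terms sum to zero here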
52 scipy/special/tests/test_basic.py
@@ -22,6 +22,7 @@
from __future__ import division, print_function, absolute_import
+import itertools
import warnings
import numpy as np
@@ -2891,5 +2892,56 @@ def xfunc(x, y):
assert_func_equal(special.xlog1py, w1, z1, rtol=1e-13, atol=1e-13)
+def test_entr():
+ def xfunc(x):
+ if x < 0:
+ return -np.inf
+ else:
+ return -special.xlogy(x, x)
+ values = (0, 0.5, 1.0, np.inf)
+ signs = [1]
+ arr = []
+ for sgn, v in itertools.product(signs, values):
+ arr.append(sgn * v)
+ z = np.array(arr, dtype=float)
+ w = np.vectorize(xfunc, otypes=[np.float64])(z)
+ assert_func_equal(special.entr, w, z, rtol=1e-13, atol=1e-13)
+
+
+def test_kl_div():
+ def xfunc(x, y):
+ if x < 0 or y < 0 or (y == 0 and x != 0):
+ # extension of natural domain to preserve convexity
+ return np.inf
+ elif np.isposinf(x) or np.isposinf(y):
+ # limits within the natural domain
+ return np.inf
+ elif x == 0:
+ return y
+ else:
+ return special.xlogy(x, x) - special.xlogy(x, y) - x + y
+ values = (0, 0.5, 1.0)
+ signs = [1]
+ arr = []
+ for sgna, va, sgnb, vb in itertools.product(signs, values, signs, values):
+ arr.append((sgna*va, sgnb*vb))
+ z = np.array(arr, dtype=float)
+ w = np.vectorize(xfunc, otypes=[np.float64])(z[:,0], z[:,1])
+ assert_func_equal(special.kl_div, w, z, rtol=1e-13, atol=1e-13)
+
+
+def test_rel_entr():
+ def xfunc(x, y):
+ return special.xlogy(x, x) - special.xlogy(x, y)
+ values = (0, 0.5, 1.0)
+ signs = [1]
+ arr = []
+ for sgna, va, sgnb, vb in itertools.product(signs, values, signs, values):
+ arr.append((sgna*va, sgnb*vb))
+ z = np.array(arr, dtype=float)
+ w = np.vectorize(xfunc, otypes=[np.float64])(z[:,0], z[:,1])
+ assert_func_equal(special.rel_entr, w, z, rtol=1e-13, atol=1e-13)
+
+
if __name__ == "__main__":
run_module_suite()
11 scipy/stats/_discrete_distns.py
@@ -5,7 +5,7 @@
from __future__ import division, print_function, absolute_import
from scipy import special
-from scipy.special import gammaln as gamln
+from scipy.special import entr, gammaln as gamln
from numpy import floor, ceil, log, exp, sqrt, log1p, expm1, tanh, cosh, sinh
@@ -81,8 +81,7 @@ def _stats(self, n, p):
def _entropy(self, n, p):
k = np.r_[0:n + 1]
vals = self._pmf(k, n, p)
- h = -np.sum(special.xlogy(vals, vals), axis=0)
- return h
+ return np.sum(entr(vals), axis=0)
binom = binom_gen(name='binom')
@@ -130,8 +129,7 @@ def _stats(self, p):
return binom._stats(1, p)
def _entropy(self, p):
- h = -special.xlogy(p, p) - special.xlogy(1 - p, 1 - p)
- return h
+ return entr(p) + entr(1-p)
bernoulli = bernoulli_gen(b=1, name='bernoulli')
@@ -341,8 +339,7 @@ def _stats(self, M, n, N):
def _entropy(self, M, n, N):
k = np.r_[N - (M - n):min(n, N) + 1]
vals = self.pmf(k, M, n, N)
- h = -np.sum(special.xlogy(vals, vals), axis=0)
- return h
+ return np.sum(entr(vals), axis=0)
def _sf(self, k, M, n, N):
"""More precise calculation, 1 - cdf doesn't cut it."""
35 scipy/stats/_distn_infrastructure.py
@@ -15,7 +15,8 @@
from scipy.misc import doccer
from ._distr_params import distcont, distdiscrete
-from scipy.special import comb, xlogy, chndtr, gammaln, hyp0f1
+from scipy.special import (comb, xlogy, chndtr, gammaln, hyp0f1,
+ entr, rel_entr, kl_div)
# for root finding for discrete distribution ppf, and max likelihood estimation
from scipy import optimize
@@ -2087,15 +2088,15 @@ def est_loc_scale(self, data, *args):
def _entropy(self, *args):
def integ(x):
val = self._pdf(x, *args)
- return xlogy(val, val)
+ return entr(val)
# upper limit is often inf, so suppress warnings when integrating
olderr = np.seterr(over='ignore')
- entr = -integrate.quad(integ, self.a, self.b)[0]
+ h = integrate.quad(integ, self.a, self.b)[0]
np.seterr(**olderr)
- if not np.isnan(entr):
- return entr
+ if not np.isnan(h):
+ return h
else:
# try with different limits if integration problems
low, upp = self.ppf([1e-10, 1. - 1e-10], *args)
@@ -2107,7 +2108,7 @@ def integ(x):
lower = low
else:
lower = self.a
- return -integrate.quad(integ, lower, upper)[0]
+ return integrate.quad(integ, lower, upper)[0]
def entropy(self, *args, **kwds):
"""
@@ -2341,8 +2342,7 @@ def entropy(pk, qk=None, base=None):
If only probabilities `pk` are given, the entropy is calculated as
``S = -sum(pk * log(pk), axis=0)``.
- If `qk` is not None, then compute a relative entropy (also known as
- Kullback-Leibler divergence or Kullback-Leibler distance)
+ If `qk` is not None, then compute the Kullback-Leibler divergence
``S = sum(pk * log(pk / qk), axis=0)``.
This routine will normalize `pk` and `qk` if they don't sum to 1.
@@ -2367,21 +2367,14 @@ def entropy(pk, qk=None, base=None):
pk = asarray(pk)
pk = 1.0*pk / sum(pk, axis=0)
if qk is None:
- vec = xlogy(pk, pk)
+ vec = entr(pk)
else:
qk = asarray(qk)
if len(qk) != len(pk):
raise ValueError("qk and pk must have same length.")
qk = 1.0*qk / sum(qk, axis=0)
- # If qk is zero anywhere, then unless pk is zero at those places
- # too, the relative entropy is infinite.
- mask = qk == 0.0
- qk[mask] = 1.0 # Avoid the divide-by-zero warning
- quotient = pk / qk
- vec = -xlogy(pk, quotient)
- vec[mask & (pk != 0.0)] = -inf
- vec[mask & (pk == 0.0)] = 0.0
- S = -sum(vec, axis=0)
+ vec = kl_div(pk, qk)
+ S = sum(vec, axis=0)
if base is not None:
S /= log(base)
return S
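A quick check of the rewritten reduction (illustrative; it relies only on the algebraic identity, not on scipy.stats internals): once pk and qk are both normalized, the extra -pk + qk terms in kl_div sum to zero, so the result still matches sum(pk * log(pk / qk)):

import numpy as np
from scipy.special import kl_div, rel_entr

pk = np.array([0.5, 0.5])
qk = np.array([0.9, 0.1])
assert np.isclose(np.sum(kl_div(pk, qk)), np.sum(rel_entr(pk, qk)))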
@@ -3042,14 +3035,14 @@ def _entropy(self, *args):
else:
mu = int(self.stats(*args, **{'moments': 'm'}))
val = self.pmf(mu, *args)
- ent = -xlogy(val, val)
+ ent = entr(val)
k = 1
term = 1.0
while (abs(term) > _EPS):
val = self.pmf(mu+k, *args)
- term = -xlogy(val, val)
+ term = entr(val)
val = self.pmf(mu-k, *args)
- term -= xlogy(val, val)
+ term += entr(val)
k += 1
ent += term
return ent
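The loop above accumulates entr(pmf) terms outward from the mean until they drop below _EPS; for a distribution with finite support, the same quantity can be sketched in one line (illustrative only):

import numpy as np
from scipy.special import entr
from scipy.stats import binom

n, p = 10, 0.3
k = np.arange(n + 1)
h = np.sum(entr(binom.pmf(k, n, p)))  # should match binom.entropy(n, p)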