Fix unit tests, handle overflow behavior in expongauss.

* Overflow in the exp occured for large negative x values, causing nan return. Fixed, although there is still a warning that overflow has occurred. Is that worth turning off inside function? * Add logpdf, sf functions * Fix spacing in docstring. * Added to _distr_params, note added in test_continuous_basic.py that things have to be added there. * Added to dists in test_distributions.py, also some specific unit tests added. * Left the explicit _argcheck in there, since it seems helpful to be explicit rather than relying on default.
scipy · Feb 20, 2015 · 09f0831 · 09f0831
1 parent 20fe8e5
commit 09f0831
Show file tree

Hide file tree

Showing 7 changed files with 95 additions and 44 deletions.
diff --git a/THANKS.txt b/THANKS.txt
@@ -139,7 +139,7 @@ Brandon Liu for stats.combine_pvalues.
 Clark Fitzgerald for namedtuple outputs in scipy.stats.
 Florian Wilhelm for usage of RandomState in scipy.stats distributions.
 Robert T. McGibbon for Levinson-Durbin Toeplitz solver.
-Alex Conley for the Exponentiall Modified Gaussian distribution.
+Alex Conley for the Exponentially Modified Normal distribution.
 
 
 Institutions

diff --git a/doc/release/0.16.0-notes.rst b/doc/release/0.16.0-notes.rst
@@ -61,8 +61,8 @@ of the forward and backward passes was added to `scipy.signal.filtfilt`.
 The Wishart distribution and its inverse have been added, as
 `scipy.stats.wishart` and `scipy.stats.invwishart`.
 
-The Exponentially Modified Gaussian distribution has been
-added as `scipy.stats.expongauss`.
+The Exponentially Modified Normal distribution has been
+added as `scipy.stats.exponnorm`.
 
 `scipy.optimize` improvements
 -----------------------------

diff --git a/scipy/stats/_constants.py b/scipy/stats/_constants.py
@@ -13,6 +13,10 @@
 # The largest [in magnitude] usable floating value.
 _XMAX = np.finfo(float).machar.xmax
 
+# The log of the largest usable floating value; useful for knowing
+# when exp(something) will overflow
+_LOGXMAX = np.log(_XMAX)
+
 # The smallest [in magnitude] usable floating value.
 _XMIN = np.finfo(float).machar.xmin
 

diff --git a/scipy/stats/_continuous_distns.py b/scipy/stats/_continuous_distns.py
@@ -30,8 +30,7 @@
         _ncx2_log_pdf, _ncx2_pdf, _ncx2_cdf, get_distribution_names,
         )
 
-from ._constants import _XMIN, _EULER, _ZETA3
-
+from ._constants import _XMIN, _EULER, _ZETA3, _XMAX, _LOGXMAX
 
 ## Kolmogorov-Smirnov one-sided and two-sided test statistics
 class ksone_gen(rv_continuous):
@@ -970,57 +969,67 @@ def _entropy(self):
 expon = expon_gen(a=0.0, name='expon')
 
 
-## Exponentially Modified Gaussian (exponential distribution
-##  with parameter l convolved with a Gaussian with location loc
-##  and sigma s)
-class expongauss_gen(rv_continuous):
-    """An exponentially modified Gaussian continuous random variable.
+## Exponentially Modified Normal (exponential distribution
+##  convolved with a Normal).
+## This is called an exponentially modified gaussian on wikipedia
+class exponnorm_gen(rv_continuous):
+    """An exponentially modified Normal continuous random variable.
 
     %(before_notes)s
 
     Notes
     -----
-    The probability density function for `expongauss` is::
-
-        expongauss.pdf(x, lam, s) = 
-           lam/2 * exp(lam/2 * (lam*s**2-2*x)) * erfc((lam*s**2-x)/(sqrt(2)*s))
+    The probability density function for `exponnorm` is::
 
-    for ``lam > 0, s > 0``.
+        exponnorm.pdf(x, K) = 1/(2*K) exp(1/(2 * K**2)) exp(-x / K) * erfc(- (x - 1/K) / sqrt(2))
+           
+    where the shape parameter ``K`` > 0.
 
+    It can be thought of as the sum of a normally distributed random
+    value with mean ``loc`` and sigma ``scale`` and an exponentially
+    distributed random number with a pdf proportional to exp(-lambda * x)
+    where lambda = (``K`` * ``scale``)**(-1).
 
-    The two shape parameters for `expongauss` (``lam`` and ``s``) must
-    be set explicitly.
     .. versionadded:: 0.16.0
 
     %(example)s
 
     """
-    def _rvs(self, lam, s):
-        expval = self._random_state.standard_exponential(self._size) / lam
-        gval = s * self._random_state.standard_normal(self._size)
+    def _rvs(self, K):
+        expval = self._random_state.standard_exponential(self._size) * K
+        gval = self._random_state.standard_normal(self._size)
         return expval + gval
 
-    def _pdf(self, x, lam, s):
-        ls2 = lam * s * s
-        exparg = 0.5 * lam * (ls2 - 2 * x)
-        erfcarg = (ls2 - x) / (sqrt(2) * s)
-        return 0.5 * lam * exp(exparg) * erfc(erfcarg)
-
-    def _cdf(self, x, lam, s):
-        u = lam * x
-        v = lam * s
-        exparg = -u + 0.5 * v * v
-        return special.ndtr(x / s) - exp(exparg) * special.ndtr(x / s - v)
-
-    def _stats(self, lam, s):
-        invlam = 1.0 / lam
-        islam = 1.0 / (s * lam)
-        islamsq = islam * islam
-        skew = 2.0 * (1 + islamsq)**(-1.5) * islam**3
-        kurt = 2 * (islamsq * (3 * islamsq + 2) + 1) / (1 + islamsq)**2 - 3
-        return invlam, s * s + invlam * invlam, skew, kurt
-
-expongauss = expongauss_gen(name='expongauss')
+    def _pdf(self, x, K):
+        invK = 1.0 / K
+        exparg = 0.5 * invK**2 - invK * x
+        # Avoid overflows; setting exp(exparg) to the max float works
+        #  all right here
+        expval = _lazywhere(exparg < _LOGXMAX, (exparg,), exp, _XMAX)
+        return 0.5 * invK * expval * erfc(-(x - invK) / sqrt(2))
+
+    def _logpdf(self, x, K):
+        invK = 1.0 / K
+        exparg = 0.5 * invK**2 - invK * x
+        return exparg + log(0.5 * invK * erfc(-(x - invK) / sqrt(2)))
+
+    def _cdf(self, x, K):
+        invK = 1.0 / K
+        expval = invK * (0.5 * invK - x)
+        return special.ndtr(x) - exp(expval) * special.ndtr(x - invK)
+
+    def _sf(self, x, K):
+        invK = 1.0 / K
+        expval = invK * (0.5 * invK - x)
+        return special.ndtr(-x) + exp(expval) * special.ndtr(x - invK)
+
+    def _stats(self, K):
+        K2 = K * K
+        opK2 = 1.0 + K2
+        skw = 2 * K**3 * opK2**(-1.5)
+        krt = 6.0 * K2 * K2 * opK2**(-2)
+        return K, opK2, skw, krt
+exponnorm = exponnorm_gen(name='exponnorm')
 
 
 class exponweib_gen(rv_continuous):

diff --git a/scipy/stats/_distr_params.py b/scipy/stats/_distr_params.py
@@ -18,6 +18,7 @@
     ['dweibull', (2.0685080649914673,)],
     ['erlang', (10,)],
     ['expon', ()],
+    ['exponnorm', (1.5,)],
     ['exponpow', (2.697119160358469,)],
     ['exponweib', (2.8923945291034436, 1.9505288745913174)],
     ['f', (29, 18)],

diff --git a/scipy/stats/tests/test_continuous_basic.py b/scipy/stats/tests/test_continuous_basic.py
@@ -25,6 +25,9 @@
 not for numerically exact results.
 """
 
+## Note that you need to add new distributions you want tested
+## to _distr_params
+
 DECIMAL = 5  # specify the precision of the tests  # increased from 0 to 5
 
 ## Last four of these fail all around. Need to be checked

diff --git a/scipy/stats/tests/test_distributions.py b/scipy/stats/tests/test_distributions.py
@@ -33,8 +33,8 @@
 dists = ['uniform','norm','lognorm','expon','beta',
          'powerlaw','bradford','burr','fisk','cauchy','halfcauchy',
          'foldcauchy','gamma','gengamma','loggamma',
-         'alpha','anglit','arcsine','betaprime',
-         'dgamma','exponweib','exponpow','frechet_l','frechet_r',
+         'alpha','anglit','arcsine','betaprime','dgamma',
+         'exponnorm', 'exponweib','exponpow','frechet_l','frechet_r',
          'gilbrat','f','ncf','chi2','chi','nakagami','genpareto',
          'genextreme','genhalflogistic','pareto','lomax','halfnorm',
          'halflogistic','fatiguelife','foldnorm','ncx2','t','nct',
@@ -775,6 +775,40 @@ def test_tail(self):  # Regression test for ticket 807
         assert_equal(stats.expon.cdf(1e-18), 1e-18)
         assert_equal(stats.expon.isf(stats.expon.sf(40)), 40)
 
+
+class TestExponNorm(TestCase):
+    def test_moments(self):
+        # Some moment test cases based on non-loc/scaled formula
+        def get_moms(lam, sig, mu):
+            # See wikipedia for these formulae
+            #  where it is listed as an exponentially modified gaussian
+            opK2 = 1.0 + 1 / (lam*sig)**2
+            exp_skew = 2 / (lam * sig)**3 * opK2**(-1.5)
+            exp_kurt = 6.0 * (1 + (lam * sig)**2)**(-2)
+            return [mu + 1/lam, sig*sig + 1.0/(lam*lam), exp_skew, exp_kurt] 
+
+        mu, sig, lam = 0, 1, 1        
+        K = 1.0 / (lam * sig)
+        sts = stats.exponnorm.stats(K, loc=mu, scale=sig, moments='mvsk')
+        assert_almost_equal(sts, get_moms(lam, sig, mu))
+        mu, sig, lam = -3, 2, 0.1
+        K = 1.0 / (lam * sig)
+        sts = stats.exponnorm.stats(K, loc=mu, scale=sig, moments='mvsk')
+        assert_almost_equal(sts, get_moms(lam, sig, mu))
+        mu, sig, lam = 0, 3, 1
+        K = 1.0 / (lam * sig)
+        sts = stats.exponnorm.stats(K, loc=mu, scale=sig, moments='mvsk')
+        assert_almost_equal(sts, get_moms(lam, sig, mu))
+        mu, sig, lam = -5, 11, 3.5
+        K = 1.0 / (lam * sig)
+        sts = stats.exponnorm.stats(K, loc=mu, scale=sig, moments='mvsk')
+        assert_almost_equal(sts, get_moms(lam, sig, mu))
+
+    def test_extremes_x(self):
+        # Test for extreme values against overflows
+        assert_almost_equal(stats.exponnorm.pdf(-900, 1), 0.0)
+        assert_almost_equal(stats.exponnorm.pdf(+900, 1), 0.0)
+
 
 class TestGenExpon(TestCase):
     def test_pdf_unity_area(self):
@@ -1940,7 +1974,7 @@ def test_stats_shapes_argcheck():
     # anyway, so some distributions may or may not fail.
 
 
-## Test subclassing distributions w/ explicit shapes
+# Test subclassing distributions w/ explicit shapes
 
 class _distr_gen(stats.rv_continuous):
     def _pdf(self, x, a):