
Commit

Merge pull request #8554 from WarrenWeckesser/uniform-fit
ENH: stats: Use explicit MLE formulas in uniform.fit()
ev-br committed Apr 1, 2018
2 parents b643769 + dec2a82 commit d468c4e
Showing 2 changed files with 177 additions and 0 deletions.
159 changes: 159 additions & 0 deletions scipy/stats/_continuous_distns.py
@@ -5902,6 +5902,16 @@ def integ(p):
tukeylambda = tukeylambda_gen(name='tukeylambda')


class FitUniformFixedScaleDataError(FitDataError):
def __init__(self, ptp, fscale):
self.args = (
"Invalid values in `data`. Maximum likelihood estimation with "
"the uniform distribution and fixed scale requires that "
"data.ptp() <= fscale, but data.ptp() = %r and fscale = %r." %
(ptp, fscale),
)


class uniform_gen(rv_continuous):
r"""A uniform continuous random variable.
@@ -5930,6 +5940,155 @@ def _stats(self):
def _entropy(self):
return 0.0

    def fit(self, data, *args, **kwds):
        """
        Maximum likelihood estimate for the location and scale parameters.

        `uniform.fit` uses only the following parameters. Because exact
        formulas are used, the parameters related to optimization that are
        available in the `fit` method of other distributions are ignored
        here. The only positional argument accepted is `data`.

        Parameters
        ----------
        data : array_like
            Data to use in calculating the maximum likelihood estimate.
        floc : float, optional
            Hold the location parameter fixed to the specified value.
        fscale : float, optional
            Hold the scale parameter fixed to the specified value.

        Returns
        -------
        loc, scale : float
            Maximum likelihood estimates for the location and scale.

        Notes
        -----
        An error is raised if `floc` is given and any values in `data` are
        less than `floc`, or if `fscale` is given and `fscale` is less
        than ``data.max() - data.min()``. An error is also raised if both
        `floc` and `fscale` are given.

        Examples
        --------
        >>> from scipy.stats import uniform

        We'll fit the uniform distribution to `x`:

        >>> x = np.array([2, 2.5, 3.1, 9.5, 13.0])

        For a uniform distribution MLE, the location is the minimum of the
        data, and the scale is the maximum minus the minimum.

        >>> loc, scale = uniform.fit(x)
        >>> loc
        2.0
        >>> scale
        11.0

        If we know the data comes from a uniform distribution where the support
        starts at 0, we can use `floc=0`:

        >>> loc, scale = uniform.fit(x, floc=0)
        >>> loc
        0.0
        >>> scale
        13.0

        Alternatively, if we know the length of the support is 12, we can use
        `fscale=12`:

        >>> loc, scale = uniform.fit(x, fscale=12)
        >>> loc
        1.5
        >>> scale
        12.0

        In that last example, the support interval is [1.5, 13.5]. This
        solution is not unique. For example, the distribution with ``loc=2``
        and ``scale=12`` has the same likelihood as the one above. When
        `fscale` is given and it is larger than ``data.max() - data.min()``,
        the parameters returned by the `fit` method center the support over
        the interval ``[data.min(), data.max()]``.

        """
if len(args) > 0:
raise TypeError("Too many arguments.")

floc = kwds.pop('floc', None)
fscale = kwds.pop('fscale', None)

# Ignore the optimizer-related keyword arguments, if given.
kwds.pop('loc', None)
kwds.pop('scale', None)
kwds.pop('optimizer', None)
if kwds:
raise TypeError("Unknown arguments: %s." % kwds)

if floc is not None and fscale is not None:
# This check is for consistency with `rv_continuous.fit`.
raise ValueError("All parameters fixed. There is nothing to "
"optimize.")

data = np.asarray(data)

# MLE for the uniform distribution
# --------------------------------
# The PDF is
#
# f(x, loc, scale) = {1/scale for loc <= x <= loc + scale
# {0 otherwise}
#
# The likelihood function is
# L(x, loc, scale) = (1/scale)**n
# where n is len(x), assuming loc <= x <= loc + scale for all x.
# The log-likelihood is
# l(x, loc, scale) = -n*log(scale)
# The log-likelihood is maximized by making scale as small as possible,
# while keeping loc <= x <= loc + scale. So if neither loc nor scale
# are fixed, the log-likelihood is maximized by choosing
# loc = x.min()
# scale = x.ptp()
# If loc is fixed, it must be less than or equal to x.min(), and then
# the scale is
# scale = x.max() - loc
# If scale is fixed, it must not be less than x.ptp(). If scale is
# greater than x.ptp(), the solution is not unique. Note that the
# likelihood does not depend on loc, except for the requirement that
# loc <= x <= loc + scale. All choices of loc for which
# x.max() - scale <= loc <= x.min()
# have the same log-likelihood. In this case, we choose loc such that
# the support is centered over the interval [data.min(), data.max()]:
        #    loc = x.min() - 0.5*(scale - x.ptp())

if fscale is None:
# scale is not fixed.
if floc is None:
# loc is not fixed, scale is not fixed.
loc = data.min()
scale = data.ptp()
else:
# loc is fixed, scale is not fixed.
loc = floc
scale = data.max() - loc
if data.min() < loc:
raise FitDataError("uniform", lower=loc, upper=loc + scale)
else:
# loc is not fixed, scale is fixed.
ptp = data.ptp()
if ptp > fscale:
raise FitUniformFixedScaleDataError(ptp=ptp, fscale=fscale)
# If ptp < fscale, the ML estimate is not unique; see the comments
# above. We choose the distribution for which the support is
# centered over the interval [data.min(), data.max()].
loc = data.min() - 0.5*(fscale - ptp)
scale = fscale

# We expect the return values to be floating point, so ensure it
# by explicitly converting to float.
return float(loc), float(scale)


uniform = uniform_gen(a=0.0, b=1.0, name='uniform')

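The derivation spelled out in the comments of `fit` above can be sanity-checked numerically. The following standalone sketch is not part of the commit; the `loglike` helper is invented here purely for illustration. It evaluates the uniform log-likelihood directly and confirms the closed-form estimates, including the flat-likelihood case with a fixed scale:

import numpy as np
from scipy import stats

x = np.array([2, 2.5, 3.1, 9.5, 13.0])

def loglike(loc, scale, data):
    # Uniform log-likelihood: -n*log(scale) when all data lie in
    # [loc, loc + scale], and -inf otherwise.
    if data.min() < loc or data.max() > loc + scale:
        return -np.inf
    return -len(data) * np.log(scale)

# Neither parameter fixed: loc = x.min(), scale = x.ptp().
loc, scale = stats.uniform.fit(x)
print(loc, scale)                     # 2.0 11.0
print(loglike(loc, scale, x))         # -5*log(11)
print(loglike(loc, scale - 1e-6, x))  # -inf: a smaller scale excludes x.max()

# Fixed scale larger than x.ptp(): the likelihood is flat in loc over
# [x.max() - scale, x.min()], so fit() centers the support on the data.
loc12, scale12 = stats.uniform.fit(x, fscale=12)
print(loc12, scale12)                 # 1.5 12.0
print(loglike(1.5, 12, x) == loglike(2.0, 12, x))  # True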
18 changes: 18 additions & 0 deletions scipy/stats/tests/test_distributions.py
@@ -1735,6 +1735,24 @@ def test_lognorm_fit(self):
assert_equal(loc, 1)
assert_allclose(scale, np.exp(lnxm1.mean()), rtol=1e-12)

def test_uniform_fit(self):
x = np.array([1.0, 1.1, 1.2, 9.0])

loc, scale = stats.uniform.fit(x)
assert_equal(loc, x.min())
assert_equal(scale, x.ptp())

loc, scale = stats.uniform.fit(x, floc=0)
assert_equal(loc, 0)
assert_equal(scale, x.max())

loc, scale = stats.uniform.fit(x, fscale=10)
assert_equal(loc, 0)
assert_equal(scale, 10)

assert_raises(ValueError, stats.uniform.fit, x, floc=2.0)
assert_raises(ValueError, stats.uniform.fit, x, fscale=5.0)

def test_fshapes(self):
# take a beta distribution, with shapes='a, b', and make sure that
# fa is equivalent to f0, and fb is equivalent to f1
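A possible follow-on to `test_uniform_fit` above, sketched here rather than taken from the commit, would assert the documented centering behavior when `fscale` exceeds ``data.ptp()``. It assumes the same test class and imports as the surrounding tests:

    def test_uniform_fit_fixed_scale_centered(self):
        # With fscale > data.ptp(), the docstring says the returned support
        # is centered over [data.min(), data.max()], so the gap on each
        # side of the data is (fscale - ptp)/2.
        x = np.array([1.0, 1.1, 1.2, 9.0])
        fscale = 10.0
        loc, scale = stats.uniform.fit(x, fscale=fscale)
        assert_equal(scale, fscale)
        assert_allclose(loc, x.min() - 0.5*(fscale - x.ptp()), rtol=1e-12)
        assert_allclose(loc + scale - x.max(), x.min() - loc, rtol=1e-12)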
