
Commit

Merge pull request #8554 from WarrenWeckesser/uniform-fit
ENH: stats: Use explicit MLE formulas in uniform.fit()
ev-br committed Apr 1, 2018
2 parents b643769 + dec2a82 commit d468c4e
Showing 2 changed files with 177 additions and 0 deletions.
159 changes: 159 additions & 0 deletions scipy/stats/_continuous_distns.py
@@ -5902,6 +5902,16 @@ def integ(p):
tukeylambda = tukeylambda_gen(name='tukeylambda')


class FitUniformFixedScaleDataError(FitDataError):
def __init__(self, ptp, fscale):
self.args = (
"Invalid values in `data`. Maximum likelihood estimation with "
"the uniform distribution and fixed scale requires that "
"data.ptp() <= fscale, but data.ptp() = %r and fscale = %r." %
(ptp, fscale),
)


class uniform_gen(rv_continuous):
r"""A uniform continuous random variable.
@@ -5930,6 +5940,155 @@ def _stats(self):
def _entropy(self):
return 0.0

    def fit(self, data, *args, **kwds):
        """
        Maximum likelihood estimate for the location and scale parameters.

        `uniform.fit` uses only the following parameters. Because exact
        formulas are used, the parameters related to optimization that are
        available in the `fit` method of other distributions are ignored
        here. The only positional argument accepted is `data`.

        Parameters
        ----------
        data : array_like
            Data to use in calculating the maximum likelihood estimate.
        floc : float, optional
            Hold the location parameter fixed to the specified value.
        fscale : float, optional
            Hold the scale parameter fixed to the specified value.

        Returns
        -------
        loc, scale : float
            Maximum likelihood estimates for the location and scale.

        Notes
        -----
        An error is raised if `floc` is given and any values in `data` are
        less than `floc`, or if `fscale` is given and `fscale` is less
        than ``data.max() - data.min()``. An error is also raised if both
        `floc` and `fscale` are given.

        Examples
        --------
        >>> from scipy.stats import uniform

        We'll fit the uniform distribution to `x`:

        >>> x = np.array([2, 2.5, 3.1, 9.5, 13.0])

        For a uniform distribution MLE, the location is the minimum of the
        data, and the scale is the maximum minus the minimum.

        >>> loc, scale = uniform.fit(x)
        >>> loc
        2.0
        >>> scale
        11.0

        If we know the data comes from a uniform distribution where the support
        starts at 0, we can use `floc=0`:

        >>> loc, scale = uniform.fit(x, floc=0)
        >>> loc
        0.0
        >>> scale
        13.0

        Alternatively, if we know the length of the support is 12, we can use
        `fscale=12`:

        >>> loc, scale = uniform.fit(x, fscale=12)
        >>> loc
        1.5
        >>> scale
        12.0

        In that last example, the support interval is [1.5, 13.5]. This
        solution is not unique. For example, the distribution with ``loc=2``
        and ``scale=12`` has the same likelihood as the one above. When
        `fscale` is given and it is larger than ``data.max() - data.min()``,
        the parameters returned by the `fit` method center the support over
        the interval ``[data.min(), data.max()]``.

        """
if len(args) > 0:
raise TypeError("Too many arguments.")

floc = kwds.pop('floc', None)
fscale = kwds.pop('fscale', None)

# Ignore the optimizer-related keyword arguments, if given.
kwds.pop('loc', None)
kwds.pop('scale', None)
kwds.pop('optimizer', None)
if kwds:
raise TypeError("Unknown arguments: %s." % kwds)

if floc is not None and fscale is not None:
# This check is for consistency with `rv_continuous.fit`.
raise ValueError("All parameters fixed. There is nothing to "
"optimize.")

data = np.asarray(data)

# MLE for the uniform distribution
# --------------------------------
# The PDF is
#
# f(x, loc, scale) = {1/scale for loc <= x <= loc + scale
# {0 otherwise}
#
# The likelihood function is
# L(x, loc, scale) = (1/scale)**n
# where n is len(x), assuming loc <= x <= loc + scale for all x.
# The log-likelihood is
# l(x, loc, scale) = -n*log(scale)
# The log-likelihood is maximized by making scale as small as possible,
# while keeping loc <= x <= loc + scale. So if neither loc nor scale
# are fixed, the log-likelihood is maximized by choosing
# loc = x.min()
# scale = x.ptp()
# If loc is fixed, it must be less than or equal to x.min(), and then
# the scale is
# scale = x.max() - loc
# If scale is fixed, it must not be less than x.ptp(). If scale is
# greater than x.ptp(), the solution is not unique. Note that the
# likelihood does not depend on loc, except for the requirement that
# loc <= x <= loc + scale. All choices of loc for which
# x.max() - scale <= loc <= x.min()
# have the same log-likelihood. In this case, we choose loc such that
# the support is centered over the interval [data.min(), data.max()]:
        #    loc = x.min() - 0.5*(scale - x.ptp())

if fscale is None:
# scale is not fixed.
if floc is None:
# loc is not fixed, scale is not fixed.
loc = data.min()
scale = data.ptp()
else:
# loc is fixed, scale is not fixed.
loc = floc
scale = data.max() - loc
if data.min() < loc:
raise FitDataError("uniform", lower=loc, upper=loc + scale)
else:
# loc is not fixed, scale is fixed.
ptp = data.ptp()
if ptp > fscale:
raise FitUniformFixedScaleDataError(ptp=ptp, fscale=fscale)
# If ptp < fscale, the ML estimate is not unique; see the comments
# above. We choose the distribution for which the support is
# centered over the interval [data.min(), data.max()].
loc = data.min() - 0.5*(fscale - ptp)
scale = fscale

# We expect the return values to be floating point, so ensure it
# by explicitly converting to float.
return float(loc), float(scale)


uniform = uniform_gen(a=0.0, b=1.0, name='uniform')

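The derivation spelled out in the comments of `fit` above can be sanity-checked numerically. The following standalone sketch is not part of the commit; the `loglike` helper is invented here purely for illustration. It evaluates the uniform log-likelihood directly and confirms the closed-form estimates, including the flat-likelihood case with a fixed scale:

import numpy as np
from scipy import stats

x = np.array([2, 2.5, 3.1, 9.5, 13.0])

def loglike(loc, scale, data):
    # Uniform log-likelihood: -n*log(scale) when all data lie in
    # [loc, loc + scale], and -inf otherwise.
    if data.min() < loc or data.max() > loc + scale:
        return -np.inf
    return -len(data) * np.log(scale)

# Neither parameter fixed: loc = x.min(), scale = x.ptp().
loc, scale = stats.uniform.fit(x)
print(loc, scale)                     # 2.0 11.0
print(loglike(loc, scale, x))         # -5*log(11)
print(loglike(loc, scale - 1e-6, x))  # -inf: a smaller scale excludes x.max()

# Fixed scale larger than x.ptp(): the likelihood is flat in loc over
# [x.max() - scale, x.min()], so fit() centers the support on the data.
loc12, scale12 = stats.uniform.fit(x, fscale=12)
print(loc12, scale12)                 # 1.5 12.0
print(loglike(1.5, 12, x) == loglike(2.0, 12, x))  # True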
18 changes: 18 additions & 0 deletions scipy/stats/tests/test_distributions.py
@@ -1735,6 +1735,24 @@ def test_lognorm_fit(self):
assert_equal(loc, 1)
assert_allclose(scale, np.exp(lnxm1.mean()), rtol=1e-12)

def test_uniform_fit(self):
x = np.array([1.0, 1.1, 1.2, 9.0])

loc, scale = stats.uniform.fit(x)
assert_equal(loc, x.min())
assert_equal(scale, x.ptp())

loc, scale = stats.uniform.fit(x, floc=0)
assert_equal(loc, 0)
assert_equal(scale, x.max())

loc, scale = stats.uniform.fit(x, fscale=10)
assert_equal(loc, 0)
assert_equal(scale, 10)

assert_raises(ValueError, stats.uniform.fit, x, floc=2.0)
assert_raises(ValueError, stats.uniform.fit, x, fscale=5.0)

def test_fshapes(self):
# take a beta distribution, with shapes='a, b', and make sure that
# fa is equivalent to f0, and fb is equivalent to f1
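A possible follow-on to `test_uniform_fit` above, sketched here rather than taken from the commit, would assert the documented centering behavior when `fscale` exceeds ``data.ptp()``. It assumes the same test class and imports as the surrounding tests:

    def test_uniform_fit_fixed_scale_centered(self):
        # With fscale > data.ptp(), the docstring says the returned support
        # is centered over [data.min(), data.max()], so the gap on each
        # side of the data is (fscale - ptp)/2.
        x = np.array([1.0, 1.1, 1.2, 9.0])
        fscale = 10.0
        loc, scale = stats.uniform.fit(x, fscale=fscale)
        assert_equal(scale, fscale)
        assert_allclose(loc, x.min() - 0.5*(fscale - x.ptp()), rtol=1e-12)
        assert_allclose(loc + scale - x.max(), x.min() - loc, rtol=1e-12)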
