ENH: stats: Use explicit MLE formulas in uniform.fit() #8554

Merged (1 commit, Apr 1, 2018)
159 changes: 159 additions & 0 deletions scipy/stats/_continuous_distns.py
@@ -5887,6 +5887,16 @@ def integ(p):
tukeylambda = tukeylambda_gen(name='tukeylambda')


class FitUniformFixedScaleDataError(FitDataError):
def __init__(self, ptp, fscale):
self.args = (
"Invalid values in `data`. Maximum likelihood estimation with "
"the uniform distribution and fixed scale requires that "
"data.ptp() <= fscale, but data.ptp() = %r and fscale = %r." %
(ptp, fscale),
)
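# Illustrative sketch with hypothetical values: uniform.fit raises this error
# when a fixed scale cannot cover the data range, e.g.
#     uniform.fit([1.0, 9.0], fscale=5.0)
# fails because data.ptp() = 8.0 exceeds fscale = 5.0.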


class uniform_gen(rv_continuous):
r"""A uniform continuous random variable.

@@ -5915,6 +5925,155 @@ def _stats(self):
def _entropy(self):
return 0.0

def fit(self, data, *args, **kwds):
"""
Maximum likelihood estimate for the location and scale parameters.

`uniform.fit` uses only the following parameters. Because exact
formulas are used, the parameters related to optimization that are
available in the `fit` method of other distributions are ignored
here. The only positional argument accepted is `data`.

Parameters
----------
data : array_like
Data to use in calculating the maximum likelihood estimate.
floc : float, optional
Hold the location parameter fixed to the specified value.
fscale : float, optional
Hold the scale parameter fixed to the specified value.

Returns
-------
loc, scale : float
Maximum likelihood estimates for the location and scale.

Notes
-----
An error is raised if `floc` is given and any values in `data` are
less than `floc`, or if `fscale` is given and `fscale` is less
than ``data.max() - data.min()``. An error is also raised if both
`floc` and `fscale` are given.

Examples
--------
>>> import numpy as np
>>> from scipy.stats import uniform

We'll fit the uniform distribution to `x`:

>>> x = np.array([2, 2.5, 3.1, 9.5, 13.0])

For a uniform distribution MLE, the location is the minimum of the
data, and the scale is the maximum minus the minimum.

>>> loc, scale = uniform.fit(x)
>>> loc
2.0
>>> scale
11.0

If we know the data comes from a uniform distribution where the support
starts at 0, we can use `floc=0`:

>>> loc, scale = uniform.fit(x, floc=0)
>>> loc
0.0
>>> scale
13.0

Alternatively, if we know the length of the support is 12, we can use
`fscale=12`:

>>> loc, scale = uniform.fit(x, fscale=12)
>>> loc
1.5
>>> scale
12.0

In that last example, the support interval is [1.5, 13.5]. This
solution is not unique. For example, the distribution with ``loc=2``
and ``scale=12`` has the same likelihood as the one above. When
`fscale` is given and it is larger than ``data.max() - data.min()``,
the parameters returned by the `fit` method center the support over
the interval ``[data.min(), data.max()]``.

"""
if len(args) > 0:
raise TypeError("Too many arguments.")

floc = kwds.pop('floc', None)
fscale = kwds.pop('fscale', None)

# Ignore the optimizer-related keyword arguments, if given.
kwds.pop('loc', None)
kwds.pop('scale', None)
kwds.pop('optimizer', None)
if kwds:
raise TypeError("Unknown arguments: %s." % kwds)

if floc is not None and fscale is not None:
# This check is for consistency with `rv_continuous.fit`.
raise ValueError("All parameters fixed. There is nothing to "
"optimize.")

data = np.asarray(data)

# MLE for the uniform distribution
# --------------------------------
# The PDF is
#
# f(x, loc, scale) = {1/scale for loc <= x <= loc + scale
# {0 otherwise}
#
# The likelihood function is
# L(x, loc, scale) = (1/scale)**n
# where n is len(x), assuming loc <= x <= loc + scale for all x.
# The log-likelihood is
# l(x, loc, scale) = -n*log(scale)
# The log-likelihood is maximized by making scale as small as possible,
# while keeping loc <= x <= loc + scale. So if neither loc nor scale
# are fixed, the log-likelihood is maximized by choosing
# loc = x.min()
# scale = x.ptp()
# If loc is fixed, it must be less than or equal to x.min(), and then
# the scale is
# scale = x.max() - loc
# If scale is fixed, it must not be less than x.ptp(). If scale is
# greater than x.ptp(), the solution is not unique. Note that the
# likelihood does not depend on loc, except for the requirement that
# loc <= x <= loc + scale. All choices of loc for which
# x.max() - scale <= loc <= x.min()
# have the same log-likelihood. In this case, we choose loc such that
# the support is centered over the interval [data.min(), data.max()]:
# loc = x.min() - 0.5*(scale - x.ptp())
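#
# Worked example (numbers reused from the docstring above): for
# x = [2, 2.5, 3.1, 9.5, 13.0] and a fixed scale of 12, x.ptp() = 11, so
# loc = x.min() - 0.5*(scale - x.ptp()) = 2 - 0.5*(12 - 11) = 1.5
# and the support is [1.5, 13.5], matching the fscale=12 example above.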

if fscale is None:
# scale is not fixed.
if floc is None:
# loc is not fixed, scale is not fixed.
loc = data.min()
scale = data.ptp()
else:
# loc is fixed, scale is not fixed.
loc = floc
scale = data.max() - loc
if data.min() < loc:
raise FitDataError("uniform", lower=loc, upper=loc + scale)
else:
# loc is not fixed, scale is fixed.
ptp = data.ptp()
if ptp > fscale:
raise FitUniformFixedScaleDataError(ptp=ptp, fscale=fscale)
# If ptp < fscale, the ML estimate is not unique; see the comments
# above. We choose the distribution for which the support is
# centered over the interval [data.min(), data.max()].
loc = data.min() - 0.5*(fscale - ptp)
scale = fscale

# We expect the return values to be floating point, so ensure it
# by explicitly converting to float.
return float(loc), float(scale)


uniform = uniform_gen(a=0.0, b=1.0, name='uniform')

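For reference, a minimal usage sketch of the three code paths in the new `fit` method. The sample mirrors the data used in the test below, and the expected outputs follow from the explicit formulas above rather than from a run of this exact patch:

>>> import numpy as np
>>> from scipy.stats import uniform
>>> x = np.array([1.0, 1.1, 1.2, 9.0])
>>> uniform.fit(x)             # free loc and scale: (x.min(), x.ptp())
(1.0, 8.0)
>>> uniform.fit(x, floc=0)     # fixed loc: scale = x.max() - floc
(0.0, 9.0)
>>> uniform.fit(x, fscale=10)  # fixed scale: support centered on the data
(0.0, 10.0)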
18 changes: 18 additions & 0 deletions scipy/stats/tests/test_distributions.py
@@ -1709,6 +1709,24 @@ def test_lognorm_fit(self):
assert_equal(loc, 1)
assert_allclose(scale, np.exp(lnxm1.mean()), rtol=1e-12)

def test_uniform_fit(self):
x = np.array([1.0, 1.1, 1.2, 9.0])

loc, scale = stats.uniform.fit(x)
assert_equal(loc, x.min())
assert_equal(scale, x.ptp())

loc, scale = stats.uniform.fit(x, floc=0)
assert_equal(loc, 0)
assert_equal(scale, x.max())

loc, scale = stats.uniform.fit(x, fscale=10)
assert_equal(loc, 0)
assert_equal(scale, 10)

assert_raises(ValueError, stats.uniform.fit, x, floc=2.0)
assert_raises(ValueError, stats.uniform.fit, x, fscale=5.0)
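
# Sketch of an additional boundary check (hypothetical, not asserted here):
# when fscale equals the data range exactly, the estimate is unique and
# loc should equal x.min(), i.e.
#     loc, scale = stats.uniform.fit(x, fscale=x.ptp())
#     assert_equal(loc, x.min())
#     assert_equal(scale, x.ptp())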

def test_fshapes(self):
# take a beta distribution, with shapes='a, b', and make sure that
# fa is equivalent to f0, and fb is equivalent to f1