diff --git a/scipy/stats/_continuous_distns.py b/scipy/stats/_continuous_distns.py index 97c4d717ebb9..16205a656280 100644 --- a/scipy/stats/_continuous_distns.py +++ b/scipy/stats/_continuous_distns.py @@ -3393,8 +3393,8 @@ def fit(self, data, *args, **kwds): floc = kwds.get('floc', None) method = kwds.get('method', 'mle') - if (isinstance(data, CensoredData) or floc is None - or method.lower() == 'mm'): + if (isinstance(data, CensoredData) or + floc is None and method.lower() != 'mm'): # loc is not fixed or we're not doing standard MLE. # Use the default fit method. return super().fit(data, *args, **kwds) @@ -3407,9 +3407,7 @@ def fit(self, data, *args, **kwds): _remove_optimizer_parameters(kwds) - # Special case: loc is fixed. - - if f0 is not None and fscale is not None: + if f0 is not None and floc is not None and fscale is not None: # This check is for consistency with `rv_continuous.fit`. # Without this check, this function would just return the # parameters that were given. @@ -3422,6 +3420,34 @@ def fit(self, data, *args, **kwds): if not np.isfinite(data).all(): raise ValueError("The data contains non-finite values.") + # Use explicit formulas for mm (gh-19884) + if method.lower() == 'mm': + m1 = np.mean(data) + m2 = np.var(data) + m3 = np.mean((data - m1) ** 3) + a, loc, scale = f0, floc, fscale + # Three unknowns + if a is None and loc is None and scale is None: + scale = m3 / (2 * m2) + # Two unknowns + if loc is None and scale is None: + scale = np.sqrt(m2 / a) + if a is None and scale is None: + scale = m2 / (m1 - loc) + if a is None and loc is None: + a = m2 / (scale ** 2) + # One unknown + if a is None: + a = (m1 - loc) / scale + if loc is None: + loc = m1 - a * scale + if scale is None: + scale = (m1 - loc) / a + return a, loc, scale + + # Special case: loc is fixed. + + # NB: data == loc is ok if a >= 1; the below check is more strict. if np.any(data <= floc): raise FitDataError("gamma", lower=floc, upper=np.inf) diff --git a/scipy/stats/tests/test_distributions.py b/scipy/stats/tests/test_distributions.py index 3535596a293e..9ab21a6ab402 100644 --- a/scipy/stats/tests/test_distributions.py +++ b/scipy/stats/tests/test_distributions.py @@ -4672,6 +4672,41 @@ def test_entropy(self, a, ref, rtol): assert_allclose(stats.gamma.entropy(a), ref, rtol=rtol) + @pytest.mark.parametrize("a", [1e-2, 1, 1e2]) + @pytest.mark.parametrize("loc", [1e-2, 0, 1e2]) + @pytest.mark.parametrize('scale', [1e-2, 1, 1e2]) + @pytest.mark.parametrize('fix_a', [True, False]) + @pytest.mark.parametrize('fix_loc', [True, False]) + @pytest.mark.parametrize('fix_scale', [True, False]) + def test_fit_mm(self, a, loc, scale, fix_a, fix_loc, fix_scale): + rng = np.random.default_rng(6762668991392531563) + data = stats.gamma.rvs(a, loc=loc, scale=scale, size=100, + random_state=rng) + + kwds = {} + if fix_a: + kwds['fa'] = a + if fix_loc: + kwds['floc'] = loc + if fix_scale: + kwds['fscale'] = scale + nfree = 3 - len(kwds) + + if nfree == 0: + error_msg = "All parameters fixed. There is nothing to optimize." + with pytest.raises(ValueError, match=error_msg): + stats.halfcauchy.fit(data, method='mm', **kwds) + return + + theta = stats.gamma.fit(data, method='mm', **kwds) + dist = stats.gamma(*theta) + if nfree >= 1: + assert_allclose(dist.mean(), np.mean(data)) + if nfree >= 2: + assert_allclose(dist.moment(2), np.mean(data**2)) + if nfree >= 3: + assert_allclose(dist.moment(3), np.mean(data**3)) + def test_pdf_overflow_gh19616(): # Confirm that gh19616 (intermediate over/underflows in PDF) is resolved # Reference value from R GeneralizedHyperbolic library