New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG: fix moments method to support arrays and list #12197
Changes from 25 commits
2f1beae
2f0a903
8359f4a
65e359a
5391f86
5ab67bf
e3fd4ef
06c5162
b7206d4
60c517a
78e3fe9
8ca99eb
3ef2889
ec282dd
4989c3c
559bd3d
3dbf027
a6f4788
cd90f3e
112b588
43858ec
2cebce8
7b69ef6
a13a73b
acf4c1b
5b10a45
35387ff
d6a9b11
7ed600d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -2,6 +2,8 @@ | |||||
import numpy as np | ||||||
import numpy.testing as npt | ||||||
import pytest | ||||||
import hypothesis.extra.numpy as npst | ||||||
from hypothesis import given, strategies as st | ||||||
from pytest import raises as assert_raises | ||||||
from scipy.integrate import IntegrationWarning | ||||||
|
||||||
|
@@ -719,3 +721,143 @@ def test_burr_fisk_moment_gh13234_regression(): | |||||
|
||||||
vals1 = stats.fisk.moment(1, 8) | ||||||
assert isinstance(vals1, float) | ||||||
|
||||||
|
||||||
def test_moments_with_array_gh12192_regression(): | ||||||
# array loc and scalar scale | ||||||
vals0 = stats.norm.moment(n=1, loc=np.array([1, 2, 3]), scale=1) | ||||||
expected0 = np.array([1., 2., 3.]) | ||||||
npt.assert_equal(vals0, expected0) | ||||||
|
||||||
# array loc and invalid scalar scale | ||||||
vals1 = stats.norm.moment(n=1, loc=np.array([1, 2, 3]), scale=-1) | ||||||
expected1 = np.array([np.nan, np.nan, np.nan]) | ||||||
npt.assert_equal(vals1, expected1) | ||||||
|
||||||
# array loc and array scale with invalid entries | ||||||
vals2 = stats.norm.moment(n=1, loc=np.array([1, 2, 3]), scale=[-3, 1, 0]) | ||||||
expected2 = np.array([np.nan, 2., np.nan]) | ||||||
npt.assert_equal(vals2, expected2) | ||||||
|
||||||
# (loc == 0) & (scale < 0) | ||||||
vals3 = stats.norm.moment(n=2, loc=0, scale=-4) | ||||||
expected3 = np.nan | ||||||
npt.assert_equal(vals3, expected3) | ||||||
assert isinstance(vals3, expected3.__class__) | ||||||
|
||||||
# array loc with 0 entries and scale with invalid entries | ||||||
vals4 = stats.norm.moment(n=2, loc=[1, 0, 2], scale=[3, -4, -5]) | ||||||
expected4 = np.array([10., np.nan, np.nan]) | ||||||
npt.assert_equal(vals4, expected4) | ||||||
|
||||||
# all(loc == 0) & (array scale with invalid entries) | ||||||
vals5 = stats.norm.moment(n=2, loc=[0, 0, 0], scale=[5., -2, 100.]) | ||||||
expected5 = np.array([25., np.nan, 10000.]) | ||||||
npt.assert_equal(vals5, expected5) | ||||||
|
||||||
# all( (loc == 0) & (scale < 0) ) | ||||||
vals6 = stats.norm.moment(n=2, loc=[0, 0, 0], scale=[-5., -2, -100.]) | ||||||
expected6 = np.array([np.nan, np.nan, np.nan]) | ||||||
npt.assert_equal(vals6, expected6) | ||||||
|
||||||
# scalar args, loc, and scale | ||||||
vals7 = stats.chi.moment(n=2, df=1, loc=0, scale=0) | ||||||
expected7 = np.nan | ||||||
npt.assert_equal(vals7, expected7) | ||||||
assert isinstance(vals7, expected7.__class__) | ||||||
|
||||||
# array args, scalar loc, and scalar scale | ||||||
vals8 = stats.chi.moment(n=2, df=[1, 2, 3], loc=0, scale=0) | ||||||
expected8 = np.array([np.nan, np.nan, np.nan]) | ||||||
npt.assert_equal(vals8, expected8) | ||||||
|
||||||
# array args, array loc, and array scale | ||||||
vals9 = stats.chi.moment(n=2, df=[1, 2, 3], loc=[1., 0., 2.], | ||||||
scale=[1., -3., 0.]) | ||||||
expected9 = np.array([3.59576912, np.nan, np.nan]) | ||||||
npt.assert_allclose(vals9, expected9, rtol=1e-8) | ||||||
|
||||||
# (n > 4), all(loc != 0), and all(scale != 0) | ||||||
vals10 = stats.norm.moment(5, [1., 2.], [1., 2.]) | ||||||
expected10 = np.array([26., 832.]) | ||||||
npt.assert_allclose(vals10, expected10, rtol=1e-13) | ||||||
|
||||||
# test broadcasting and more | ||||||
a = [-1.1, 0, 1, 2.2, np.pi] | ||||||
b = [-1.1, 0, 1, 2.2, np.pi] | ||||||
loc = [-1.1, 0, np.sqrt(2)] | ||||||
scale = [-2.1, 0, 1, 2.2, np.pi] | ||||||
|
||||||
a = np.array(a).reshape((-1, 1, 1, 1)) | ||||||
b = np.array(b).reshape((-1, 1, 1)) | ||||||
loc = np.array(loc).reshape((-1, 1)) | ||||||
scale = np.array(scale) | ||||||
|
||||||
vals11 = stats.beta.moment(n=2, a=a, b=b, loc=loc, scale=scale) | ||||||
|
||||||
a, b, loc, scale = np.broadcast_arrays(a, b, loc, scale) | ||||||
|
||||||
for i in np.ndenumerate(a): | ||||||
with np.errstate(invalid='ignore', divide='ignore'): | ||||||
i = i[0] # just get the index | ||||||
# check against same function with scalar input | ||||||
expected = stats.beta.moment(n=2, a=a[i], b=b[i], | ||||||
loc=loc[i], scale=scale[i]) | ||||||
np.testing.assert_equal(vals11[i], expected) | ||||||
|
||||||
|
||||||
def test_broadcasting_in_moments_gh12192_regression(): | ||||||
vals0 = stats.norm.moment(n=1, loc=np.array([1, 2, 3]), scale=[[1]]) | ||||||
expected0 = np.array([[1., 2., 3.]]) | ||||||
npt.assert_equal(vals0, expected0) | ||||||
assert vals0.shape == expected0.shape | ||||||
|
||||||
vals1 = stats.norm.moment(n=1, loc=np.array([[1], [2], [3]]), | ||||||
scale=[1, 2, 3]) | ||||||
expected1 = np.array([[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]]) | ||||||
npt.assert_equal(vals1, expected1) | ||||||
assert vals1.shape == expected1.shape | ||||||
|
||||||
vals2 = stats.chi.moment(n=1, df=[1., 2., 3.], loc=0., scale=1.) | ||||||
expected2 = np.array([0.79788456, 1.25331414, 1.59576912]) | ||||||
npt.assert_allclose(vals2, expected2, rtol=1e-8) | ||||||
assert vals2.shape == expected2.shape | ||||||
|
||||||
vals3 = stats.chi.moment(n=1, df=[[1.], [2.], [3.]], loc=[0., 1., 2.], | ||||||
scale=[-1., 0., 3.]) | ||||||
expected3 = np.array([[np.nan, np.nan, 4.39365368], | ||||||
[np.nan, np.nan, 5.75994241], | ||||||
[np.nan, np.nan, 6.78730736]]) | ||||||
npt.assert_allclose(vals3, expected3, rtol=1e-8) | ||||||
assert vals3.shape == expected3.shape | ||||||
npt.assert_(vals3.shape == expected3.shape) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like this was added but the original was not removed. |
||||||
|
||||||
|
||||||
# A strategy to generate the shape of a one-dimensional array of length [0..7] | ||||||
# Subsequent draws will use the same shape to ensure compatibility. For fancier | ||||||
# tricks see `npst.broadcastable_shapes()` or `mutually_broadcastable_shapes()`. | ||||||
shared_shapes = st.shared(st.tuples(st.integers(0, 7))) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ideally, to show the advantage of Hypothesis, I think we want this test to go beyond what the tests above can do - show that this works for any distribution and any broadcastable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd recommend having separate tests for always-valid and might-be-invalid inputs; otherwise you eventually and without realising write a test which never checks the valid case 😅 For a deeper test, you're going to want the (This is getting pretty far from the original intent to fix a bug though, and I've seen projects take quite a while to settle on using Hypothesis. Might be worth merging this PR, and then tackling the test changes in a new one?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the feedback @Zac-HD and @mdhaber! I agree we should use Hypothesis to its fullest and add a deeper test. I have some time this and next week so I can go a little deeper and add a more comprehensive test.
I also think this would be better. Though I will try to push something or at least confirm locally using Hypothesis that the bug-fix works. I can then propose a PR and we can discuss Hypothesis in detail there. What do you think, @mdhaber? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, a local test with Hypothesis is fine. This PR has been tricky to get right, so a more comprehensive test would give additional confidence that the bug fix is ready to be merged. |
||||||
|
||||||
|
||||||
def scalar_or_array(**kwargs): | ||||||
return st.floats(**kwargs) | npst.arrays( | ||||||
dtype=float, shape=shared_shapes, elements=kwargs | ||||||
) | ||||||
|
||||||
|
||||||
@given( | ||||||
# Our moment `n`, between one and four (inclusive) | ||||||
n=st.integers(1, 4), | ||||||
# Our locations and scale are each either a scalar (Python-native) float, | ||||||
# or an ndarray of floats. If both are arrays, they will have the same shape. | ||||||
loc=scalar_or_array(min_value=0, max_value=1000), # what should the bounds be? | ||||||
scale=scalar_or_array(min_value=-1, max_value=1000), | ||||||
) | ||||||
def test_moments_gh12192_regression(n, loc, scale): | ||||||
got = stats.norm.moment(n, loc=loc, scale=scale) | ||||||
ref = np.vectorize(stats.norm.moment, otypes=[float])(n, loc=loc, scale=scale) | ||||||
|
||||||
if isinstance(got, np.ndarray): | ||||||
assert got.shape == ref.shape | ||||||
assert got.dtype == ref.dtype | ||||||
np.testing.assert_allclose(got, ref) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was the problem that we assumed if
mu
wasNone
then the rest would beNone
?Could you explain what is going on here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I think so.
I am creating a list of moments that are not
None
and callingargsreduce
to remove the invalid entries. But as we don't know which of them would beNone
and which wouldn't, I have also created a list of indices recording the index of the moments that are notNone
. Onceargsreduce
returns a list of reduced moments, I just fill in themom
list with the moments that have been reduced (using the index list).I didn't think much about a better approach to get this done quickly. If this is acceptable for now, it should work just fine and we can get back to it later and do something that is clearer. Does that sound good?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to let a second reviewer decide that. I originally stated that I'd prefer to have a second reviewer, but after noticing this, I've decided that I should not merge this myself. This is sufficiently complex that I should not be the only one to do a thorough review - especially since I wrote a fair amount of it in tirthasheshpatel#1. Would you reach out to some other core developers who might be interested? In the meantime, I'll take a look at some of those PRs you submitted for adding
alternative
tomstats
hypothesis tests.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure.
Thanks!