From 0541e57b05bdb1902cacc4cd0c3670d6be63c602 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Fri, 19 Jul 2019 01:37:40 -0500 Subject: [PATCH 1/4] BUG: quantile segfaults on invalid quantile values --- pandas/_libs/groupby.pyx | 4 ++++ pandas/tests/groupby/test_transform.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index e3f18572abca1..bdf804edcf30b 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -719,6 +719,10 @@ def group_quantile(ndarray[float64_t] out, ndarray[int64_t] counts, non_na_counts, sort_arr assert values.shape[0] == N + + if not 0 <= q <= 1: + raise ValueError("Quantile values must lie in the interval [0, 1]") + inter_methods = { 'linear': INTERPOLATION_LINEAR, 'lower': INTERPOLATION_LOWER, diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 9a8b7cf18f2c0..5f538ab7e4a63 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -958,6 +958,18 @@ def test_groupby_transform_timezone_column(func): tm.assert_frame_equal(result, expected) +def test_quantile_validation(): + # GH#27470 + df = pd.DataFrame(dict(a=[0, 0, 0, 1, 1, 1])) + g = df.groupby(np.repeat([0, 1], 3)) + result = g.quantile(0.5) + expected = DataFrame(dict(a=[0.0, 1.0])) + tm.assert_frame_equal(result, expected) + + with pytest.raises(ValueError, match="uantile values"): + g.quantile(1.1) + + @pytest.mark.parametrize( "func, values", [ From 264e9ca5b42a2cca5360dc5a4d2790561d1ffe4b Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 25 Jul 2019 13:19:47 -0500 Subject: [PATCH 2/4] move test --- pandas/tests/groupby/test_function.py | 12 ++++++++++++ pandas/tests/groupby/test_transform.py | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index efc3142b25b82..724c8e2a3f5a9 100644 --- 
a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1333,3 +1333,15 @@ def test_groupby_mean_no_overflow(): } ) assert df.groupby("user")["connections"].mean()["A"] == 3689348814740003840 + + +def test_quantile_validation(): + # GH#27470 + df = pd.DataFrame(dict(a=[0, 0, 0, 1, 1, 1])) + g = df.groupby(np.repeat([0, 1], 3)) + result = g.quantile(0.5) + expected = DataFrame(dict(a=[0.0, 1.0])) + tm.assert_frame_equal(result, expected) + + with pytest.raises(ValueError, match="uantile values"): + g.quantile(1.1) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 5f538ab7e4a63..9a8b7cf18f2c0 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -958,18 +958,6 @@ def test_groupby_transform_timezone_column(func): tm.assert_frame_equal(result, expected) -def test_quantile_validation(): - # GH#27470 - df = pd.DataFrame(dict(a=[0, 0, 0, 1, 1, 1])) - g = df.groupby(np.repeat([0, 1], 3)) - result = g.quantile(0.5) - expected = DataFrame(dict(a=[0.0, 1.0])) - tm.assert_frame_equal(result, expected) - - with pytest.raises(ValueError, match="uantile values"): - g.quantile(1.1) - - @pytest.mark.parametrize( "func, values", [ From 14ac38bd7c2a0bdcfeb9a37e3d902a8e678b8956 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 25 Jul 2019 15:15:11 -0500 Subject: [PATCH 3/4] Use algorithms.check_percentile --- pandas/_libs/groupby.pyx | 3 --- pandas/core/groupby/groupby.py | 2 ++ pandas/tests/groupby/test_function.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index bdf804edcf30b..82f1b27752d9e 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -720,9 +720,6 @@ def group_quantile(ndarray[float64_t] out, assert values.shape[0] == N - if not 0 <= q <= 1: - raise ValueError("Quantile values must lie in the interval [0, 1]") - inter_methods = { 'linear': 
INTERPOLATION_LINEAR, 'lower': INTERPOLATION_LOWER, diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 9aba9723e0546..91cc878124cdc 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1895,6 +1895,8 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: return vals + algorithms.check_percentile(q) + return self._get_cythonized_result( "group_quantile", self.grouper, diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 724c8e2a3f5a9..fc4cda97620f0 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1343,5 +1343,5 @@ def test_quantile_validation(): expected = DataFrame(dict(a=[0.0, 1.0])) tm.assert_frame_equal(result, expected) - with pytest.raises(ValueError, match="uantile values"): + with pytest.raises(ValueError, match="all be in the interval"): g.quantile(1.1) From 3cde126b0184af7e4be9c809431f22efd2cf90c1 Mon Sep 17 00:00:00 2001 From: pilkibun Date: Thu, 25 Jul 2019 15:35:25 -0500 Subject: [PATCH 4/4] remove noise --- pandas/_libs/groupby.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 82f1b27752d9e..e3f18572abca1 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -719,7 +719,6 @@ def group_quantile(ndarray[float64_t] out, ndarray[int64_t] counts, non_na_counts, sort_arr assert values.shape[0] == N - inter_methods = { 'linear': INTERPOLATION_LINEAR, 'lower': INTERPOLATION_LOWER,