# Test relocation only: pandas/tests/groupby/test_allowlist.py is deleted and
# its three tests are re-added, with identical bodies, at the end of two
# existing test modules:
#   - test_regression_allowlist_methods     -> pandas/tests/groupby/test_function.py
#   - test_groupby_selection_with_methods   -> pandas/tests/groupby/test_groupby.py
#   - test_groupby_selection_other_methods  -> pandas/tests/groupby/test_groupby.py
# The deleted module's docstring and import lines are not re-added by these hunks.
# NOTE(review): the target modules must already import pytest, DataFrame,
# date_range and tm for the moved tests to run - not visible here, confirm.
# (These lines precede the first "diff --git" header and belong to no hunk.)
diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py
deleted file mode 100644
index d495441593aed..0000000000000
--- a/pandas/tests/groupby/test_allowlist.py
+++ /dev/null
@@ -1,124 +0,0 @@
-"""
-TODO: Existing tests should be moved or deduplicated
-Do not add tests here!
-"""
-
-import pytest
-
-from pandas import (
-    DataFrame,
-    date_range,
-)
-import pandas._testing as tm
-
-
-@pytest.mark.parametrize(
-    "op",
-    [
-        "sum",
-        "prod",
-        "min",
-        "max",
-        "median",
-        "mean",
-        "skew",
-        "std",
-        "var",
-        "sem",
-    ],
-)
-@pytest.mark.parametrize("axis", [0, 1])
-@pytest.mark.parametrize("skipna", [True, False])
-@pytest.mark.parametrize("sort", [True, False])
-def test_regression_allowlist_methods(op, axis, skipna, sort):
-    # GH6944
-    # GH 17537
-    # explicitly test the allowlist methods
-    raw_frame = DataFrame([0])
-    if axis == 0:
-        frame = raw_frame
-        msg = "The 'axis' keyword in DataFrame.groupby is deprecated and will be"
-    else:
-        frame = raw_frame.T
-        msg = "DataFrame.groupby with axis=1 is deprecated"
-
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        grouped = frame.groupby(level=0, axis=axis, sort=sort)
-
-    if op == "skew":
-        # skew has skipna
-        result = getattr(grouped, op)(skipna=skipna)
-        expected = frame.groupby(level=0).apply(
-            lambda h: getattr(h, op)(axis=axis, skipna=skipna)
-        )
-        if sort:
-            expected = expected.sort_index(axis=axis)
-        tm.assert_frame_equal(result, expected)
-    else:
-        result = getattr(grouped, op)()
-        expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)(axis=axis))
-        if sort:
-            expected = expected.sort_index(axis=axis)
-        tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize(
-    "method",
-    [
-        "count",
-        "corr",
-        "cummax",
-        "cummin",
-        "cumprod",
-        "describe",
-        "rank",
-        "quantile",
-        "diff",
-        "shift",
-        "all",
-        "any",
-        "idxmin",
-        "idxmax",
-        "ffill",
-        "bfill",
-        "pct_change",
-    ],
-)
-def test_groupby_selection_with_methods(df, method):
-    # some methods which require DatetimeIndex
-    rng = date_range("2014", periods=len(df))
-    df.index = rng
-
-    g = df.groupby(["A"])[["C"]]
-    g_exp = df[["C"]].groupby(df["A"])
-    # TODO check groupby with > 1 col ?
-
-    res = getattr(g, method)()
-    exp = getattr(g_exp, method)()
-
-    # should always be frames!
-    tm.assert_frame_equal(res, exp)
-
-
-def test_groupby_selection_other_methods(df):
-    # some methods which require DatetimeIndex
-    rng = date_range("2014", periods=len(df))
-    df.columns.name = "foo"
-    df.index = rng
-
-    g = df.groupby(["A"])[["C"]]
-    g_exp = df[["C"]].groupby(df["A"])
-
-    # methods which aren't just .foo()
-    tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
-    msg = "DataFrameGroupBy.dtypes is deprecated"
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        tm.assert_frame_equal(g.dtypes, g_exp.dtypes)
-    tm.assert_frame_equal(g.apply(lambda x: x.sum()), g_exp.apply(lambda x: x.sum()))
-
-    tm.assert_frame_equal(g.resample("D").mean(), g_exp.resample("D").mean())
-    tm.assert_frame_equal(g.resample("D").ohlc(), g_exp.resample("D").ohlc())
-
-    tm.assert_frame_equal(
-        g.filter(lambda x: len(x) == 3), g_exp.filter(lambda x: len(x) == 3)
-    )
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index ac192f190962d..159c620e36cdd 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -1662,3 +1662,53 @@ def test_duplicate_columns(request, groupby_func, as_index):
     if groupby_func not in ("size", "ngroup", "cumcount"):
         expected = expected.rename(columns={"c": "b"})
     tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "op",
+    [
+        "sum",
+        "prod",
+        "min",
+        "max",
+        "median",
+        "mean",
+        "skew",
+        "std",
+        "var",
+        "sem",
+    ],
+)
+@pytest.mark.parametrize("axis", [0, 1])
+@pytest.mark.parametrize("skipna", [True, False])
+@pytest.mark.parametrize("sort", [True, False])
+def test_regression_allowlist_methods(op, axis, skipna, sort):
+    # GH6944
+    # GH 17537
+    # explicitly test the allowlist methods
+    raw_frame = DataFrame([0])
+    if axis == 0:
+        frame = raw_frame
+        msg = "The 'axis' keyword in DataFrame.groupby is deprecated and will be"
+    else:
+        frame = raw_frame.T
+        msg = "DataFrame.groupby with axis=1 is deprecated"
+
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        grouped = frame.groupby(level=0, axis=axis, sort=sort)
+
+    if op == "skew":
+        # skew has skipna
+        result = getattr(grouped, op)(skipna=skipna)
+        expected = frame.groupby(level=0).apply(
+            lambda h: getattr(h, op)(axis=axis, skipna=skipna)
+        )
+        if sort:
+            expected = expected.sort_index(axis=axis)
+        tm.assert_frame_equal(result, expected)
+    else:
+        result = getattr(grouped, op)()
+        expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)(axis=axis))
+        if sort:
+            expected = expected.sort_index(axis=axis)
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 29382be60b08b..53148eb37e15a 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2981,3 +2981,65 @@ def test_groupby_sum_on_nan_should_return_nan(bug_var):
 
     expected_df = DataFrame([bug_var, bug_var, bug_var, np.nan], columns=["A"])
     tm.assert_frame_equal(result, expected_df)
+
+
+@pytest.mark.parametrize(
+    "method",
+    [
+        "count",
+        "corr",
+        "cummax",
+        "cummin",
+        "cumprod",
+        "describe",
+        "rank",
+        "quantile",
+        "diff",
+        "shift",
+        "all",
+        "any",
+        "idxmin",
+        "idxmax",
+        "ffill",
+        "bfill",
+        "pct_change",
+    ],
+)
+def test_groupby_selection_with_methods(df, method):
+    # some methods which require DatetimeIndex
+    rng = date_range("2014", periods=len(df))
+    df.index = rng
+
+    g = df.groupby(["A"])[["C"]]
+    g_exp = df[["C"]].groupby(df["A"])
+    # TODO check groupby with > 1 col ?
+
+    res = getattr(g, method)()
+    exp = getattr(g_exp, method)()
+
+    # should always be frames!
+    tm.assert_frame_equal(res, exp)
+
+
+def test_groupby_selection_other_methods(df):
+    # some methods which require DatetimeIndex
+    rng = date_range("2014", periods=len(df))
+    df.columns.name = "foo"
+    df.index = rng
+
+    g = df.groupby(["A"])[["C"]]
+    g_exp = df[["C"]].groupby(df["A"])
+
+    # methods which aren't just .foo()
+    tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
+    msg = "DataFrameGroupBy.dtypes is deprecated"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        tm.assert_frame_equal(g.dtypes, g_exp.dtypes)
+    tm.assert_frame_equal(g.apply(lambda x: x.sum()), g_exp.apply(lambda x: x.sum()))
+
+    tm.assert_frame_equal(g.resample("D").mean(), g_exp.resample("D").mean())
+    tm.assert_frame_equal(g.resample("D").ohlc(), g_exp.resample("D").ohlc())
+
+    tm.assert_frame_equal(
+        g.filter(lambda x: len(x) == 3), g_exp.filter(lambda x: len(x) == 3)
+    )