From 4d9ffcf83dc356c47e302c1558a40cf148019218 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <2736230899@qq.com> Date: Sat, 11 May 2024 07:01:48 +0800 Subject: [PATCH] BUG: Improve error message when `transform()` with incorrect axis (#58494) --- pandas/core/apply.py | 3 ++- pandas/tests/apply/test_invalid_arg.py | 18 +++++++++++++++--- .../tests/groupby/aggregate/test_aggregate.py | 5 ++--- pandas/tests/groupby/aggregate/test_other.py | 4 ++-- pandas/tests/resample/test_resample_api.py | 10 +++++----- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 832beeddcef3c..32e8aea7ea8ab 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -628,7 +628,8 @@ def normalize_dictlike_arg( cols = Index(list(func.keys())).difference(obj.columns, sort=True) if len(cols) > 0: - raise KeyError(f"Column(s) {list(cols)} do not exist") + # GH 58474 + raise KeyError(f"Label(s) {list(cols)} do not exist") aggregator_types = (list, tuple, dict) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index b5ad1094f5bf5..3137d3ff50954 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -118,15 +118,15 @@ def test_dict_nested_renaming_depr(method): def test_missing_column(method, func): # GH 40004 obj = DataFrame({"A": [1]}) - match = re.escape("Column(s) ['B'] do not exist") - with pytest.raises(KeyError, match=match): + msg = r"Label\(s\) \['B'\] do not exist" + with pytest.raises(KeyError, match=msg): getattr(obj, method)(func) def test_transform_mixed_column_name_dtypes(): # GH39025 df = DataFrame({"a": ["1"]}) - msg = r"Column\(s\) \[1, 'b'\] do not exist" + msg = r"Label\(s\) \[1, 'b'\] do not exist" with pytest.raises(KeyError, match=msg): df.transform({"a": int, 1: str, "b": int}) @@ -359,3 +359,15 @@ def test_transform_reducer_raises(all_reductions, frame_or_series, op_wrapper): msg = "Function did not transform" with 
pytest.raises(ValueError, match=msg): obj.transform(op) + + +def test_transform_missing_labels_raises(): + # GH 58474 + df = DataFrame({"foo": [2, 4, 6], "bar": [1, 2, 3]}, index=["A", "B", "C"]) + msg = r"Label\(s\) \['A', 'B'\] do not exist" + with pytest.raises(KeyError, match=msg): + df.transform({"A": lambda x: x + 2, "B": lambda x: x * 2}, axis=0) + + msg = r"Label\(s\) \['bar', 'foo'\] do not exist" + with pytest.raises(KeyError, match=msg): + df.transform({"foo": lambda x: x + 2, "bar": lambda x: x * 2}, axis=1) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 2b9df1b7079da..3362d6209af6d 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -5,7 +5,6 @@ import datetime import functools from functools import partial -import re import numpy as np import pytest @@ -816,8 +815,8 @@ def test_agg_relabel_other_raises(self): def test_missing_raises(self): df = DataFrame({"A": [0, 1], "B": [1, 2]}) - match = re.escape("Column(s) ['C'] do not exist") - with pytest.raises(KeyError, match=match): + msg = r"Label\(s\) \['C'\] do not exist" + with pytest.raises(KeyError, match=msg): df.groupby("A").agg(c=("C", "sum")) def test_agg_namedtuple(self): diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 12f99e3cf7a63..78f2917e9a057 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -209,7 +209,7 @@ def test_aggregate_api_consistency(): expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1) expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]]) - msg = r"Column\(s\) \['r', 'r2'\] do not exist" + msg = r"Label\(s\) \['r', 'r2'\] do not exist" with pytest.raises(KeyError, match=msg): grouped[["D", "C"]].agg({"r": "sum", "r2": "mean"}) @@ -224,7 +224,7 @@ def test_agg_dict_renaming_deprecation(): 
{"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}} ) - msg = r"Column\(s\) \['ma'\] do not exist" + msg = r"Label\(s\) \['ma'\] do not exist" with pytest.raises(KeyError, match=msg): df.groupby("A")[["B", "C"]].agg({"ma": "max"}) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index a77097fd5ce61..bf1f6bd34b171 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -328,7 +328,7 @@ def test_agg_consistency(): r = df.resample("3min") - msg = r"Column\(s\) \['r1', 'r2'\] do not exist" + msg = r"Label\(s\) \['r1', 'r2'\] do not exist" with pytest.raises(KeyError, match=msg): r.agg({"r1": "mean", "r2": "sum"}) @@ -343,7 +343,7 @@ def test_agg_consistency_int_str_column_mix(): r = df.resample("3min") - msg = r"Column\(s\) \[2, 'b'\] do not exist" + msg = r"Label\(s\) \[2, 'b'\] do not exist" with pytest.raises(KeyError, match=msg): r.agg({2: "mean", "b": "sum"}) @@ -534,7 +534,7 @@ def test_agg_with_lambda(cases, agg): ], ) def test_agg_no_column(cases, agg): - msg = r"Column\(s\) \['result1', 'result2'\] do not exist" + msg = r"Label\(s\) \['result1', 'result2'\] do not exist" with pytest.raises(KeyError, match=msg): cases[["A", "B"]].agg(**agg) @@ -582,7 +582,7 @@ def test_agg_specificationerror_series(cases, agg): def test_agg_specificationerror_invalid_names(cases): # errors # invalid names in the agg specification - msg = r"Column\(s\) \['B'\] do not exist" + msg = r"Label\(s\) \['B'\] do not exist" with pytest.raises(KeyError, match=msg): cases[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]}) @@ -631,7 +631,7 @@ def test_try_aggregate_non_existing_column(): df = DataFrame(data).set_index("dt") # Error as we don't have 'z' column - msg = r"Column\(s\) \['z'\] do not exist" + msg = r"Label\(s\) \['z'\] do not exist" with pytest.raises(KeyError, match=msg): df.resample("30min").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]})