From 7aeccd392f0c07ecd1c8d929a0d0312fd8151971 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 26 Oct 2017 18:54:22 -0500 Subject: [PATCH] Rename categories with Series (#17982) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * PERF/API: Treat series as array-like for rename_categories HEAD: ``` [ 50.00%] ··· Running categoricals.Categoricals3.time_rank_string_cat 6.63ms [ 50.00%] ····· [100.00%] ··· Running categoricals.Categoricals3.time_rank_string_cat_ordered 4.85ms ``` Closes https://github.com/pandas-dev/pandas/issues/17981 * Redo docstring * Use list-like * Warn * Fix doc indent * Doc cleanup * More doc cleanup * Fix API reference * Typos --- doc/source/whatsnew/v0.21.0.txt | 31 +++++++++++++++++++- pandas/core/categorical.py | 49 ++++++++++++++++++++++++++------ pandas/tests/test_categorical.py | 12 ++++++++ 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 18f8858748df5..11106554483e0 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -239,6 +239,36 @@ Now, to find prices per store/product, we can simply do: .pipe(lambda grp: grp.Revenue.sum()/grp.Quantity.sum()) .unstack().round(2)) + +.. _whatsnew_0210.enhancements.reanme_categories: + +``Categorical.rename_categories`` accepts a dict-like +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`~Series.cat.rename_categories` now accepts a dict-like argument for +``new_categories``. The previous categories are looked up in the dictionary's +keys and replaced if found. The behavior of missing and extra keys is the same +as in :meth:`DataFrame.rename`. + +.. ipython:: python + + c = pd.Categorical(['a', 'a', 'b']) + c.rename_categories({"a": "eh", "b": "bee"}) + +.. warning:: + + To assist with upgrading pandas, ``rename_categories`` treats ``Series`` as + list-like. 
Typically, they are considered to be dict-like, and in a future + version of pandas ``rename_categories`` will change to treat them as + dict-like. + + .. ipython:: python + :okwarning: + + c.rename_categories(pd.Series([0, 1], index=['a', 'c'])) + + Follow the warning message's recommendations. + See the :ref:`documentation <groupby.pipe>` for more. .. _whatsnew_0210.enhancements.other: @@ -267,7 +297,6 @@ Other Enhancements - :func:`DataFrame.items` and :func:`Series.items` are now present in both Python 2 and 3 and is lazy in all cases. (:issue:`13918`, :issue:`17213`) - :func:`Styler.where` has been implemented as a convenience for :func:`Styler.applymap`. (:issue:`17474`) - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) -- :func:`Categorical.rename_categories` now accepts a dict-like argument as ``new_categories`` and only updates the categories found in that dict. (:issue:`17336`) - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) - :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`) - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names. (:issue:`14207`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index e8537fb576536..e709c771b7d18 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -866,11 +866,6 @@ def set_categories(self, new_categories, ordered=None, rename=False, def rename_categories(self, new_categories, inplace=False): """ Renames categories. - The new categories can be either a list-like dict-like object.
- If it is list-like, all items must be unique and the number of items - in the new categories must be the same as the number of items in the - old categories. - Raises ------ ValueError @@ -879,15 +874,30 @@ def rename_categories(self, new_categories, inplace=False): Parameters ---------- - new_categories : Index-like or dict-like (>=0.21.0) - The renamed categories. + new_categories : list-like or dict-like + + * list-like: all items must be unique and the number of items in + the new categories must match the existing number of categories. + + * dict-like: specifies a mapping from + old categories to new. Categories not contained in the mapping + are passed through and extra categories in the mapping are + ignored. *New in version 0.21.0*. + + .. warning:: + + Currently, Series are considered list like. In a future version + of pandas they'll be considered dict-like. + inplace : boolean (default: False) Whether or not to rename the categories inplace or return a copy of this categorical with renamed categories. Returns ------- - cat : Categorical with renamed categories added or None if inplace. + cat : Categorical or None + With ``inplace=False``, the new categorical is returned. + With ``inplace=True``, there is no return value. See also -------- @@ -896,10 +906,33 @@ def rename_categories(self, new_categories, inplace=False): remove_categories remove_unused_categories set_categories + + Examples + -------- + >>> c = Categorical(['a', 'a', 'b']) + >>> c.rename_categories([0, 1]) + [0, 0, 1] + Categories (2, int64): [0, 1] + + For dict-like ``new_categories``, extra keys are ignored and + categories not in the dictionary are passed through + + >>> c.rename_categories({'a': 'A', 'c': 'C'}) + [A, A, b] + Categories (2, object): [A, b] """ inplace = validate_bool_kwarg(inplace, 'inplace') cat = self if inplace else self.copy() + if isinstance(new_categories, ABCSeries): + msg = ("Treating Series 'new_categories' as a list-like and using " + "the values. 
In a future version, 'rename_categories' will " + "treat Series like a dictionary.\n" + "For dict-like, use 'new_categories.to_dict()'\n" + "For list-like, use 'new_categories.values'.") + warn(msg, FutureWarning, stacklevel=2) + new_categories = list(new_categories) + if is_dict_like(new_categories): cat.categories = [new_categories.get(item, item) for item in cat.categories] diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index d88e92a39a6c5..272ba25bf8f8a 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1203,6 +1203,18 @@ def test_rename_categories(self): with pytest.raises(ValueError): cat.rename_categories([1, 2]) + def test_rename_categories_series(self): + # https://github.com/pandas-dev/pandas/issues/17981 + c = pd.Categorical(['a', 'b']) + xpr = "Treating Series 'new_categories' as a list-like " + with tm.assert_produces_warning(FutureWarning) as rec: + result = c.rename_categories(pd.Series([0, 1])) + + assert len(rec) == 1 + assert xpr in str(rec[0].message) + expected = pd.Categorical([0, 1]) + tm.assert_categorical_equal(result, expected) + def test_rename_categories_dict(self): # GH 17336 cat = pd.Categorical(['a', 'b', 'c', 'd'])