Skip to content

Commit

Permalink
deprecate categories and ordered parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
topper-123 committed Jan 6, 2019
1 parent 2e8f46f commit a4cf7a2
Show file tree
Hide file tree
Showing 16 changed files with 138 additions and 155 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,7 @@ Deprecations
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
- The signature of :meth:`Series.to_csv` has been made consistent with that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, and the ``header`` argument now defaults to ``True``. (:issue:`19715`)
- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`)
- :meth:`Categorical.from_codes` has deprecated the ``categories`` and ``ordered`` parameters. Supply a :class:`~pandas.api.types.CategoricalDtype` to the new ``dtype`` parameter instead. (:issue:`24398`)
- :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`)
- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain
many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`)
Expand Down
35 changes: 23 additions & 12 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,9 +605,9 @@ def _from_inferred_categories(cls, inferred_categories, inferred_codes,
@classmethod
def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
"""
Make a Categorical type from codes and categories arrays.
Make a Categorical type from codes and CategoricalDtype.
This constructor is useful if you already have codes and categories and
This constructor is useful if you already have codes and the dtype and
so do not need the (computation intensive) factorization step, which is
usually done on the constructor.
Expand All @@ -621,19 +621,21 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
categories or -1 for NaN
categories : index-like, optional
The categories for the categorical. Items need to be unique.
.. deprecated:: 0.24.0
Use ``dtype`` instead.
ordered : bool, optional
Whether or not this categorical is treated as an ordered
categorical. If not given, the resulting categorical will be
unordered.
.. versionchanged:: 0.24.0
The default value has been changed to ``None``. Previously
the default value was ``False``.
dtype : CategoricalDtype, optional
.. deprecated:: 0.24.0
Use ``dtype`` instead.
dtype : CategoricalDtype
An instance of ``CategoricalDtype`` to use for this categorical.
.. versionadded:: 0.24.0
dtype will be required in the future.
Examples
--------
Expand All @@ -642,8 +644,18 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
[a, b, a, b]
Categories (2, object): [a < b]
"""
dtype = CategoricalDtype._from_values_or_dtype(codes, categories,
ordered, dtype)
if dtype is not None:
if categories is not None or ordered is not None:
raise ValueError("Cannot specify `categories` or `ordered` "
"together with `dtype`.")
elif categories is None and dtype is None:
raise ValueError("Must specify `dtype`.")
else:
msg = u("The 'categories' and 'ordered' keyword are deprecated "
"and will be removed in a future version. Please use "
"'dtype' instead.")
warn(msg, FutureWarning, stacklevel=2)
dtype = CategoricalDtype(categories, ordered)

codes = np.asarray(codes) # #21767
if not is_integer_dtype(codes):
Expand Down Expand Up @@ -1211,9 +1223,8 @@ def map(self, mapper):
"""
new_categories = self.categories.map(mapper)
try:
return self.from_codes(self._codes.copy(),
categories=new_categories,
ordered=self.ordered)
new_dtype = CategoricalDtype(new_categories, ordered=self.ordered)
return self.from_codes(self._codes.copy(), dtype=new_dtype)
except ValueError:
# NA values are represented in self._codes with -1
# np.take causes NA values to take final element in new_categories
Expand Down
15 changes: 7 additions & 8 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pandas.core.dtypes.common import (
ensure_categorical, is_categorical_dtype, is_datetime64_dtype, is_hashable,
is_list_like, is_scalar, is_timedelta64_dtype)
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.generic import ABCSeries

import pandas.core.algorithms as algorithms
Expand Down Expand Up @@ -292,21 +293,19 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
from pandas.core.groupby.categorical import recode_for_groupby
self.grouper, self.all_grouper = recode_for_groupby(
self.grouper, self.sort, observed)
categories = self.grouper.categories
dtype = CategoricalDtype(self.grouper.categories,
ordered=self.grouper.ordered)

# we make a CategoricalIndex out of the cat grouper
# preserving the categories / ordered attributes
self._labels = self.grouper.codes
if observed:
codes = algorithms.unique1d(self.grouper.codes)
else:
codes = np.arange(len(categories))
codes = np.arange(len(dtype.categories))

self._group_index = CategoricalIndex(
Categorical.from_codes(
codes=codes,
categories=categories,
ordered=self.grouper.ordered))
Categorical.from_codes(codes=codes, dtype=dtype))

# we are done
if isinstance(self.grouper, Grouping):
Expand Down Expand Up @@ -395,8 +394,8 @@ def _make_labels(self):

@cache_readonly
def groups(self):
return self.index.groupby(Categorical.from_codes(self.labels,
self.group_index))
return self.index.groupby(
Categorical(self.labels, self.group_index, fastpath=True))


def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
Expand Down
12 changes: 7 additions & 5 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
ensure_int64, ensure_platform_int, is_categorical_dtype, is_hashable,
is_integer, is_iterator, is_list_like, is_object_dtype, is_scalar,
pandas_dtype)
from pandas.core.dtypes.dtypes import ExtensionDtype, PandasExtensionDtype
from pandas.core.dtypes.dtypes import (
CategoricalDtype, ExtensionDtype, PandasExtensionDtype)
from pandas.core.dtypes.generic import ABCDataFrame
from pandas.core.dtypes.missing import array_equivalent, isna

Expand Down Expand Up @@ -2026,13 +2027,14 @@ def _get_codes_for_sorting(self):
"""
from pandas.core.arrays import Categorical

def cats(level_codes):
return np.arange(np.array(level_codes).max() + 1 if
def as_dtype(level_codes):
cats = np.arange(np.array(level_codes).max() + 1 if
len(level_codes) else 0,
dtype=level_codes.dtype)
return CategoricalDtype(cats, ordered=True)

return [Categorical.from_codes(level_codes, cats(level_codes),
ordered=True)
return [Categorical.from_codes(level_codes,
dtype=as_dtype(level_codes))
for level_codes in self.codes]

def sortlevel(self, level=0, ascending=True, sort_remaining=True):
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from pandas.core.dtypes.common import (
is_categorical_dtype, is_datetime64tz_dtype, is_object_dtype,
needs_i8_conversion, pandas_dtype)
from pandas.core.dtypes.dtypes import CategoricalDtype as CDT

from pandas import ( # noqa:F401
Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Float64Index,
Expand Down Expand Up @@ -621,9 +622,8 @@ def decode(obj):
name=obj[u'name'])
elif typ == u'category':
from_codes = globals()[obj[u'klass']].from_codes
return from_codes(codes=obj[u'codes'],
categories=obj[u'categories'],
ordered=obj[u'ordered'])
dtype = CDT(obj[u'categories'], ordered=obj[u'ordered'])
return from_codes(codes=obj[u'codes'], dtype=dtype)

elif typ == u'interval':
return Interval(obj[u'left'], obj[u'right'], obj[u'closed'])
Expand Down
7 changes: 3 additions & 4 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
ensure_int64, ensure_object, ensure_platform_int, is_categorical_dtype,
is_datetime64_dtype, is_datetime64tz_dtype, is_list_like,
is_timedelta64_dtype)
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.dtypes.missing import array_equivalent

from pandas import (
Expand Down Expand Up @@ -2206,10 +2207,8 @@ def convert(self, values, nan_rep, encoding, errors):
categories = categories[~mask]
codes[codes != -1] -= mask.astype(int).cumsum().values

self.data = Categorical.from_codes(codes,
categories=categories,
ordered=self.ordered)

dtype = CategoricalDtype(categories, ordered=self.ordered)
self.data = Categorical.from_codes(codes, dtype=dtype)
else:

try:
Expand Down
83 changes: 25 additions & 58 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,13 @@ class TestCategoricalConstructors(object):
def test_validate_ordered(self):
# see gh-14058
exp_msg = "'ordered' must either be 'True' or 'False'"
exp_err = TypeError

# This should be a boolean.
# This should be a boolean or None.
ordered = np.array([0, 1, 2])

with pytest.raises(exp_err, match=exp_msg):
with pytest.raises(TypeError, match=exp_msg):
Categorical([1, 2, 3], ordered=ordered)

with pytest.raises(exp_err, match=exp_msg):
Categorical.from_codes([0, 0, 1], categories=['a', 'b', 'c'],
ordered=ordered)

def test_constructor_empty(self):
# GH 17248
c = Categorical([])
Expand Down Expand Up @@ -421,76 +416,41 @@ def test_constructor_with_categorical_categories(self):
tm.assert_categorical_equal(result, expected)

def test_from_codes(self):
dtype = CategoricalDtype(categories=[1, 2])

# no dtype or categories
msg = 'Must specify `dtype`.'
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2])

# too few categories
dtype = CategoricalDtype(categories=[1, 2])
msg = "codes need to be between "
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([1, 2], dtype=dtype)

# no int codes
msg = "codes need to be array-like integers"
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(["a"], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes(["a"], dtype=dtype)

# no unique categories
with pytest.raises(ValueError,
match="Categorical categories must be unique"):
Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"])

# NaN categories included
with pytest.raises(ValueError,
match="Categorial categories cannot be null"):
Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan])

# too negative
dtype = CategoricalDtype(categories=["a", "b", "c"])
msg = r"codes need to be between -1 and len\(categories\)-1"
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([-2, 1, 2], categories=dtype.categories)
with pytest.raises(ValueError, match=msg):
Categorical.from_codes([-2, 1, 2], dtype=dtype)

exp = Categorical(["a", "b", "c"], ordered=False)
res = Categorical.from_codes([0, 1, 2], categories=dtype.categories)
tm.assert_categorical_equal(exp, res)

res = Categorical.from_codes([0, 1, 2], dtype=dtype)
tm.assert_categorical_equal(exp, res)

codes = np.random.choice([0, 1], 5, p=[0.9, 0.1])
dtype = CategoricalDtype(categories=["train", "test"])
Categorical.from_codes(codes, categories=dtype.categories)
Categorical.from_codes(codes, dtype=dtype)

def test_from_codes_with_categorical_categories(self):
# GH17884
expected = Categorical(['a', 'b'], categories=['a', 'b', 'c'])

result = Categorical.from_codes(
[0, 1], categories=Categorical(['a', 'b', 'c']))
tm.assert_categorical_equal(result, expected)

result = Categorical.from_codes(
[0, 1], categories=CategoricalIndex(['a', 'b', 'c']))
tm.assert_categorical_equal(result, expected)

# non-unique Categorical still raises
with pytest.raises(ValueError,
match="Categorical categories must be unique"):
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))

def test_from_codes_with_nan_code(self):
# GH21767
codes = [1, 2, np.nan]
dtype = CategoricalDtype(categories=['a', 'b', 'c'])
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, categories=dtype.categories)
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype=dtype)
Expand All @@ -500,36 +460,43 @@ def test_from_codes_with_float(self):
codes = [1.0, 2.0, 0] # integer, but in float dtype
dtype = CategoricalDtype(categories=['a', 'b', 'c'])

with tm.assert_produces_warning(FutureWarning):
cat = Categorical.from_codes(codes, dtype.categories)
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

with tm.assert_produces_warning(FutureWarning):
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
cat = Categorical.from_codes(codes, dtype=dtype)
tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype='i1'))

codes = [1.1, 2.0, 0] # non-integer
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype.categories)
with pytest.raises(ValueError,
match="codes need to be array-like integers"):
Categorical.from_codes(codes, dtype=dtype)

def test_from_codes_deprecated(self, ordered):
# GH24398
cats = ['a', 'b']
with tm.assert_produces_warning(FutureWarning):
Categorical.from_codes([0, 1], categories=cats)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
Categorical.from_codes([0, 1], categories=cats, ordered=True)

with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
Categorical.from_codes([0, 1], categories=cats, ordered=False)

@pytest.mark.parametrize('dtype', [None, 'category'])
def test_from_inferred_categories(self, dtype):
cats = ['a', 'b']
codes = np.array([0, 0, 1, 1], dtype='i8')
result = Categorical._from_inferred_categories(cats, codes, dtype)
expected = Categorical.from_codes(codes, cats)
expected = Categorical.from_codes(codes,
dtype=CategoricalDtype(cats))
tm.assert_categorical_equal(result, expected)

@pytest.mark.parametrize('dtype', [None, 'category'])
def test_from_inferred_categories_sorts(self, dtype):
cats = ['b', 'a']
codes = np.array([0, 1, 1, 1], dtype='i8')
result = Categorical._from_inferred_categories(cats, codes, dtype)
expected = Categorical.from_codes([1, 0, 0, 0], ['a', 'b'])
expected = Categorical.from_codes([1, 0, 0, 0],
dtype=CategoricalDtype(['a', 'b']))
tm.assert_categorical_equal(result, expected)

def test_from_inferred_categories_dtype(self):
Expand Down
28 changes: 16 additions & 12 deletions pandas/tests/arrays/categorical/test_subclass.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
# -*- coding: utf-8 -*-

from pandas import Categorical
from pandas.api.types import CategoricalDtype
import pandas.util.testing as tm


class TestCategoricalSubclassing(object):

def test_constructor(self):
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
assert isinstance(sc, tm.SubclassedCategorical)
tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c']))
subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
assert isinstance(subclassed, tm.SubclassedCategorical)
tm.assert_categorical_equal(subclassed, Categorical(['a', 'b', 'c']))

def test_from_codes(self):
sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
assert isinstance(sc, tm.SubclassedCategorical)
exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
tm.assert_categorical_equal(sc, exp)
dtype = CategoricalDtype(['a', 'b', 'c'])
subclassed = tm.SubclassedCategorical.from_codes([1, 0, 2],
dtype=dtype)
assert isinstance(subclassed, tm.SubclassedCategorical)

expected = Categorical.from_codes([1, 0, 2], dtype=dtype)
tm.assert_categorical_equal(subclassed, expected)

def test_map(self):
sc = tm.SubclassedCategorical(['a', 'b', 'c'])
res = sc.map(lambda x: x.upper())
assert isinstance(res, tm.SubclassedCategorical)
exp = Categorical(['A', 'B', 'C'])
tm.assert_categorical_equal(res, exp)
subclassed = tm.SubclassedCategorical(['a', 'b', 'c'])
result = subclassed.map(lambda x: x.upper())
assert isinstance(result, tm.SubclassedCategorical)
expected = Categorical(['A', 'B', 'C'])
tm.assert_categorical_equal(result, expected)
Loading

0 comments on commit a4cf7a2

Please sign in to comment.