Skip to content

Commit

Permalink
Merge 4db1296 into 20749a1
Browse files Browse the repository at this point in the history
  • Loading branch information
thequackdaddy committed Oct 24, 2016
2 parents 20749a1 + 4db1296 commit 2379c1c
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 7 deletions.
30 changes: 23 additions & 7 deletions patsy/categorical.py
Expand Up @@ -42,6 +42,7 @@
have_pandas, have_pandas_categorical,
have_pandas_categorical_dtype,
safe_is_pandas_categorical,
safe_pandas_Categorical_reorder,
pandas_Categorical_from_codes,
pandas_Categorical_categories,
pandas_Categorical_codes,
Expand Down Expand Up @@ -309,21 +310,26 @@ def categorical_to_int(data, levels, NA_action, origin=None):
assert isinstance(levels, tuple)
# In this function, missing values are always mapped to -1

if isinstance(data, _CategoricalBox):
if data.levels is not None and tuple(data.levels) != levels:
raise PatsyError("mismatching levels: expected %r, got %r"
% (levels, tuple(data.levels)), origin)
data = data.data

if safe_is_pandas_categorical(data):
data_levels_tuple = tuple(pandas_Categorical_categories(data))
if not data_levels_tuple == levels:
if not set(data_levels_tuple) == set(levels):
raise PatsyError("mismatching levels: expected %r, got %r"
% (levels, data_levels_tuple), origin)
if not data_levels_tuple == levels:
if isinstance(data, pandas.Categorical):
data = safe_pandas_Categorical_reorder(data, levels)
else:
data = safe_pandas_Categorical_reorder(data.cat, levels)
# pandas.Categorical also uses -1 to indicate NA, and we don't try to
# second-guess its NA detection, so we can just pass it back.
return pandas_Categorical_codes(data)

if isinstance(data, _CategoricalBox):
if data.levels is not None and tuple(data.levels) != levels:
raise PatsyError("mismatching levels: expected %r, got %r"
% (levels, tuple(data.levels)), origin)
data = data.data

data = _categorical_shape_fix(data)

try:
Expand Down Expand Up @@ -402,11 +408,21 @@ def Series_from_codes(codes, categories):
con([1, 0], ("a", "b")),
("a", "c"),
NAAction())

# I don't think this test is necessary. If the user specifies
# a custom order of the levels, shouldn't we allow
# them to be re-ordered on-the-fly?

# Contradicts test_highlevel.test_C_and_pandas_categorical
# test where levels can be re-ordered.

"""
assert_raises(PatsyError,
categorical_to_int,
con([1, 0], ("a", "b")),
("b", "a"),
NAAction())
"""

def t(data, levels, expected, NA_action=NAAction()):
got = categorical_to_int(data, levels, NA_action)
Expand Down
22 changes: 22 additions & 0 deletions patsy/util.py
Expand Up @@ -21,6 +21,7 @@
"no_pickling",
"assert_no_pickling",
"safe_string_eq",
"test_safe_pandas_Categorical_reorder",
]

import sys
Expand Down Expand Up @@ -610,6 +611,27 @@ def test_pandas_Categorical_from_codes():
assert np.all(np.asarray(c)[:-1] == ["b", "b", "a"])
assert np.isnan(np.asarray(c)[-1])


def safe_pandas_Categorical_reorder(categorical, newlevels):
    """Return ``categorical`` with its levels re-ordered to ``newlevels``.

    If ``categorical`` exposes a ``reorder_categories`` method, that method
    does the work (called with ``ordered=False``) and its result is returned
    unchanged.  Otherwise the codes are recomputed by hand: every value is
    looked up in ``newlevels`` and a new categorical is built through
    ``pandas_Categorical_from_codes``.

    Parameters
    ----------
    categorical
        Either an object with ``reorder_categories`` or anything
        ``np.asarray`` can convert to an array of values.
    newlevels
        The desired level order.  On the fallback path it must contain every
        non-null value found in ``categorical``.

    Raises
    ------
    IndexError
        On the fallback path, when a non-null value is absent from
        ``newlevels``.
    """
    if hasattr(categorical, 'reorder_categories'):
        return categorical.reorder_categories(newlevels, ordered=False)

    # Fallback for objects without ``reorder_categories`` (presumably older
    # pandas releases -- confirm against the supported version range).
    # Build the comparison array once rather than once per element.
    level_array = np.array(newlevels)
    codes = [
        # Null values keep the -1 "missing" code; everything else gets the
        # index of its first match in the new level order.
        -1 if pandas.isnull(value) else np.where(value == level_array)[0][0]
        for value in np.asarray(categorical).tolist()
    ]
    return pandas_Categorical_from_codes(codes, newlevels)


def test_safe_pandas_Categorical_reorder():
    """Check that reordering swaps the levels but keeps values and NA."""
    original = pandas_Categorical_from_codes([1, 1, 0, -1], ["a", "b"])
    reordered = safe_pandas_Categorical_reorder(original, ['b', 'a'])

    # The observed values must be untouched; the trailing -1 code stays NA.
    values = np.asarray(reordered)
    assert np.all(values[:-1] == ["b", "b", "a"])
    assert np.isnan(values[-1])

    # The level labels live under ``categories`` when that attribute exists,
    # and under ``levels`` otherwise.
    level_attr = 'categories' if hasattr(reordered, 'categories') else 'levels'
    assert np.all(getattr(reordered, level_attr) == ['b', 'a'])


# Needed to support pandas < 0.15
def pandas_Categorical_categories(cat):
# In 0.15+, a categorical Series has a .cat attribute which is similar to
Expand Down

0 comments on commit 2379c1c

Please sign in to comment.