Skip to content

Commit

Permalink
REF/TST: misplaced Categorical tests (#37678)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Nov 8, 2020
1 parent fcfaf78 commit fc38f46
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 170 deletions.
115 changes: 0 additions & 115 deletions pandas/tests/arrays/categorical/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,30 +59,6 @@ def test_isin_cats():
tm.assert_numpy_array_equal(expected, result)


@pytest.mark.parametrize(
    "to_replace, value, result, expected_error_msg",
    [
        ("b", "c", ["a", "c"], "Categorical.categories are different"),
        ("c", "d", ["a", "b"], None),
        # https://github.com/pandas-dev/pandas/issues/33288
        ("a", "a", ["a", "b"], None),
        ("b", None, ["a", None], "Categorical.categories length are different"),
    ],
)
def test_replace(to_replace, value, result, expected_error_msg):
    """Exercise ``Categorical.replace`` as a copy and in place (GH 26988).

    ``result`` holds the values the replaced categorical should contain.
    ``expected_error_msg`` is the assertion message expected when the
    untouched original is compared against that outcome; it is only
    consulted for the rows that replace ``"b"``.
    """
    original = pd.Categorical(["a", "b"])
    expected = pd.Categorical(result)

    # Non-inplace call: the returned object carries the replacement.
    replaced = original.replace(to_replace, value)
    tm.assert_categorical_equal(replaced, expected)

    # Rows replacing "b" are the ones with an expected message: for them the
    # original must still differ from ``expected`` after the call above.
    if to_replace == "b":
        with pytest.raises(AssertionError, match=expected_error_msg):
            tm.assert_categorical_equal(original, expected)

    # In-place call: now the original itself has to match.
    original.replace(to_replace, value, inplace=True)
    tm.assert_categorical_equal(original, expected)


@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
def test_isin_empty(empty):
s = pd.Categorical(["a", "b"])
Expand All @@ -105,94 +81,3 @@ def test_diff():
result = df.diff()

tm.assert_frame_equal(result, expected)


class TestTake:
# https://github.com/pandas-dev/pandas/issues/20664

def test_take_default_allow_fill(self):
cat = pd.Categorical(["a", "b"])
with tm.assert_produces_warning(None):
result = cat.take([0, -1])

assert result.equals(cat)

def test_take_positive_no_warning(self):
cat = pd.Categorical(["a", "b"])
with tm.assert_produces_warning(None):
cat.take([0, 0])

def test_take_bounds(self, allow_fill):
# https://github.com/pandas-dev/pandas/issues/20664
cat = pd.Categorical(["a", "b", "a"])
if allow_fill:
msg = "indices are out-of-bounds"
else:
msg = "index 4 is out of bounds for( axis 0 with)? size 3"
with pytest.raises(IndexError, match=msg):
cat.take([4, 5], allow_fill=allow_fill)

def test_take_empty(self, allow_fill):
# https://github.com/pandas-dev/pandas/issues/20664
cat = pd.Categorical([], categories=["a", "b"])
if allow_fill:
msg = "indices are out-of-bounds"
else:
msg = "cannot do a non-empty take from an empty axes"
with pytest.raises(IndexError, match=msg):
cat.take([0], allow_fill=allow_fill)

def test_positional_take(self, ordered):
cat = pd.Categorical(
["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered
)
result = cat.take([0, 1, 2], allow_fill=False)
expected = pd.Categorical(
["a", "a", "b"], categories=cat.categories, ordered=ordered
)
tm.assert_categorical_equal(result, expected)

def test_positional_take_unobserved(self, ordered):
cat = pd.Categorical(["a", "b"], categories=["a", "b", "c"], ordered=ordered)
result = cat.take([1, 0], allow_fill=False)
expected = pd.Categorical(
["b", "a"], categories=cat.categories, ordered=ordered
)
tm.assert_categorical_equal(result, expected)

def test_take_allow_fill(self):
# https://github.com/pandas-dev/pandas/issues/23296
cat = pd.Categorical(["a", "a", "b"])
result = cat.take([0, -1, -1], allow_fill=True)
expected = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b"])
tm.assert_categorical_equal(result, expected)

def test_take_fill_with_negative_one(self):
# -1 was a category
cat = pd.Categorical([-1, 0, 1])
result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1)
expected = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1])
tm.assert_categorical_equal(result, expected)

def test_take_fill_value(self):
# https://github.com/pandas-dev/pandas/issues/23296
cat = pd.Categorical(["a", "b", "c"])
result = cat.take([0, 1, -1], fill_value="a", allow_fill=True)
expected = pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"])
tm.assert_categorical_equal(result, expected)

def test_take_fill_value_new_raises(self):
# https://github.com/pandas-dev/pandas/issues/23296
cat = pd.Categorical(["a", "b", "c"])
xpr = r"'fill_value=d' is not present in this Categorical's categories"
with pytest.raises(ValueError, match=xpr):
cat.take([0, 1, -1], fill_value="d", allow_fill=True)

def test_take_nd_deprecated(self):
cat = pd.Categorical(["a", "b", "c"])
with tm.assert_produces_warning(FutureWarning):
cat.take_nd([0, 1])

ci = pd.Index(cat)
with tm.assert_produces_warning(FutureWarning):
ci.take_nd([0, 1])
7 changes: 0 additions & 7 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,10 +359,3 @@ def test_validate_inplace_raises(self, value):

with pytest.raises(ValueError, match=msg):
cat.sort_values(inplace=value)

def test_isna(self):
exp = np.array([False, False, True])
c = Categorical(["a", "b", np.nan])
res = c.isna()

tm.assert_numpy_array_equal(res, exp)
61 changes: 60 additions & 1 deletion pandas/tests/arrays/categorical/test_indexing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
import numpy as np
import pytest

from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series
from pandas import (
Categorical,
CategoricalIndex,
Index,
Interval,
IntervalIndex,
PeriodIndex,
Series,
Timedelta,
Timestamp,
)
import pandas._testing as tm
import pandas.core.common as com
from pandas.tests.arrays.categorical.common import TestCategorical
Expand Down Expand Up @@ -256,6 +266,55 @@ def test_where_ordered_differs_rasies(self):
ser.where([True, False, True], other)


class TestContains:
    """Tests for the ``in`` operator (``__contains__``) on ``Categorical``."""

    def test_contains(self):
        """Membership is decided by values, never by the integer codes."""
        # GH#21508
        cat = Categorical(list("aabbca"), categories=list("cab"))

        assert "b" in cat
        assert "z" not in cat
        assert np.nan not in cat
        with pytest.raises(TypeError, match="unhashable type: 'list'"):
            assert [1] in cat

        # assert codes NOT in index
        for code in (0, 1):
            assert code not in cat

        # Once a missing value is present, nan is reported as contained.
        with_missing = Categorical(list("aabbca") + [np.nan], categories=list("cab"))
        assert np.nan in with_missing

    @pytest.mark.parametrize(
        "item, expected",
        [
            (Interval(0, 1), True),
            (1.5, True),
            (Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        """Membership on interval categories yields the exact bool in the table."""
        # GH#23705
        intervals = IntervalIndex.from_breaks(range(3))
        cat = Categorical(intervals)

        # Identity check: the answer must be a real ``True``/``False``.
        assert (item in cat) is expected

    def test_contains_list(self):
        """Lists are unhashable, so testing them for membership raises."""
        # GH#21729
        cat = Categorical([1, 2, 3])

        assert "a" not in cat

        for unhashable in (["a"], ["a", "b"]):
            with pytest.raises(TypeError, match="unhashable type"):
                unhashable in cat


@pytest.mark.parametrize("index", [True, False])
def test_mask_with_boolean(index):
s = Series(range(3))
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/arrays/categorical/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@


class TestCategoricalMissing:
def test_isna(self):
exp = np.array([False, False, True])
cat = Categorical(["a", "b", np.nan])
res = cat.isna()

tm.assert_numpy_array_equal(res, exp)

def test_na_flags_int_categories(self):
# #1457

Expand Down
47 changes: 0 additions & 47 deletions pandas/tests/arrays/categorical/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,50 +395,3 @@ def test_numeric_like_ops(self):
msg = "Object with dtype category cannot perform the numpy op log"
with pytest.raises(TypeError, match=msg):
np.log(s)

def test_contains(self):
    """``in`` on a Categorical looks at values, not at the integer codes."""
    # GH21508
    letters = list("aabbca")
    cat = Categorical(letters, categories=list("cab"))

    assert "b" in cat
    assert "z" not in cat
    assert np.nan not in cat
    with pytest.raises(TypeError, match="unhashable type: 'list'"):
        assert [1] in cat

    # assert codes NOT in index
    for code in (0, 1):
        assert code not in cat

    # With a missing value appended, nan is reported as contained.
    cat_with_nan = Categorical(letters + [np.nan], categories=list("cab"))
    assert np.nan in cat_with_nan

@pytest.mark.parametrize(
    "item, expected",
    [
        (pd.Interval(0, 1), True),
        (1.5, True),
        (pd.Interval(0.5, 1.5), False),
        ("a", False),
        (pd.Timestamp(1), False),
        (pd.Timedelta(1), False),
    ],
    ids=str,
)
def test_contains_interval(self, item, expected):
    """Membership on interval categories yields the exact bool in the table."""
    # GH 23705
    intervals = pd.IntervalIndex.from_breaks(range(3))
    cat = Categorical(intervals)

    # Identity check: the answer must be a real ``True``/``False``.
    assert (item in cat) is expected

def test_contains_list(self):
    """Lists are unhashable, so testing them for membership raises."""
    # GH#21729
    cat = Categorical([1, 2, 3])

    assert "a" not in cat

    # One-element and multi-element lists must both be rejected.
    for unhashable in (["a"], ["a", "b"]):
        with pytest.raises(TypeError, match="unhashable type"):
            unhashable in cat
26 changes: 26 additions & 0 deletions pandas/tests/arrays/categorical/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest

import pandas as pd
from pandas import Categorical
import pandas._testing as tm


Expand Down Expand Up @@ -45,3 +46,28 @@ def test_replace(to_replace, value, expected, flip_categories):

tm.assert_series_equal(expected, result, check_category_order=False)
tm.assert_series_equal(expected, s, check_category_order=False)


@pytest.mark.parametrize(
    "to_replace, value, result, expected_error_msg",
    [
        ("b", "c", ["a", "c"], "Categorical.categories are different"),
        ("c", "d", ["a", "b"], None),
        # https://github.com/pandas-dev/pandas/issues/33288
        ("a", "a", ["a", "b"], None),
        ("b", None, ["a", None], "Categorical.categories length are different"),
    ],
)
def test_replace2(to_replace, value, result, expected_error_msg):
    """Exercise ``Categorical.replace`` as a copy and in place (GH#26988).

    ``result`` holds the values the replaced categorical should contain.
    ``expected_error_msg`` is the assertion message expected when the
    untouched original is compared against that outcome; it is only
    consulted for the rows that replace ``"b"``.
    """
    # TODO: better name
    source = Categorical(["a", "b"])
    expected = Categorical(result)

    # Non-inplace call: the returned object carries the replacement.
    replaced = source.replace(to_replace, value)
    tm.assert_categorical_equal(replaced, expected)

    # Rows replacing "b" are the ones with an expected message: for them the
    # source must still differ from ``expected`` after the call above.
    if to_replace == "b":
        with pytest.raises(AssertionError, match=expected_error_msg):
            tm.assert_categorical_equal(source, expected)

    # In-place call: now the source itself has to match.
    source.replace(to_replace, value, inplace=True)
    tm.assert_categorical_equal(source, expected)

0 comments on commit fc38f46

Please sign in to comment.