Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST/CLN: series.duplicated; parametrisation; fix warning #21899

Merged
merged 7 commits into from
Jul 16, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 39 additions & 11 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,19 @@ def tz_aware_fixture(request):
return request.param


@pytest.fixture(params=[str, 'str', 'U'])
# Numpy integer dtype aliases used to parametrize the dtype fixtures below.
# `int` is included among the signed dtypes so the platform-default integer
# is covered as well.
UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gfyoung this is ok? it follows our existing pattern?

Copy link
Contributor Author

@h-vetinari h-vetinari Jul 14, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback @gfyoung I only added COMPLEX_DTYPES, STRING_DTYPES (which was just giving names to existing collections) and ALL_NUMPY_DTYPES - the rest was just reordering.

# Aggregate dtype collections built from the per-kind lists above; each
# collection backs one parametrized fixture.
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES

FLOAT_DTYPES = [float, "float32", "float64"]
COMPLEX_DTYPES = [complex, "complex64", "complex128"]
# str / 'str' / 'U' all spell the numpy unicode string dtype.
STRING_DTYPES = [str, 'str', 'U']

# "Real" here means int + float (no complex, no strings).
ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES
ALL_NUMPY_DTYPES = ALL_REAL_DTYPES + STRING_DTYPES + COMPLEX_DTYPES


@pytest.fixture(params=STRING_DTYPES)
def string_dtype(request):
"""Parametrized fixture for string dtypes.

Expand All @@ -259,9 +271,6 @@ def string_dtype(request):
return request.param


FLOAT_DTYPES = [float, "float32", "float64"]


@pytest.fixture(params=FLOAT_DTYPES)
def float_dtype(request):
"""
Expand All @@ -274,7 +283,7 @@ def float_dtype(request):
return request.param


@pytest.fixture(params=[complex, "complex64", "complex128"])
@pytest.fixture(params=COMPLEX_DTYPES)
def complex_dtype(request):
"""
Parameterized fixture for complex dtypes.
Expand All @@ -286,12 +295,6 @@ def complex_dtype(request):
return request.param


UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES


@pytest.fixture(params=SIGNED_INT_DTYPES)
def sint_dtype(request):
"""
Expand Down Expand Up @@ -358,6 +361,31 @@ def any_real_dtype(request):
return request.param


@pytest.fixture(params=ALL_NUMPY_DTYPES)
def any_numpy_dtype(request):
    """
    Parametrized fixture covering every supported numpy dtype.

    * int8
    * uint8
    * int16
    * uint16
    * int32
    * uint32
    * int64
    * uint64
    * float32
    * float64
    * complex64
    * complex128
    * str
    * 'str'
    * 'U'
    """
    return request.param


@pytest.fixture
def mock():
"""
Expand Down
168 changes: 2 additions & 166 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,144 +907,6 @@ def test_matmul(self):
pytest.raises(Exception, a.dot, a.values[:3])
pytest.raises(ValueError, a.dot, b.T)

def test_value_counts_nunique(self):

# basics.rst doc example
series = Series(np.random.randn(500))
series[20:500] = np.nan
series[10:20] = 5000
result = series.nunique()
assert result == 11

# GH 18051
s = pd.Series(pd.Categorical([]))
assert s.nunique() == 0
s = pd.Series(pd.Categorical([np.nan]))
assert s.nunique() == 0

def test_unique(self):

# 714 also, dtype=float
s = Series([1.2345] * 100)
s[::2] = np.nan
result = s.unique()
assert len(result) == 2

s = Series([1.2345] * 100, dtype='f4')
s[::2] = np.nan
result = s.unique()
assert len(result) == 2

# NAs in object arrays #714
s = Series(['foo'] * 100, dtype='O')
s[::2] = np.nan
result = s.unique()
assert len(result) == 2

# decision about None
s = Series([1, 2, 3, None, None, None], dtype=object)
result = s.unique()
expected = np.array([1, 2, 3, None], dtype=object)
tm.assert_numpy_array_equal(result, expected)

# GH 18051
s = pd.Series(pd.Categorical([]))
tm.assert_categorical_equal(s.unique(), pd.Categorical([]),
check_dtype=False)
s = pd.Series(pd.Categorical([np.nan]))
tm.assert_categorical_equal(s.unique(), pd.Categorical([np.nan]),
check_dtype=False)

@pytest.mark.parametrize(
    "tc1, tc2",
    [
        (Series([1, 2, 3, 3], dtype=np.dtype(kind)),
         Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(kind)))
        for kind in ('int_', 'uint', 'float_', 'unicode_')
    ]
)
def test_drop_duplicates_non_bool(self, tc1, tc2):
    # duplicated/drop_duplicates must agree for every `keep` variant,
    # both out-of-place and with inplace=True.
    cases = [
        (tc1, [('first', [False, False, False, True]),
               ('last', [False, False, True, False]),
               (False, [False, False, True, True])]),
        (tc2, [('first', [False, False, False, False, True, True, False]),
               ('last', [False, True, True, False, False, False, False]),
               (False, [False, True, True, False, True, True, False])]),
    ]
    for tc, variants in cases:
        for keep, mask in variants:
            expected = Series(mask)
            assert_series_equal(tc.duplicated(keep=keep), expected)
            assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
            sc = tc.copy()
            sc.drop_duplicates(keep=keep, inplace=True)
            assert_series_equal(sc, tc[~expected])

def test_drop_duplicates_bool(self):
tc = Series([True, False, True, False])

expected = Series([False, False, True, True])
assert_series_equal(tc.duplicated(), expected)
assert_series_equal(tc.drop_duplicates(), tc[~expected])
sc = tc.copy()
sc.drop_duplicates(inplace=True)
assert_series_equal(sc, tc[~expected])

expected = Series([True, True, False, False])
assert_series_equal(tc.duplicated(keep='last'), expected)
assert_series_equal(tc.drop_duplicates(keep='last'), tc[~expected])
sc = tc.copy()
sc.drop_duplicates(keep='last', inplace=True)
assert_series_equal(sc, tc[~expected])

expected = Series([True, True, True, True])
assert_series_equal(tc.duplicated(keep=False), expected)
assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected])
sc = tc.copy()
sc.drop_duplicates(keep=False, inplace=True)
assert_series_equal(sc, tc[~expected])

def test_clip(self):
val = self.ts.median()

Expand Down Expand Up @@ -1416,7 +1278,8 @@ def test_ptp(self):
N = 1000
arr = np.random.randn(N)
ser = Series(arr)
assert np.ptp(ser) == np.ptp(arr)
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
assert np.ptp(ser) == np.ptp(arr)

# GH11163
s = Series([3, 5, np.nan, -3, 10])
Expand Down Expand Up @@ -1457,10 +1320,6 @@ def test_empty_timeseries_redections_return_nat(self):
assert Series([], dtype=dtype).min() is pd.NaT
assert Series([], dtype=dtype).max() is pd.NaT

def test_unique_data_ownership(self):
# it works! #1807
Series(Series(["a", "c", "b"]).unique()).sort_values()

def test_repeat(self):
s = Series(np.random.randn(3), index=['a', 'b', 'c'])

Expand Down Expand Up @@ -1537,29 +1396,6 @@ def test_searchsorted_sorter(self):
e = np.array([0, 2], dtype=np.intp)
tm.assert_numpy_array_equal(r, e)

def test_is_unique(self):
# GH11946
s = Series(np.random.randint(0, 10, size=1000))
assert not s.is_unique
s = Series(np.arange(1000))
assert s.is_unique

def test_is_unique_class_ne(self, capsys):
# GH 20661
class Foo(object):
def __init__(self, val):
self._value = val

def __ne__(self, other):
raise Exception("NEQ not supported")

li = [Foo(i) for i in range(5)]
s = pd.Series(li, index=[i for i in range(5)])
_, err = capsys.readouterr()
s.is_unique
_, err = capsys.readouterr()
assert len(err) == 0

def test_is_monotonic(self):

s = Series(np.random.randint(0, 10, size=1000))
Expand Down
Loading