From 1b96cba61d74375bd8579bbfd30ddc1e9d01e093 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 14:53:00 -0800 Subject: [PATCH 1/8] split tests --- pandas/tests/indexes/categorical/test_astype.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index 854ae8b62db30..fca104d229862 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -73,11 +73,15 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - def test_categorical_date_roundtrip(self): + @pytest.mark.parametrize("box", [True, False]) + def test_categorical_date_roundtrip(self, box): # astype to categorical and back should preserve date objects v = date.today() obj = Index([v, v]) + if box: + obj = obj.array + assert obj.dtype == object cat = obj.astype("category") From 156be4fb3bcaa3440a8ec0c3fc2115877c0080d5 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Dec 2021 11:07:00 -0800 Subject: [PATCH 2/8] revert change that fails on some builds --- pandas/tests/indexes/categorical/test_astype.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index fca104d229862..854ae8b62db30 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -73,15 +73,11 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("box", [True, False]) - def test_categorical_date_roundtrip(self, box): + def test_categorical_date_roundtrip(self): # astype to categorical and back should preserve date objects v = date.today() obj = Index([v, v]) - if box: - obj = obj.array - assert obj.dtype == object cat = obj.astype("category") From c622ee8fd360e71cf8f7fad668b32fb9c0917008 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 1 Dec 2021 14:53:00 -0800 Subject: [PATCH 3/8] split tests --- pandas/tests/indexes/categorical/test_astype.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index 854ae8b62db30..fca104d229862 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -73,11 +73,15 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - def test_categorical_date_roundtrip(self): + @pytest.mark.parametrize("box", [True, False]) + def test_categorical_date_roundtrip(self, box): # astype to categorical and back should preserve date objects v = date.today() obj = Index([v, v]) + if box: + obj = obj.array + assert obj.dtype == object cat = obj.astype("category") From e7c95c88726f5d3b76d3854bbd0877a3a82006a0 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Dec 2021 11:07:00 -0800 Subject: [PATCH 4/8] revert change that fails on some builds --- pandas/tests/indexes/categorical/test_astype.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index fca104d229862..854ae8b62db30 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -73,15 +73,11 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("box", [True, False]) - def test_categorical_date_roundtrip(self, box): + def test_categorical_date_roundtrip(self): # astype to categorical and back should preserve date objects v = date.today() obj = Index([v, v]) - if box: - obj = obj.array - assert obj.dtype == object cat = obj.astype("category") From d2588db78af14721333e2a8ec6d8f586c2731ec0 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Dec 2021 15:43:06 -0800 Subject: [PATCH 5/8] remove unnecessary skip --- pandas/tests/series/test_arithmetic.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index bd780be99abc7..5fbb42789d746 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -16,7 +16,6 @@ from pandas import ( Categorical, Index, - IntervalIndex, Series, Timedelta, bdate_range, @@ -828,9 +827,7 @@ def test_series_inplace_ops(self, dtype1, dtype2, dtype_expected, dtype_mul): def test_none_comparison(series_with_simple_index): series = series_with_simple_index - if isinstance(series.index, IntervalIndex): - # IntervalIndex breaks on "series[0] = np.nan" below - pytest.skip("IntervalIndex doesn't support assignment") + if len(series) < 1: pytest.skip("Test doesn't make sense on empty data") From 20c2baf7a9a58e08aa3f991509da47e8af7ce0a3 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Dec 2021 19:15:30 -0800 Subject: [PATCH 6/8] TODOs --- pandas/tests/groupby/test_apply.py | 3 +-- .../indexes/datetimes/test_constructors.py | 18 +++++++++--------- pandas/tests/indexes/test_base.py | 3 +-- pandas/tests/indexes/test_setops.py | 7 +++---- .../indexing/test_chaining_and_caching.py | 4 ++-- pandas/tests/indexing/test_indexing.py | 8 ++++---- pandas/tests/indexing/test_loc.py | 4 ++-- pandas/tests/series/indexing/test_getitem.py | 5 ++--- 8 files changed, 24 insertions(+), 28 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 9e15da1bb0c01..762cc310a7e7b 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1134,9 +1134,8 @@ def test_positional_slice_groups_datetimelike(): tm.assert_frame_equal(result, expected) -def test_doctest_example2(): +def test_groupby_apply_shape_cache_safety(): # GH#42702 this fails if we cache_readonly Block.shape - # TODO: more informative name df = DataFrame({"A": ["a", "a", "b"], "B": [1, 2, 3], "C": [4, 6, 5]}) gb = df.groupby("A") result = gb[["B", "C"]].apply(lambda x: x.astype(float).max() - x.min()) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 86dc6bd0e76ea..b27c5852cb97b 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -69,20 +69,20 @@ def test_shallow_copy_inherits_array_freq(self, index): def test_categorical_preserves_tz(self): # GH#18664 retain tz when going DTI-->Categorical-->DTI - # TODO: parametrize over DatetimeIndex/DatetimeArray - # once pd.CategoricalIndex(DTA) works - dti = DatetimeIndex( [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern" ) - ci = pd.CategoricalIndex(dti) - carr = pd.Categorical(dti) - cser = pd.Series(ci) + for dtobj in [dti, dti._data]: + # works for DatetimeIndex or DatetimeArray + + ci = pd.CategoricalIndex(dtobj) + carr = pd.Categorical(dtobj) + cser = pd.Series(ci) - for obj in [ci, carr, cser]: - result = DatetimeIndex(obj) - tm.assert_index_equal(result, dti) + for obj in [ci, carr, cser]: + result = DatetimeIndex(obj) + tm.assert_index_equal(result, dti) def test_dti_with_period_data_raises(self): # GH#23675 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index bf69f6f48f26d..aba2982b13914 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1364,9 +1364,8 @@ def test_str_to_bytes_raises(self): @pytest.mark.filterwarnings("ignore:elementwise comparison failed:FutureWarning") def test_index_with_tuple_bool(self): # GH34123 - # TODO: remove tupleize_cols=False once correct behaviour is restored # TODO: also this op right now produces FutureWarning from numpy - idx = Index([("a", "b"), ("b", "c"), ("c", "a")], tupleize_cols=False) + idx = Index([("a", "b"), ("b", "c"), ("c", "a")]) result = idx == ("c", "a") expected = np.array([False, False, True]) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index abe1c4fd03fcd..b3c86f91927ee 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -453,10 +453,9 @@ def test_intersection_difference_match_empty(self, index, sort): @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] ) -def test_setop_with_categorical(index, sort, method): - if isinstance(index, MultiIndex): # TODO: flat_index? - # tested separately in tests.indexes.multi.test_setops - return +def test_setop_with_categorical(index_flat, sort, method): + # MultiIndex tested separately in tests.indexes.multi.test_setops + index = index_flat other = index.astype("category") exact = "equiv" if isinstance(index, RangeIndex) else True diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 90bda69eaf139..e4c7f42cf7b6b 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -504,8 +504,8 @@ def test_iloc_setitem_chained_assignment(self): df["bb"].iloc[0] = 0.13 - # TODO: unused - df_tmp = df.iloc[ck] # noqa + # GH#3970 this lookup used to break the chained setting to 0.15 + df.iloc[ck] df["bb"].iloc[0] = 0.15 assert df["bb"].iloc[0] == 0.15 diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 12d8731842844..688f940e2b3fb 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -232,11 +232,11 @@ def test_dups_fancy_indexing_across_dtypes(self): df.head() str(df) result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]]) - result.columns = list("aaaaaaa") + result.columns = list("aaaaaaa") # GH#3468 - # TODO(wesm): unused? - df_v = df.iloc[:, 4] # noqa - res_v = result.iloc[:, 4] # noqa + # GH#3509 smoke tests for indexing with duplicate columns + df.iloc[:, 4] + result.iloc[:, 4] tm.assert_frame_equal(df, result) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2a9ee81b7a23a..91d3709325f7c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -199,8 +199,8 @@ def test_loc_getitem_single_boolean_arg(self, obj, key, exp): assert res == exp -class TestLoc2: - # TODO: better name, just separating out things that rely on base class +class TestLocBaseIndependent: + # Tests for loc that do not depend on subclassing Base @pytest.mark.parametrize( "msg, key", [ diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 4c17917b949ca..343cc21beb4b6 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -110,8 +110,8 @@ def test_getitem_int64(self, datetime_series): idx = np.int64(5) assert datetime_series[idx] == datetime_series[5] - # TODO: better name/GH ref? - def test_getitem_regression(self): + def test_getitem_full_range(self): + # github.com/pandas-dev/pandas/commit/4f433773141d2eb384325714a2776bcc5b2e20f7 ser = Series(range(5), index=list(range(5))) result = ser[list(range(5))] tm.assert_series_equal(result, ser) @@ -240,7 +240,6 @@ def test_getitem_partial_str_slice_high_reso_with_timedeltaindex(self): result = ser["1 days, 10:11:12.001001"] assert result == ser.iloc[1001] - # TODO: redundant with test_getitem_ndim_deprecated? def test_getitem_slice_2d(self, datetime_series): # GH#30588 multi-dimensional indexing deprecated From 7341f513bd2fb77aec2df53ec0850e28bdd64a01 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 2 Dec 2021 20:02:02 -0800 Subject: [PATCH 7/8] TODOs --- .../tests/arrays/boolean/test_construction.py | 25 ++++++++++--------- pandas/tests/arrays/masked/test_arithmetic.py | 7 +----- pandas/tests/extension/test_sparse.py | 7 ++++-- pandas/tests/frame/methods/test_align.py | 6 ----- pandas/tests/frame/methods/test_quantile.py | 3 ++- pandas/tests/groupby/test_apply.py | 2 +- pandas/tests/indexes/test_base.py | 1 + 7 files changed, 23 insertions(+), 28 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index f080bf7e03412..97523ee65fec6 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -323,17 +323,18 @@ def test_to_numpy_copy(): tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) -# FIXME: don't leave commented out # TODO when BooleanArray coerces to object dtype numpy array, need to do conversion # manually in the indexing code -# def test_indexing_boolean_mask(): -# arr = pd.array([1, 2, 3, 4], dtype="Int64") -# mask = pd.array([True, False, True, False], dtype="boolean") -# result = arr[mask] -# expected = pd.array([1, 3], dtype="Int64") -# tm.assert_extension_array_equal(result, expected) - -# # missing values -> error -# mask = pd.array([True, False, True, None], dtype="boolean") -# with pytest.raises(IndexError): -# result = arr[mask] +# TODO: belongs somewhere else? +@pytest.mark.xfail(reason="fails to raise") +def test_indexing_boolean_mask(): + arr = pd.array([1, 2, 3, 4], dtype="Int64") + mask = pd.array([True, False, True, False], dtype="boolean") + result = arr[mask] + expected = pd.array([1, 3], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + # missing values -> error + mask = pd.array([True, False, True, None], dtype="boolean") + with pytest.raises(IndexError, match="TBD"): + result = arr[mask] diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py index 8ad535eeb6b1c..ab6e5110422ca 100644 --- a/pandas/tests/arrays/masked/test_arithmetic.py +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -7,7 +7,6 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays import ExtensionArray # integer dtypes arrays = [pd.array([1, 2, 3, None], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES] @@ -71,11 +70,7 @@ def test_numpy_array_equivalence(data, all_arithmetic_operators): result = op(data, numpy_array) expected = op(data, pd_array) - if isinstance(expected, ExtensionArray): - tm.assert_extension_array_equal(result, expected) - else: - # TODO div still gives float ndarray -> remove this once we have Float EA - tm.assert_numpy_array_equal(result, expected) + tm.assert_extension_array_equal(result, expected) # Test equivalence with Series and DataFrame ops diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 1f22feff018ec..940f2257b5eba 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -328,11 +328,14 @@ def test_where_series(self, data, na_value): expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype)) self.assert_series_equal(result, expected) - def test_combine_first(self, data): + def test_combine_first(self, data, request): if data.dtype.subtype == "int": # Right now this is upcasted to float, just like combine_first # for Series[int] - pytest.skip("TODO(SparseArray.__setitem__ will preserve dtype.") + mark = pytest.mark.xfail( + reason="TODO(SparseArray.__setitem__) will preserve dtype." + ) + request.node.add_marker(mark) super().test_combine_first(data) def test_searchsorted(self, data_for_sorting, as_series): diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index a6e6914ba701e..03ea6492c07f3 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -55,9 +55,7 @@ def test_align_float(self, float_frame): # test fill value join_idx = float_frame.index.join(other.index) diff_a = float_frame.index.difference(join_idx) - diff_b = other.index.difference(join_idx) diff_a_vals = af.reindex(diff_a).values - diff_b_vals = bf.reindex(diff_b).values assert (diff_a_vals == -1).all() af, bf = float_frame.align(other, join="right", axis=0) @@ -74,12 +72,8 @@ def test_align_float(self, float_frame): # test fill value join_idx = float_frame.index.join(other.index) diff_a = float_frame.index.difference(join_idx) - diff_b = other.index.difference(join_idx) diff_a_vals = af.reindex(diff_a).values - # TODO(wesm): unused? - diff_b_vals = bf.reindex(diff_b).values # noqa - assert (diff_a_vals == -1).all() af, bf = float_frame.align(other, join="inner", axis=1) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index ed1623cd87aac..74233a4808aaa 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -571,7 +571,7 @@ def test_quantile_item_cache(self, using_array_manager): class TestQuantileExtensionDtype: # TODO: tests for axis=1? - # TODO: empty case? might as well do dt64 and td64 here too + # TODO: empty case? @pytest.fixture( params=[ @@ -581,6 +581,7 @@ class TestQuantileExtensionDtype: ), pd.period_range("2016-01-01", periods=9, freq="D"), pd.date_range("2016-01-01", periods=9, tz="US/Pacific"), + pd.timedelta_range("1 Day", periods=9), pd.array(np.arange(9), dtype="Int64"), pd.array(np.arange(9), dtype="Float64"), ], diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 762cc310a7e7b..e95ab700e12d3 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -360,7 +360,7 @@ def test_apply_frame_not_as_index_column_name(df): grouped = df.groupby(["A", "B"], as_index=False) result = grouped.apply(len) expected = grouped.count().rename(columns={"C": np.nan}).drop(columns="D") - # TODO: Use assert_frame_equal when column name is not np.nan (GH 36306) + # TODO(GH#34306): Use assert_frame_equal when column name is not np.nan tm.assert_index_equal(result.index, expected.index) tm.assert_numpy_array_equal(result.values, expected.values) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index aba2982b13914..71b947e418d28 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1365,6 +1365,7 @@ def test_str_to_bytes_raises(self): def test_index_with_tuple_bool(self): # GH34123 # TODO: also this op right now produces FutureWarning from numpy + # https://github.com/numpy/numpy/issues/11521 idx = Index([("a", "b"), ("b", "c"), ("c", "a")]) result = idx == ("c", "a") expected = np.array([False, False, True]) From 759e3e223ffbf5cc037cd67fddef86e1548591be Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 3 Dec 2021 08:27:39 -0800 Subject: [PATCH 8/8] xfail AM case --- .../tests/arrays/boolean/test_construction.py | 17 ----------------- pandas/tests/frame/methods/test_quantile.py | 13 ++++++++++++- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py index 97523ee65fec6..15f92f2567c1c 100644 --- a/pandas/tests/arrays/boolean/test_construction.py +++ b/pandas/tests/arrays/boolean/test_construction.py @@ -321,20 +321,3 @@ def test_to_numpy_copy(): result = arr.to_numpy(dtype=bool, copy=True) result[0] = False tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) - - -# TODO when BooleanArray coerces to object dtype numpy array, need to do conversion -# manually in the indexing code -# TODO: belongs somewhere else? -@pytest.mark.xfail(reason="fails to raise") -def test_indexing_boolean_mask(): - arr = pd.array([1, 2, 3, 4], dtype="Int64") - mask = pd.array([True, False, True, False], dtype="boolean") - result = arr[mask] - expected = pd.array([1, 3], dtype="Int64") - tm.assert_extension_array_equal(result, expected) - - # missing values -> error - mask = pd.array([True, False, True, None], dtype="boolean") - with pytest.raises(IndexError, match="TBD"): - result = arr[mask] diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 74233a4808aaa..8ff1b211c0db1 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -651,7 +651,18 @@ def test_quantile_ea_with_na(self, obj, index): # TODO(GH#39763): filtering can be removed after GH#39763 is fixed @pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning") - def test_quantile_ea_all_na(self, obj, index, frame_or_series): + def test_quantile_ea_all_na( + self, obj, index, frame_or_series, using_array_manager, request + ): + if ( + using_array_manager + and frame_or_series is DataFrame + and index.dtype == "m8[ns]" + ): + mark = pytest.mark.xfail( + reason="obj.astype fails bc obj is incorrectly dt64 at this point" + ) + request.node.add_marker(mark) obj.iloc[:] = index._na_value