Skip to content

Commit

Permalink
DEPR: Remove array manager branches from tests (#56621)
Browse files: browse the repository at this point in the history
  • Loading branch information
phofl committed Dec 26, 2023
1 parent 58b1d12 commit f1de9c7
Show file tree
Hide file tree
Showing 42 changed files with 171 additions and 487 deletions.
8 changes: 0 additions & 8 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1877,14 +1877,6 @@ def indexer_ial(request):
return request.param


@pytest.fixture
def using_array_manager() -> bool:
"""
Fixture to check if the array manager is being used.
"""
return _get_option("mode.data_manager", silent=True) == "array"


@pytest.fixture
def using_copy_on_write() -> bool:
"""
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1487,7 +1487,7 @@ def test_apply_dtype(col):
tm.assert_series_equal(result, expected)


def test_apply_mutating(using_array_manager, using_copy_on_write, warn_copy_on_write):
def test_apply_mutating(using_copy_on_write, warn_copy_on_write):
# GH#35462 case where applied func pins a new BlockManager to a row
df = DataFrame({"a": range(100), "b": range(100, 200)})
df_orig = df.copy()
Expand All @@ -1505,7 +1505,7 @@ def func(row):
result = df.apply(func, axis=1)

tm.assert_frame_equal(result, expected)
if using_copy_on_write or using_array_manager:
if using_copy_on_write:
# INFO(CoW) With copy on write, mutating a viewing row doesn't mutate the parent
# INFO(ArrayManager) With BlockManager, the row is a view and mutated in place,
# with ArrayManager the row is not a view, and thus not mutated in place
Expand Down
8 changes: 2 additions & 6 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,16 +586,12 @@ def test_df_div_zero_series_does_not_commute(self):
# ------------------------------------------------------------------
# Mod By Zero

def test_df_mod_zero_df(self, using_array_manager):
def test_df_mod_zero_df(self):
# GH#3590, modulo as ints
df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]})
# this is technically wrong, as the integer portion is coerced to float
first = Series([0, 0, 0, 0])
if not using_array_manager:
# INFO(ArrayManager) BlockManager doesn't preserve dtype per column
# while ArrayManager performs op column-wise and thus preserves
# dtype if possible
first = first.astype("float64")
first = first.astype("float64")
second = Series([np.nan, np.nan, np.nan, 0])
expected = pd.DataFrame({"first": first, "second": second})
result = df % df
Expand Down
9 changes: 1 addition & 8 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -1736,9 +1736,7 @@ def test_td64_div_object_mixed_result(self, box_with_array):
# ------------------------------------------------------------------
# __floordiv__, __rfloordiv__

def test_td64arr_floordiv_td64arr_with_nat(
self, box_with_array, using_array_manager
):
def test_td64arr_floordiv_td64arr_with_nat(self, box_with_array):
# GH#35529
box = box_with_array
xbox = np.ndarray if box is pd.array else box
Expand All @@ -1751,11 +1749,6 @@ def test_td64arr_floordiv_td64arr_with_nat(

expected = np.array([1.0, 1.0, np.nan], dtype=np.float64)
expected = tm.box_expected(expected, xbox)
if box is DataFrame and using_array_manager:
# INFO(ArrayManager) floordiv returns integer, and ArrayManager
# performs ops column-wise and thus preserves int64 dtype for
# columns without missing values
expected[[0, 1]] = expected[[0, 1]].astype("int64")

with tm.maybe_produces_warning(
RuntimeWarning, box is pd.array, check_stacklevel=False
Expand Down
12 changes: 4 additions & 8 deletions pandas/tests/copy_view/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_series_values(using_copy_on_write, method):
[lambda df: df.values, lambda df: np.asarray(df)],
ids=["values", "asarray"],
)
def test_dataframe_values(using_copy_on_write, using_array_manager, method):
def test_dataframe_values(using_copy_on_write, method):
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df_orig = df.copy()

Expand All @@ -70,10 +70,7 @@ def test_dataframe_values(using_copy_on_write, using_array_manager, method):
else:
assert arr.flags.writeable is True
arr[0, 0] = 0
if not using_array_manager:
assert df.iloc[0, 0] == 0
else:
tm.assert_frame_equal(df, df_orig)
assert df.iloc[0, 0] == 0


def test_series_to_numpy(using_copy_on_write):
Expand Down Expand Up @@ -157,11 +154,10 @@ def test_dataframe_array_ea_dtypes(using_copy_on_write):
assert arr.flags.writeable is True


def test_dataframe_array_string_dtype(using_copy_on_write, using_array_manager):
def test_dataframe_array_string_dtype(using_copy_on_write):
df = DataFrame({"a": ["a", "b"]}, dtype="string")
arr = np.asarray(df)
if not using_array_manager:
assert np.shares_memory(arr, get_array(df, "a"))
assert np.shares_memory(arr, get_array(df, "a"))
if using_copy_on_write:
assert arr.flags.writeable is False
else:
Expand Down
9 changes: 2 additions & 7 deletions pandas/tests/copy_view/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,16 +339,11 @@ def test_dataframe_from_dict_of_series_with_dtype(index):


@pytest.mark.parametrize("copy", [False, None, True])
def test_frame_from_numpy_array(using_copy_on_write, copy, using_array_manager):
def test_frame_from_numpy_array(using_copy_on_write, copy):
arr = np.array([[1, 2], [3, 4]])
df = DataFrame(arr, copy=copy)

if (
using_copy_on_write
and copy is not False
or copy is True
or (using_array_manager and copy is None)
):
if using_copy_on_write and copy is not False or copy is True:
assert not np.shares_memory(get_array(df, 0), arr)
else:
assert np.shares_memory(get_array(df, 0), arr)
Expand Down
90 changes: 23 additions & 67 deletions pandas/tests/copy_view/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,15 +140,11 @@ def test_subset_row_slice(backend, using_copy_on_write, warn_copy_on_write):
@pytest.mark.parametrize(
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_subset_column_slice(
backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype
):
def test_subset_column_slice(backend, using_copy_on_write, warn_copy_on_write, dtype):
# Case: taking a subset of the columns of a DataFrame using a slice
# + afterwards modifying the subset
dtype_backend, DataFrame, _ = backend
single_block = (
dtype == "int64" and dtype_backend == "numpy"
) and not using_array_manager
single_block = dtype == "int64" and dtype_backend == "numpy"
df = DataFrame(
{"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
)
Expand Down Expand Up @@ -176,7 +172,7 @@ def test_subset_column_slice(
tm.assert_frame_equal(subset, expected)
# original parent dataframe is not modified (also not for BlockManager case,
# except for single block)
if not using_copy_on_write and (using_array_manager or single_block):
if not using_copy_on_write and single_block:
df_orig.iloc[0, 1] = 0
tm.assert_frame_equal(df, df_orig)
else:
Expand All @@ -201,7 +197,6 @@ def test_subset_loc_rows_columns(
dtype,
row_indexer,
column_indexer,
using_array_manager,
using_copy_on_write,
warn_copy_on_write,
):
Expand All @@ -224,14 +219,7 @@ def test_subset_loc_rows_columns(
mutate_parent = (
isinstance(row_indexer, slice)
and isinstance(column_indexer, slice)
and (
using_array_manager
or (
dtype == "int64"
and dtype_backend == "numpy"
and not using_copy_on_write
)
)
and (dtype == "int64" and dtype_backend == "numpy" and not using_copy_on_write)
)

# modifying the subset never modifies the parent
Expand Down Expand Up @@ -265,7 +253,6 @@ def test_subset_iloc_rows_columns(
dtype,
row_indexer,
column_indexer,
using_array_manager,
using_copy_on_write,
warn_copy_on_write,
):
Expand All @@ -288,14 +275,7 @@ def test_subset_iloc_rows_columns(
mutate_parent = (
isinstance(row_indexer, slice)
and isinstance(column_indexer, slice)
and (
using_array_manager
or (
dtype == "int64"
and dtype_backend == "numpy"
and not using_copy_on_write
)
)
and (dtype == "int64" and dtype_backend == "numpy" and not using_copy_on_write)
)

# modifying the subset never modifies the parent
Expand Down Expand Up @@ -422,7 +402,7 @@ def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write):
"dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
)
def test_subset_set_column_with_loc(
backend, using_copy_on_write, warn_copy_on_write, using_array_manager, dtype
backend, using_copy_on_write, warn_copy_on_write, dtype
):
# Case: setting a single column with loc on a viewing subset
# -> subset.loc[:, col] = value
Expand All @@ -440,10 +420,7 @@ def test_subset_set_column_with_loc(
subset.loc[:, "a"] = np.array([10, 11], dtype="int64")
else:
with pd.option_context("chained_assignment", "warn"):
with tm.assert_produces_warning(
None,
raise_on_extra_warnings=not using_array_manager,
):
with tm.assert_produces_warning(None):
subset.loc[:, "a"] = np.array([10, 11], dtype="int64")

subset._mgr._verify_integrity()
Expand All @@ -461,9 +438,7 @@ def test_subset_set_column_with_loc(
tm.assert_frame_equal(df, df_orig)


def test_subset_set_column_with_loc2(
backend, using_copy_on_write, warn_copy_on_write, using_array_manager
):
def test_subset_set_column_with_loc2(backend, using_copy_on_write, warn_copy_on_write):
# Case: setting a single column with loc on a viewing subset
# -> subset.loc[:, col] = value
# separate test for case of DataFrame of a single column -> takes a separate
Expand All @@ -480,10 +455,7 @@ def test_subset_set_column_with_loc2(
subset.loc[:, "a"] = 0
else:
with pd.option_context("chained_assignment", "warn"):
with tm.assert_produces_warning(
None,
raise_on_extra_warnings=not using_array_manager,
):
with tm.assert_produces_warning(None):
subset.loc[:, "a"] = 0

subset._mgr._verify_integrity()
Expand Down Expand Up @@ -600,7 +572,6 @@ def test_subset_chained_getitem(
method,
dtype,
using_copy_on_write,
using_array_manager,
warn_copy_on_write,
):
# Case: creating a subset using multiple, chained getitem calls using views
Expand All @@ -614,17 +585,10 @@ def test_subset_chained_getitem(
# when not using CoW, it depends on whether we have a single block or not
# and whether we are slicing the columns -> in that case we have a view
test_callspec = request.node.callspec.id
if not using_array_manager:
subset_is_view = test_callspec in (
"numpy-single-block-column-iloc-slice",
"numpy-single-block-column-loc-slice",
)
else:
# with ArrayManager, it doesn't matter whether we have
# single vs mixed block or numpy vs nullable dtypes
subset_is_view = test_callspec.endswith(
("column-iloc-slice", "column-loc-slice")
)
subset_is_view = test_callspec in (
"numpy-single-block-column-iloc-slice",
"numpy-single-block-column-loc-slice",
)

# modify subset -> don't modify parent
subset = method(df)
Expand Down Expand Up @@ -726,9 +690,7 @@ def test_subset_chained_getitem_series(
assert subset.iloc[0] == 0


def test_subset_chained_single_block_row(
using_copy_on_write, using_array_manager, warn_copy_on_write
):
def test_subset_chained_single_block_row(using_copy_on_write, warn_copy_on_write):
# not parametrizing this for dtype backend, since this explicitly tests single block
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
df_orig = df.copy()
Expand All @@ -737,7 +699,7 @@ def test_subset_chained_single_block_row(
subset = df[:].iloc[0].iloc[0:2]
with tm.assert_cow_warning(warn_copy_on_write):
subset.iloc[0] = 0
if using_copy_on_write or using_array_manager:
if using_copy_on_write:
tm.assert_frame_equal(df, df_orig)
else:
assert df.iloc[0, 0] == 0
Expand All @@ -747,7 +709,7 @@ def test_subset_chained_single_block_row(
with tm.assert_cow_warning(warn_copy_on_write):
df.iloc[0, 0] = 0
expected = Series([1, 4], index=["a", "b"], name=0)
if using_copy_on_write or using_array_manager:
if using_copy_on_write:
tm.assert_series_equal(subset, expected)
else:
assert subset.iloc[0] == 0
Expand Down Expand Up @@ -967,9 +929,7 @@ def test_del_series(backend):
# Accessing column as Series


def test_column_as_series(
backend, using_copy_on_write, warn_copy_on_write, using_array_manager
):
def test_column_as_series(backend, using_copy_on_write, warn_copy_on_write):
# Case: selecting a single column now also uses Copy-on-Write
dtype_backend, DataFrame, Series = backend
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
Expand All @@ -979,7 +939,7 @@ def test_column_as_series(

assert np.shares_memory(get_array(s, "a"), get_array(df, "a"))

if using_copy_on_write or using_array_manager:
if using_copy_on_write:
s[0] = 0
else:
if warn_copy_on_write:
Expand All @@ -1004,7 +964,7 @@ def test_column_as_series(


def test_column_as_series_set_with_upcast(
backend, using_copy_on_write, using_array_manager, warn_copy_on_write
backend, using_copy_on_write, warn_copy_on_write
):
# Case: selecting a single column now also uses Copy-on-Write -> when
# setting a value causes an upcast, we don't need to update the parent
Expand All @@ -1019,7 +979,7 @@ def test_column_as_series_set_with_upcast(
with pytest.raises(TypeError, match="Invalid value"):
s[0] = "foo"
expected = Series([1, 2, 3], name="a")
elif using_copy_on_write or warn_copy_on_write or using_array_manager:
elif using_copy_on_write or warn_copy_on_write:
# TODO(CoW-warn) assert the FutureWarning for CoW is also raised
with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
s[0] = "foo"
Expand Down Expand Up @@ -1063,7 +1023,6 @@ def test_column_as_series_no_item_cache(
method,
using_copy_on_write,
warn_copy_on_write,
using_array_manager,
):
# Case: selecting a single column (which now also uses Copy-on-Write to protect
# the view) should always give a new object (i.e. not make use of a cache)
Expand All @@ -1080,7 +1039,7 @@ def test_column_as_series_no_item_cache(
else:
assert s1 is s2

if using_copy_on_write or using_array_manager:
if using_copy_on_write:
s1.iloc[0] = 0
elif warn_copy_on_write:
with tm.assert_cow_warning():
Expand Down Expand Up @@ -1181,18 +1140,15 @@ def test_series_midx_slice(using_copy_on_write, warn_copy_on_write):
tm.assert_series_equal(ser, expected)


def test_getitem_midx_slice(
using_copy_on_write, warn_copy_on_write, using_array_manager
):
def test_getitem_midx_slice(using_copy_on_write, warn_copy_on_write):
df = DataFrame({("a", "x"): [1, 2], ("a", "y"): 1, ("b", "x"): 2})
df_orig = df.copy()
new_df = df[("a",)]

if using_copy_on_write:
assert not new_df._mgr._has_no_reference(0)

if not using_array_manager:
assert np.shares_memory(get_array(df, ("a", "x")), get_array(new_df, "x"))
assert np.shares_memory(get_array(df, ("a", "x")), get_array(new_df, "x"))
if using_copy_on_write:
new_df.iloc[0, 0] = 100
tm.assert_frame_equal(df_orig, df)
Expand Down

0 comments on commit f1de9c7

Please sign in to comment.