Skip to content

Commit

Permalink
TST/CLN: Remove unnecessary pyarrow version checking (#51545)
Browse files (browse the repository at this point in the history)
* cleanups

* Add back some checks

* Unneeded warning checking

* fix fillna fallback warnings

* Remove tm.assert_produces_warning
  • Loading branch information
mroeschke committed Feb 23, 2023
1 parent 9935690 commit 8baedc1
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 375 deletions.
15 changes: 4 additions & 11 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,14 +569,8 @@ def argsort(
) -> np.ndarray:
order = "ascending" if ascending else "descending"
null_placement = {"last": "at_end", "first": "at_start"}.get(na_position, None)
if null_placement is None or pa_version_under7p0:
# Although pc.array_sort_indices exists in version 6
# there's a bug that affects the pa.ChunkedArray backing
# https://issues.apache.org/jira/browse/ARROW-12042
fallback_performancewarning("7")
return super().argsort(
ascending=ascending, kind=kind, na_position=na_position
)
if null_placement is None:
raise ValueError(f"invalid na_position: {na_position}")

result = pc.array_sort_indices(
self._data, order=order, null_placement=null_placement
Expand Down Expand Up @@ -640,9 +634,8 @@ def fillna(
if limit is not None:
return super().fillna(value=value, method=method, limit=limit)

if method is not None and pa_version_under7p0:
# fill_null_{forward|backward} added in pyarrow 7.0
fallback_performancewarning(version="7")
if method is not None:
fallback_performancewarning()
return super().fillna(value=value, method=method, limit=limit)

if is_array_like(value):
Expand Down
39 changes: 6 additions & 33 deletions pandas/tests/base/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
import numpy as np
import pytest

from pandas.compat import pa_version_under7p0
from pandas.errors import PerformanceWarning

import pandas as pd
from pandas import (
DatetimeIndex,
Expand Down Expand Up @@ -48,16 +45,8 @@ def test_value_counts(index_or_series_obj):
# TODO(GH#32514): Order of entries with the same count is inconsistent
# on CI (gh-32449)
if obj.duplicated().any():
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
result = result.sort_index()
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
expected = expected.sort_index()
result = result.sort_index()
expected = expected.sort_index()
tm.assert_series_equal(result, expected)


Expand Down Expand Up @@ -97,16 +86,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
if obj.duplicated().any():
# TODO(GH#32514):
# Order of entries with the same count is inconsistent on CI (gh-32449)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
expected = expected.sort_index()
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
result = result.sort_index()
expected = expected.sort_index()
result = result.sort_index()

if not isinstance(result.dtype, np.dtype):
# i.e IntegerDtype
Expand All @@ -119,16 +100,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
if obj.duplicated().any():
# TODO(GH#32514):
# Order of entries with the same count is inconsistent on CI (gh-32449)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
expected = expected.sort_index()
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
result = result.sort_index()
expected = expected.sort_index()
result = result.sort_index()
tm.assert_series_equal(result, expected)


Expand Down
111 changes: 14 additions & 97 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,6 @@

from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip

pytestmark = pytest.mark.filterwarnings(
"ignore:.* may decrease performance. Upgrade to pyarrow >=7 to possibly"
)


@pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str)
def dtype(request):
Expand Down Expand Up @@ -311,14 +307,7 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
)
)
elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
if pa_version_under7p0:
request.node.add_marker(
pytest.mark.xfail(
raises=pa.ArrowNotImplementedError,
reason=f"pyarrow doesn't support string cast from {pa_dtype}",
)
)
elif is_platform_windows() and is_ci_environment():
if is_platform_windows() and is_ci_environment():
request.node.add_marker(
pytest.mark.xfail(
raises=pa.ArrowInvalid,
Expand Down Expand Up @@ -561,23 +550,7 @@ def test_groupby_extension_transform(self, data_for_grouping, request):
reason=f"{pa_dtype} only has 2 unique possible values",
)
)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
check_stacklevel=False,
):
super().test_groupby_extension_transform(data_for_grouping)

def test_groupby_extension_apply(
self, data_for_grouping, groupby_apply_op, request
):
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
check_stacklevel=False,
):
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
super().test_groupby_extension_transform(data_for_grouping)

@pytest.mark.parametrize("as_index", [True, False])
def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
Expand All @@ -589,12 +562,7 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
reason=f"{pa_dtype} only has 2 unique possible values",
)
)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
check_stacklevel=False,
):
super().test_groupby_extension_agg(as_index, data_for_grouping)
super().test_groupby_extension_agg(as_index, data_for_grouping)

def test_in_numeric_groupby(self, data_for_grouping):
if is_string_dtype(data_for_grouping.dtype):
Expand Down Expand Up @@ -712,14 +680,20 @@ def test_view(self, data):

class TestBaseMissing(base.BaseMissingTests):
def test_fillna_no_op_returns_copy(self, data):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
):
super().test_fillna_no_op_returns_copy(data)
data = data[~data.isna()]

valid = data[0]
result = data.fillna(valid)
assert result is not data
self.assert_extension_array_equal(result, data)
with tm.assert_produces_warning(PerformanceWarning):
result = data.fillna(method="backfill")
assert result is not data
self.assert_extension_array_equal(result, data)

def test_fillna_series_method(self, data_missing, fillna_method):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
PerformanceWarning, fillna_method is not None, check_stacklevel=False
):
super().test_fillna_series_method(data_missing, fillna_method)

Expand Down Expand Up @@ -797,12 +771,6 @@ def test_invert(self, data, request):


class TestBaseMethods(base.BaseMethodsTests):
def test_argsort_missing_array(self, data_missing_for_sorting):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
):
super().test_argsort_missing_array(data_missing_for_sorting)

@pytest.mark.parametrize("periods", [1, -2])
def test_diff(self, data, periods, request):
pa_dtype = data.dtype.pyarrow_dtype
Expand All @@ -817,20 +785,10 @@ def test_diff(self, data, periods, request):
)
super().test_diff(data, periods)

@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("dropna", [True, False])
def test_value_counts(self, all_data, dropna, request):
super().test_value_counts(all_data, dropna)

def test_value_counts_with_normalize(self, data, request):
pa_dtype = data.dtype.pyarrow_dtype
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
check_stacklevel=False,
):
super().test_value_counts_with_normalize(data)

def test_argmin_argmax(
self, data_for_sorting, data_missing_for_sorting, na_value, request
):
Expand Down Expand Up @@ -878,47 +836,6 @@ def test_argreduce_series(
data_missing_for_sorting, op_name, skipna, expected
)

@pytest.mark.parametrize(
"na_position, expected",
[
("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
],
)
def test_nargsort(self, data_missing_for_sorting, na_position, expected):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
):
super().test_nargsort(data_missing_for_sorting, na_position, expected)

@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
):
super().test_sort_values(data_for_sorting, ascending, sort_by_key)

@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_missing(
self, data_missing_for_sorting, ascending, sort_by_key
):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
):
super().test_sort_values_missing(
data_missing_for_sorting, ascending, sort_by_key
)

@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_frame(self, data_for_sorting, ascending, request):
pa_dtype = data_for_sorting.dtype.pyarrow_dtype
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
check_stacklevel=False,
):
super().test_sort_values_frame(data_for_sorting, ascending)

def test_factorize(self, data_for_grouping, request):
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
if pa.types.is_boolean(pa_dtype):
Expand Down

0 comments on commit 8baedc1

Please sign in to comment.