TST/CLN: Remove unnecessary pyarrow version checking (#51545)

* cleanups
* Add back some checks
* Unneeded warning checking
* fix fillna fallback warnings
* Remove tm.assert_produces_warning
pandas-dev · Feb 23, 2023 · 8baedc1
1 parent 9935690
commit 8baedc1
Show file tree

Hide file tree

Showing 10 changed files with 61 additions and 375 deletions.
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -569,14 +569,8 @@ def argsort(
     ) -> np.ndarray:
         order = "ascending" if ascending else "descending"
         null_placement = {"last": "at_end", "first": "at_start"}.get(na_position, None)
-        if null_placement is None or pa_version_under7p0:
-            # Although pc.array_sort_indices exists in version 6
-            # there's a bug that affects the pa.ChunkedArray backing
-            # https://issues.apache.org/jira/browse/ARROW-12042
-            fallback_performancewarning("7")
-            return super().argsort(
-                ascending=ascending, kind=kind, na_position=na_position
-            )
+        if null_placement is None:
+            raise ValueError(f"invalid na_position: {na_position}")
 
         result = pc.array_sort_indices(
             self._data, order=order, null_placement=null_placement
@@ -640,9 +634,8 @@ def fillna(
         if limit is not None:
             return super().fillna(value=value, method=method, limit=limit)
 
-        if method is not None and pa_version_under7p0:
-            # fill_null_{forward|backward} added in pyarrow 7.0
-            fallback_performancewarning(version="7")
+        if method is not None:
+            fallback_performancewarning()
             return super().fillna(value=value, method=method, limit=limit)
 
         if is_array_like(value):

diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
@@ -4,9 +4,6 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under7p0
-from pandas.errors import PerformanceWarning
-
 import pandas as pd
 from pandas import (
     DatetimeIndex,
@@ -48,16 +45,8 @@ def test_value_counts(index_or_series_obj):
     # TODO(GH#32514): Order of entries with the same count is inconsistent
     #  on CI (gh-32449)
     if obj.duplicated().any():
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
-        ):
-            result = result.sort_index()
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
-        ):
-            expected = expected.sort_index()
+        result = result.sort_index()
+        expected = expected.sort_index()
     tm.assert_series_equal(result, expected)
 
 
@@ -97,16 +86,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
     if obj.duplicated().any():
         # TODO(GH#32514):
         #  Order of entries with the same count is inconsistent on CI (gh-32449)
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
-        ):
-            expected = expected.sort_index()
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
-        ):
-            result = result.sort_index()
+        expected = expected.sort_index()
+        result = result.sort_index()
 
     if not isinstance(result.dtype, np.dtype):
         # i.e IntegerDtype
@@ -119,16 +100,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
     if obj.duplicated().any():
         # TODO(GH#32514):
         #  Order of entries with the same count is inconsistent on CI (gh-32449)
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
-        ):
-            expected = expected.sort_index()
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
-        ):
-            result = result.sort_index()
+        expected = expected.sort_index()
+        result = result.sort_index()
     tm.assert_series_equal(result, expected)
 
 

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -59,10 +59,6 @@
 
 from pandas.core.arrays.arrow.dtype import ArrowDtype  # isort:skip
 
-pytestmark = pytest.mark.filterwarnings(
-    "ignore:.* may decrease performance. Upgrade to pyarrow >=7 to possibly"
-)
-
 
 @pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str)
 def dtype(request):
@@ -311,14 +307,7 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
                 )
             )
         elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
-            if pa_version_under7p0:
-                request.node.add_marker(
-                    pytest.mark.xfail(
-                        raises=pa.ArrowNotImplementedError,
-                        reason=f"pyarrow doesn't support string cast from {pa_dtype}",
-                    )
-                )
-            elif is_platform_windows() and is_ci_environment():
+            if is_platform_windows() and is_ci_environment():
                 request.node.add_marker(
                     pytest.mark.xfail(
                         raises=pa.ArrowInvalid,
@@ -561,23 +550,7 @@ def test_groupby_extension_transform(self, data_for_grouping, request):
                     reason=f"{pa_dtype} only has 2 unique possible values",
                 )
             )
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
-            check_stacklevel=False,
-        ):
-            super().test_groupby_extension_transform(data_for_grouping)
-
-    def test_groupby_extension_apply(
-        self, data_for_grouping, groupby_apply_op, request
-    ):
-        pa_dtype = data_for_grouping.dtype.pyarrow_dtype
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
-            check_stacklevel=False,
-        ):
-            super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
+        super().test_groupby_extension_transform(data_for_grouping)
 
     @pytest.mark.parametrize("as_index", [True, False])
     def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
@@ -589,12 +562,7 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
                     reason=f"{pa_dtype} only has 2 unique possible values",
                 )
             )
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
-            check_stacklevel=False,
-        ):
-            super().test_groupby_extension_agg(as_index, data_for_grouping)
+        super().test_groupby_extension_agg(as_index, data_for_grouping)
 
     def test_in_numeric_groupby(self, data_for_grouping):
         if is_string_dtype(data_for_grouping.dtype):
@@ -712,14 +680,20 @@ def test_view(self, data):
 
 class TestBaseMissing(base.BaseMissingTests):
     def test_fillna_no_op_returns_copy(self, data):
-        with tm.maybe_produces_warning(
-            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
-        ):
-            super().test_fillna_no_op_returns_copy(data)
+        data = data[~data.isna()]
+
+        valid = data[0]
+        result = data.fillna(valid)
+        assert result is not data
+        self.assert_extension_array_equal(result, data)
+        with tm.assert_produces_warning(PerformanceWarning):
+            result = data.fillna(method="backfill")
+        assert result is not data
+        self.assert_extension_array_equal(result, data)
 
     def test_fillna_series_method(self, data_missing, fillna_method):
         with tm.maybe_produces_warning(
-            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
+            PerformanceWarning, fillna_method is not None, check_stacklevel=False
         ):
             super().test_fillna_series_method(data_missing, fillna_method)
 
@@ -797,12 +771,6 @@ def test_invert(self, data, request):
 
 
 class TestBaseMethods(base.BaseMethodsTests):
-    def test_argsort_missing_array(self, data_missing_for_sorting):
-        with tm.maybe_produces_warning(
-            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
-        ):
-            super().test_argsort_missing_array(data_missing_for_sorting)
-
     @pytest.mark.parametrize("periods", [1, -2])
     def test_diff(self, data, periods, request):
         pa_dtype = data.dtype.pyarrow_dtype
@@ -817,20 +785,10 @@ def test_diff(self, data, periods, request):
             )
         super().test_diff(data, periods)
 
-    @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
     @pytest.mark.parametrize("dropna", [True, False])
     def test_value_counts(self, all_data, dropna, request):
         super().test_value_counts(all_data, dropna)
 
-    def test_value_counts_with_normalize(self, data, request):
-        pa_dtype = data.dtype.pyarrow_dtype
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
-            check_stacklevel=False,
-        ):
-            super().test_value_counts_with_normalize(data)
-
     def test_argmin_argmax(
         self, data_for_sorting, data_missing_for_sorting, na_value, request
     ):
@@ -878,47 +836,6 @@ def test_argreduce_series(
             data_missing_for_sorting, op_name, skipna, expected
         )
 
-    @pytest.mark.parametrize(
-        "na_position, expected",
-        [
-            ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
-            ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
-        ],
-    )
-    def test_nargsort(self, data_missing_for_sorting, na_position, expected):
-        with tm.maybe_produces_warning(
-            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
-        ):
-            super().test_nargsort(data_missing_for_sorting, na_position, expected)
-
-    @pytest.mark.parametrize("ascending", [True, False])
-    def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request):
-        with tm.maybe_produces_warning(
-            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
-        ):
-            super().test_sort_values(data_for_sorting, ascending, sort_by_key)
-
-    @pytest.mark.parametrize("ascending", [True, False])
-    def test_sort_values_missing(
-        self, data_missing_for_sorting, ascending, sort_by_key
-    ):
-        with tm.maybe_produces_warning(
-            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
-        ):
-            super().test_sort_values_missing(
-                data_missing_for_sorting, ascending, sort_by_key
-            )
-
-    @pytest.mark.parametrize("ascending", [True, False])
-    def test_sort_values_frame(self, data_for_sorting, ascending, request):
-        pa_dtype = data_for_sorting.dtype.pyarrow_dtype
-        with tm.maybe_produces_warning(
-            PerformanceWarning,
-            pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
-            check_stacklevel=False,
-        ):
-            super().test_sort_values_frame(data_for_sorting, ascending)
-
     def test_factorize(self, data_for_grouping, request):
         pa_dtype = data_for_grouping.dtype.pyarrow_dtype
         if pa.types.is_boolean(pa_dtype):