From e70415150597c6ef08beb61796a36f9c3dfb5bad Mon Sep 17 00:00:00 2001 From: AKHIL-149 Date: Sun, 7 Dec 2025 18:52:17 -0500 Subject: [PATCH 1/4] STY: Use strict arg in zip() in pandas/tests/extension I added strict=True parameter to 11 zip() calls across 7 files in the pandas/tests/extension directory to enforce Ruff rule B905. This ensures that all zipped iterables have equal lengths, making the tests more robust and catching potential bugs early during testing. Files modified: - base/methods.py: 3 zip() calls - date/array.py: 1 zip() call - decimal/array.py: 1 zip() call - json/array.py: 3 zip() calls - test_arrow.py: 1 zip() call - test_categorical.py: 1 zip() call - test_interval.py: 1 zip() call --- pandas/tests/extension/base/methods.py | 6 +++--- pandas/tests/extension/date/array.py | 2 +- pandas/tests/extension/decimal/array.py | 2 +- pandas/tests/extension/json/array.py | 6 +++--- pandas/tests/extension/test_arrow.py | 2 +- pandas/tests/extension/test_categorical.py | 2 +- pandas/tests/extension/test_interval.py | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 084ee61243fd0..7331ff1285f4b 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -350,7 +350,7 @@ def test_combine_le(self, data_repeated): result = s1.combine(s2, lambda x1, x2: x1 <= x2) expected = pd.Series( pd.array( - [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2), strict=True)], dtype=self._combine_le_expected_dtype, ) ) @@ -369,7 +369,7 @@ def test_combine_le(self, data_repeated): def _construct_for_combine_add(self, left, right): if isinstance(right, type(left)): return left._from_sequence( - [a + b for (a, b) in zip(list(left), list(right))], + [a + b for (a, b) in zip(list(left), list(right), strict=True)], dtype=left.dtype, ) else: @@ -627,7 +627,7 @@ def 
test_repeat(self, data, repeats, as_series, use_numpy): result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats) repeats = [repeats] * 3 if isinstance(repeats, int) else repeats - expected = [x for x, n in zip(arr, repeats) for _ in range(n)] + expected = [x for x, n in zip(arr, repeats, strict=True) for _ in range(n)] expected = type(data)._from_sequence(expected, dtype=data.dtype) if as_series: expected = pd.Series(expected, index=arr.index.repeat(repeats)) diff --git a/pandas/tests/extension/date/array.py b/pandas/tests/extension/date/array.py index dd275b01e734e..b97c20ad24f32 100644 --- a/pandas/tests/extension/date/array.py +++ b/pandas/tests/extension/date/array.py @@ -162,7 +162,7 @@ def __setitem__(self, key: int | slice | np.ndarray, value: Any) -> None: self._day[key] = value.day def __repr__(self) -> str: - return f"DateArray{list(zip(self._year, self._month, self._day))}" + return f"DateArray{list(zip(self._year, self._month, self._day, strict=True))}" def copy(self) -> DateArray: return DateArray((self._year.copy(), self._month.copy(), self._day.copy())) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index ab6e520fcf0b3..7d055e2143112 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -295,7 +295,7 @@ def convert_values(param): # If the operator is not defined for the underlying objects, # a TypeError should be raised - res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] + res = [op(a, b) for (a, b) in zip(lvalues, rvalues, strict=True)] return np.asarray(res, dtype=bool) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 164e0e517d6aa..bfe5078f09bb1 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -128,7 +128,7 @@ def __getitem__(self, item): item = pd.api.indexers.check_array_indexer(self, item) if is_bool_dtype(item.dtype): return 
type(self)._from_sequence( - [x for x, m in zip(self, item) if m], dtype=self.dtype + [x for x, m in zip(self, item, strict=True) if m], dtype=self.dtype ) # integer return type(self)([self.data[i] for i in item]) @@ -146,12 +146,12 @@ def __setitem__(self, key, value) -> None: if isinstance(key, np.ndarray) and key.dtype == "bool": # masking - for i, (k, v) in enumerate(zip(key, value)): + for i, (k, v) in enumerate(zip(key, value, strict=True)): if k: assert isinstance(v, self.dtype.type) self.data[i] = v else: - for k, v in zip(key, value): + for k, v in zip(key, value, strict=True): assert isinstance(v, self.dtype.type) self.data[k] = v diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index c34d7667c2cca..ba5d257bd59e4 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -282,7 +282,7 @@ def _construct_for_combine_add(self, left, right): if isinstance(right, type(left)): return left._from_sequence( - [a + b for (a, b) in zip(list(left), list(right))], + [a + b for (a, b) in zip(list(left), list(right), strict=True)], dtype=dtype, ) else: diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 275f8e2f859a4..eb671e74f4b25 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -126,7 +126,7 @@ def test_combine_add(self, data_repeated): s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 + x2) expected = pd.Series( - [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))] + [a + b for (a, b) in zip(list(orig_data1), list(orig_data2), strict=True)] ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index ada34e7ace680..5377b7058a3c9 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -34,7 +34,7 @@ def make_data(n: int): 
left_array = np.random.default_rng(2).uniform(size=n).cumsum() right_array = left_array + np.random.default_rng(2).uniform(size=n) - return [Interval(left, right) for left, right in zip(left_array, right_array)] + return [Interval(left, right) for left, right in zip(left_array, right_array, strict=True)] @pytest.fixture From a001b0f7b748b600f50eca9e55c39d17551f338b Mon Sep 17 00:00:00 2001 From: AKHIL-149 Date: Sun, 7 Dec 2025 19:34:04 -0500 Subject: [PATCH 2/4] FIX: Use strict=False for zip with itertools.cycle I changed two zip() calls in json/array.py from strict=True to strict=False because the 'value' variable can be an itertools.cycle() iterator (line 145), which is infinite. With strict=True, zip() raises ValueError as soon as 'key' is exhausted, because an infinite iterator never runs out and is therefore always the longer argument. Lines changed: - Line 149: enumerate(zip(key, value, strict=False)) - Line 154: zip(key, value, strict=False) --- pandas/tests/extension/json/array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index bfe5078f09bb1..1878fac1b8111 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -146,12 +146,12 @@ def __setitem__(self, key, value) -> None: if isinstance(key, np.ndarray) and key.dtype == "bool": # masking - for i, (k, v) in enumerate(zip(key, value, strict=True)): + for i, (k, v) in enumerate(zip(key, value, strict=False)): if k: assert isinstance(v, self.dtype.type) self.data[i] = v else: - for k, v in zip(key, value, strict=True): + for k, v in zip(key, value, strict=False): assert isinstance(v, self.dtype.type) self.data[k] = v From b147c67d0c779d6b6007321c73e16f6d819aa409 Mon Sep 17 00:00:00 2001 From: AKHIL-149 Date: Sun, 7 Dec 2025 19:44:15 -0500 Subject: [PATCH 3/4] STY: Fix line length to comply with E501 I broke long lines into multiple lines to comply with the 88-character line length limit enforced by ruff.
Files fixed: - base/methods.py:353 - Split list comprehension with zip across multiple lines - test_interval.py:37 - Split list comprehension with zip across multiple lines This addresses the pre-commit.ci E501 failures. --- pandas/tests/extension/base/methods.py | 7 ++++++- pandas/tests/extension/test_interval.py | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 7331ff1285f4b..009dbdc8f61bb 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -350,7 +350,12 @@ def test_combine_le(self, data_repeated): result = s1.combine(s2, lambda x1, x2: x1 <= x2) expected = pd.Series( pd.array( - [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2), strict=True)], + [ + a <= b + for (a, b) in zip( + list(orig_data1), list(orig_data2), strict=True + ) + ], dtype=self._combine_le_expected_dtype, ) ) diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 5377b7058a3c9..47bc26ba4a766 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -34,7 +34,10 @@ def make_data(n: int): left_array = np.random.default_rng(2).uniform(size=n).cumsum() right_array = left_array + np.random.default_rng(2).uniform(size=n) - return [Interval(left, right) for left, right in zip(left_array, right_array, strict=True)] + return [ + Interval(left, right) + for left, right in zip(left_array, right_array, strict=True) + ] @pytest.fixture From ebfb4d5b41045173055408340ab50314e5a6ac0a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Dec 2025 03:05:36 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/tests/extension/base/methods.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 009dbdc8f61bb..a2bf621a04669 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -352,9 +352,7 @@ def test_combine_le(self, data_repeated): pd.array( [ a <= b - for (a, b) in zip( - list(orig_data1), list(orig_data2), strict=True - ) + for (a, b) in zip(list(orig_data1), list(orig_data2), strict=True) ], dtype=self._combine_le_expected_dtype, )