Skip to content

Commit

Permalink
STY: Enable ruff perflint (#54236)
Browse files Browse the repository at this point in the history
* some checks implemented

* Include rules

* Enable perflint

* type
  • Loading branch information
mroeschke committed Jul 25, 2023
1 parent b3b1beb commit 2faf709
Show file tree
Hide file tree
Showing 20 changed files with 102 additions and 111 deletions.
6 changes: 2 additions & 4 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,8 @@
methods = [
x for x in dir(klass) if not x.startswith("_") or x in ("__iter__", "__array__")
]

for method in methods:
# ... and each of its public methods
moved_api_pages.append((f"{old}.{method}", f"{new}.{method}"))
# ... and each of its public methods
moved_api_pages.extend((f"{old}.{method}", f"{new}.{method}") for method in methods)

if include_api:
html_additional_pages = {
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5815,21 +5815,21 @@ def set_index(
# GH 49473 Use "lazy copy" with Copy-on-Write
frame = self.copy(deep=None)

arrays = []
arrays: list[Index] = []
names: list[Hashable] = []
if append:
names = list(self.index.names)
if isinstance(self.index, MultiIndex):
for i in range(self.index.nlevels):
arrays.append(self.index._get_level_values(i))
arrays.extend(
self.index._get_level_values(i) for i in range(self.index.nlevels)
)
else:
arrays.append(self.index)

to_remove: list[Hashable] = []
for col in keys:
if isinstance(col, MultiIndex):
for n in range(col.nlevels):
arrays.append(col._get_level_values(n))
arrays.extend(col._get_level_values(n) for n in range(col.nlevels))
names.extend(col.names)
elif isinstance(col, (Index, Series)):
# if Index then not MultiIndex (treated above)
Expand Down
5 changes: 1 addition & 4 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,10 +345,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg]
else:
# list of functions / function names
columns = []
for f in arg:
columns.append(com.get_callable_name(f) or f)

columns = (com.get_callable_name(f) or f for f in arg)
arg = zip(columns, arg)

results: dict[base.OutputKey, DataFrame | Series] = {}
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,9 +458,7 @@ def get_bool_data(self, copy: bool = False) -> Self:

elif blk.is_object:
nbs = blk._split()
for nb in nbs:
if nb.is_bool:
new_blocks.append(nb)
new_blocks.extend(nb for nb in nbs if nb.is_bool)

return self._combine(new_blocks, copy)

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/methods/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,12 @@ def _select_data(self) -> DataFrame:
def reorder_columns(ldesc: Sequence[Series]) -> list[Hashable]:
"""Set a convenient order for rows for display."""
names: list[Hashable] = []
seen_names: set[Hashable] = set()
ldesc_indexes = sorted((x.index for x in ldesc), key=len)
for idxnames in ldesc_indexes:
for name in idxnames:
if name not in names:
if name not in seen_names:
seen_names.add(name)
names.append(name)
return names

Expand Down
3 changes: 1 addition & 2 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,7 @@ def _unstack_multiple(
new_names = [data.columns.name] + cnames

new_codes = [unstcols.codes[0]]
for rec in recons_codes:
new_codes.append(rec.take(unstcols.codes[-1]))
new_codes.extend(rec.take(unstcols.codes[-1]) for rec in recons_codes)

new_columns = MultiIndex(
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
Expand Down
3 changes: 1 addition & 2 deletions pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,7 @@ def get_sheet_data(
# add blank rows to our table
table.extend([[self.empty_value]] * empty_rows)
empty_rows = 0
for _ in range(row_repeat):
table.append(table_row)
table.extend(table_row for _ in range(row_repeat))
if file_rows_needed is not None and len(table) >= file_rows_needed:
break

Expand Down
5 changes: 1 addition & 4 deletions pandas/io/formats/printing.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def adjoin(space: int, *lists: list[str], **kwargs) -> str:
strlen = kwargs.pop("strlen", len)
justfunc = kwargs.pop("justfunc", justify)

out_lines = []
newLists = []
lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
# not the last one
Expand All @@ -55,9 +54,7 @@ def adjoin(space: int, *lists: list[str], **kwargs) -> str:
nl = ([" " * lengths[i]] * (maxLen - len(lst))) + nl
newLists.append(nl)
toJoin = zip(*newLists)
for lines in toJoin:
out_lines.append("".join(lines))
return "\n".join(out_lines)
return "\n".join("".join(lines) for lines in toJoin)


def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
Expand Down
11 changes: 6 additions & 5 deletions pandas/io/parsers/python_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -865,15 +865,16 @@ def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]:
filtered_lines : list of list of Scalars
The same array of lines with the "empty" ones removed.
"""
ret = []
for line in lines:
# Remove empty lines and lines with only one whitespace value
# Remove empty lines and lines with only one whitespace value
ret = [
line
for line in lines
if (
len(line) > 1
or len(line) == 1
and (not isinstance(line[0], str) or line[0].strip())
):
ret.append(line)
)
]
return ret

def _check_thousands(self, lines: list[list[Scalar]]) -> list[list[Scalar]]:
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1313,8 +1313,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
multi_index_list = []
for k, v in grp_col_dict.items():
if isinstance(v, list):
for value in v:
multi_index_list.append([k, value])
multi_index_list.extend([k, value] for value in v)
else:
multi_index_list.append([k, v])
multi_index = MultiIndex.from_tuples(tuple(multi_index_list))
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/groupby/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,12 @@ def test_against_head_and_tail(arg, method, simulated):
result = grouped._positional_selector[:arg]

if simulated:
indices = []
for j in range(size):
for i in range(n_groups):
if j * n_groups + i < n_groups * n_rows_per_group:
indices.append(j * n_groups + i)

indices = [
j * n_groups + i
for j in range(size)
for i in range(n_groups)
if j * n_groups + i < n_groups * n_rows_per_group
]
expected = df.iloc[indices]

else:
Expand All @@ -203,12 +203,12 @@ def test_against_head_and_tail(arg, method, simulated):
result = grouped._positional_selector[-arg:]

if simulated:
indices = []
for j in range(size):
for i in range(n_groups):
if (n_rows_per_group + j - size) * n_groups + i >= 0:
indices.append((n_rows_per_group + j - size) * n_groups + i)

indices = [
(n_rows_per_group + j - size) * n_groups + i
for j in range(size)
for i in range(n_groups)
if (n_rows_per_group + j - size) * n_groups + i >= 0
]
expected = df.iloc[indices]

else:
Expand Down
40 changes: 17 additions & 23 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1366,9 +1366,9 @@ def test_to_jsonl(self):

# TODO: there is a near-identical test for pytables; can we share?
@pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError)
def test_latin_encoding(self):
# GH 13774
values = [
@pytest.mark.parametrize(
"val",
[
[b"E\xc9, 17", b"", b"a", b"b", b"c"],
[b"E\xc9, 17", b"a", b"b", b"c"],
[b"EE, 17", b"", b"a", b"b", b"c"],
Expand All @@ -1378,26 +1378,20 @@ def test_latin_encoding(self):
[b"A\xf8\xfc", b"", b"a", b"b", b"c"],
[np.nan, b"", b"b", b"c"],
[b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
]

values = [
[x.decode("latin-1") if isinstance(x, bytes) else x for x in y]
for y in values
]

examples = []
for dtype in ["category", object]:
for val in values:
examples.append(Series(val, dtype=dtype))

def roundtrip(s, encoding="latin-1"):
with tm.ensure_clean("test.json") as path:
s.to_json(path, encoding=encoding)
retr = read_json(StringIO(path), encoding=encoding)
tm.assert_series_equal(s, retr, check_categorical=False)

for s in examples:
roundtrip(s)
],
)
@pytest.mark.parametrize("dtype", ["category", object])
def test_latin_encoding(self, dtype, val):
# GH 13774
ser = Series(
[x.decode("latin-1") if isinstance(x, bytes) else x for x in val],
dtype=dtype,
)
encoding = "latin-1"
with tm.ensure_clean("test.json") as path:
ser.to_json(path, encoding=encoding)
retr = read_json(StringIO(path), encoding=encoding)
tm.assert_series_equal(ser, retr, check_categorical=False)

def test_data_frame_size_after_to_json(self):
# GH15344
Expand Down
7 changes: 3 additions & 4 deletions pandas/tests/series/accessors/test_cat_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,9 @@ def test_dt_accessor_api_for_categorical(self, idx):
]

func_defs = [(fname, (), {}) for fname in func_names]

for f_def in special_func_defs:
if f_def[0] in dir(ser.dt):
func_defs.append(f_def)
func_defs.extend(
f_def for f_def in special_func_defs if f_def[0] in dir(ser.dt)
)

for func, args, kwargs in func_defs:
with warnings.catch_warnings():
Expand Down
14 changes: 7 additions & 7 deletions pandas/tests/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,14 +297,14 @@ def test_int64_overflow_one_to_many_none_match(self, how, sort):

for k, rval in rdict.items():
if k not in ldict:
for rv in rval:
vals.append(
k
+ (
np.nan,
rv,
)
vals.extend(
k
+ (
np.nan,
rv,
)
for rv in rval
)

def align(df):
df = df.sort_values(df.columns.tolist())
Expand Down
14 changes: 11 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -240,12 +240,15 @@ select = [
# Ruff-specific rules
"RUF",
# flake8-bandit: exec-builtin
"S102"
"S102",
# Perflint
"PERF",
]

ignore = [
### Intentionally disabled
# space before : (needed for how black formats slicing)
# "E203", # not yet implemented
"E203",
# module level import not at top of file
"E402",
# do not assign a lambda expression, use a def
Expand Down Expand Up @@ -302,8 +305,13 @@ ignore = [
# "PYI027", # not yet implemented
# while int | float can be shortened to float, the former is more explicit
# "PYI041", # not yet implemented
# incorrect-dict-iterator, flags valid Series.items usage
"PERF102",
# try-except-in-loop, becomes useless in Python 3.11
"PERF203",

# Additional checks that don't pass yet

### TODO: Enable gradually
# Useless statement
"B018",
# Within an except clause, raise exceptions with ...
Expand Down
8 changes: 5 additions & 3 deletions scripts/no_bool_in_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ def visit(tree: ast.Module) -> dict[int, list[int]]:
if isinstance(value, ast.AST):
nodes.append((next_in_annotation, value))
elif isinstance(value, list):
for value in reversed(value):
if isinstance(value, ast.AST):
nodes.append((next_in_annotation, value))
nodes.extend(
(next_in_annotation, value)
for value in reversed(value)
if isinstance(value, ast.AST)
)

return to_replace

Expand Down
28 changes: 14 additions & 14 deletions scripts/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,15 +271,15 @@ def pandas_validate(func_name: str):
)

if doc.see_also:
for rel_name in doc.see_also:
if rel_name.startswith("pandas."):
result["errors"].append(
pandas_error(
"SA05",
reference_name=rel_name,
right_reference=rel_name[len("pandas.") :],
)
)
result["errors"].extend(
pandas_error(
"SA05",
reference_name=rel_name,
right_reference=rel_name[len("pandas.") :],
)
for rel_name in doc.see_also
if rel_name.startswith("pandas.")
)

result["examples_errs"] = ""
if doc.examples:
Expand All @@ -300,11 +300,11 @@ def pandas_validate(func_name: str):
)
)
examples_source_code = "".join(doc.examples_source_code)
for wrong_import in ("numpy", "pandas"):
if f"import {wrong_import}" in examples_source_code:
result["errors"].append(
pandas_error("EX04", imported_library=wrong_import)
)
result["errors"].extend(
pandas_error("EX04", imported_library=wrong_import)
for wrong_import in ("numpy", "pandas")
if f"import {wrong_import}" in examples_source_code
)

if doc.non_hyphenated_array_like():
result["errors"].append(pandas_error("GL05"))
Expand Down
8 changes: 5 additions & 3 deletions scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,11 @@ def nodefault_used_not_only_for_typing(file_obj: IO[str]) -> Iterable[tuple[int,
if isinstance(value, ast.AST):
nodes.append((next_in_annotation, value))
elif isinstance(value, list):
for value in reversed(value):
if isinstance(value, ast.AST):
nodes.append((next_in_annotation, value))
nodes.extend(
(next_in_annotation, value)
for value in reversed(value)
if isinstance(value, ast.AST)
)


def main(
Expand Down

0 comments on commit 2faf709

Please sign in to comment.