Skip to content

Commit

Permalink
STY: Enable ruff perflint (#54236)
Browse files Browse the repository at this point in the history
* some checks implemented

* Include rules

* Enable perflint

* type
  • Loading branch information
mroeschke committed Jul 25, 2023
1 parent b3b1beb commit 2faf709
Show file tree
Hide file tree
Showing 20 changed files with 102 additions and 111 deletions.
6 changes: 2 additions & 4 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,10 +348,8 @@
methods = [
x for x in dir(klass) if not x.startswith("_") or x in ("__iter__", "__array__")
]

for method in methods:
# ... and each of its public methods
moved_api_pages.append((f"{old}.{method}", f"{new}.{method}"))
# ... and each of its public methods
moved_api_pages.extend((f"{old}.{method}", f"{new}.{method}") for method in methods)

if include_api:
html_additional_pages = {
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5815,21 +5815,21 @@ def set_index(
# GH 49473 Use "lazy copy" with Copy-on-Write
frame = self.copy(deep=None)

arrays = []
arrays: list[Index] = []
names: list[Hashable] = []
if append:
names = list(self.index.names)
if isinstance(self.index, MultiIndex):
for i in range(self.index.nlevels):
arrays.append(self.index._get_level_values(i))
arrays.extend(
self.index._get_level_values(i) for i in range(self.index.nlevels)
)
else:
arrays.append(self.index)

to_remove: list[Hashable] = []
for col in keys:
if isinstance(col, MultiIndex):
for n in range(col.nlevels):
arrays.append(col._get_level_values(n))
arrays.extend(col._get_level_values(n) for n in range(col.nlevels))
names.extend(col.names)
elif isinstance(col, (Index, Series)):
# if Index then not MultiIndex (treated above)
Expand Down
5 changes: 1 addition & 4 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,10 +345,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg]
else:
# list of functions / function names
columns = []
for f in arg:
columns.append(com.get_callable_name(f) or f)

columns = (com.get_callable_name(f) or f for f in arg)
arg = zip(columns, arg)

results: dict[base.OutputKey, DataFrame | Series] = {}
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,9 +458,7 @@ def get_bool_data(self, copy: bool = False) -> Self:

elif blk.is_object:
nbs = blk._split()
for nb in nbs:
if nb.is_bool:
new_blocks.append(nb)
new_blocks.extend(nb for nb in nbs if nb.is_bool)

return self._combine(new_blocks, copy)

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/methods/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,12 @@ def _select_data(self) -> DataFrame:
def reorder_columns(ldesc: Sequence[Series]) -> list[Hashable]:
"""Set a convenient order for rows for display."""
names: list[Hashable] = []
seen_names: set[Hashable] = set()
ldesc_indexes = sorted((x.index for x in ldesc), key=len)
for idxnames in ldesc_indexes:
for name in idxnames:
if name not in names:
if name not in seen_names:
seen_names.add(name)
names.append(name)
return names

Expand Down
3 changes: 1 addition & 2 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,7 @@ def _unstack_multiple(
new_names = [data.columns.name] + cnames

new_codes = [unstcols.codes[0]]
for rec in recons_codes:
new_codes.append(rec.take(unstcols.codes[-1]))
new_codes.extend(rec.take(unstcols.codes[-1]) for rec in recons_codes)

new_columns = MultiIndex(
levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
Expand Down
3 changes: 1 addition & 2 deletions pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,7 @@ def get_sheet_data(
# add blank rows to our table
table.extend([[self.empty_value]] * empty_rows)
empty_rows = 0
for _ in range(row_repeat):
table.append(table_row)
table.extend(table_row for _ in range(row_repeat))
if file_rows_needed is not None and len(table) >= file_rows_needed:
break

Expand Down
5 changes: 1 addition & 4 deletions pandas/io/formats/printing.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def adjoin(space: int, *lists: list[str], **kwargs) -> str:
strlen = kwargs.pop("strlen", len)
justfunc = kwargs.pop("justfunc", justify)

out_lines = []
newLists = []
lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
# not the last one
Expand All @@ -55,9 +54,7 @@ def adjoin(space: int, *lists: list[str], **kwargs) -> str:
nl = ([" " * lengths[i]] * (maxLen - len(lst))) + nl
newLists.append(nl)
toJoin = zip(*newLists)
for lines in toJoin:
out_lines.append("".join(lines))
return "\n".join(out_lines)
return "\n".join("".join(lines) for lines in toJoin)


def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
Expand Down
11 changes: 6 additions & 5 deletions pandas/io/parsers/python_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -865,15 +865,16 @@ def _remove_empty_lines(self, lines: list[list[Scalar]]) -> list[list[Scalar]]:
filtered_lines : list of list of Scalars
The same array of lines with the "empty" ones removed.
"""
ret = []
for line in lines:
# Remove empty lines and lines with only one whitespace value
# Remove empty lines and lines with only one whitespace value
ret = [
line
for line in lines
if (
len(line) > 1
or len(line) == 1
and (not isinstance(line[0], str) or line[0].strip())
):
ret.append(line)
)
]
return ret

def _check_thousands(self, lines: list[list[Scalar]]) -> list[list[Scalar]]:
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1313,8 +1313,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
multi_index_list = []
for k, v in grp_col_dict.items():
if isinstance(v, list):
for value in v:
multi_index_list.append([k, value])
multi_index_list.extend([k, value] for value in v)
else:
multi_index_list.append([k, v])
multi_index = MultiIndex.from_tuples(tuple(multi_index_list))
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/groupby/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,12 @@ def test_against_head_and_tail(arg, method, simulated):
result = grouped._positional_selector[:arg]

if simulated:
indices = []
for j in range(size):
for i in range(n_groups):
if j * n_groups + i < n_groups * n_rows_per_group:
indices.append(j * n_groups + i)

indices = [
j * n_groups + i
for j in range(size)
for i in range(n_groups)
if j * n_groups + i < n_groups * n_rows_per_group
]
expected = df.iloc[indices]

else:
Expand All @@ -203,12 +203,12 @@ def test_against_head_and_tail(arg, method, simulated):
result = grouped._positional_selector[-arg:]

if simulated:
indices = []
for j in range(size):
for i in range(n_groups):
if (n_rows_per_group + j - size) * n_groups + i >= 0:
indices.append((n_rows_per_group + j - size) * n_groups + i)

indices = [
(n_rows_per_group + j - size) * n_groups + i
for j in range(size)
for i in range(n_groups)
if (n_rows_per_group + j - size) * n_groups + i >= 0
]
expected = df.iloc[indices]

else:
Expand Down
40 changes: 17 additions & 23 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1366,9 +1366,9 @@ def test_to_jsonl(self):

# TODO: there is a near-identical test for pytables; can we share?
@pytest.mark.xfail(reason="GH#13774 encoding kwarg not supported", raises=TypeError)
def test_latin_encoding(self):
# GH 13774
values = [
@pytest.mark.parametrize(
"val",
[
[b"E\xc9, 17", b"", b"a", b"b", b"c"],
[b"E\xc9, 17", b"a", b"b", b"c"],
[b"EE, 17", b"", b"a", b"b", b"c"],
Expand All @@ -1378,26 +1378,20 @@ def test_latin_encoding(self):
[b"A\xf8\xfc", b"", b"a", b"b", b"c"],
[np.nan, b"", b"b", b"c"],
[b"A\xf8\xfc", np.nan, b"", b"b", b"c"],
]

values = [
[x.decode("latin-1") if isinstance(x, bytes) else x for x in y]
for y in values
]

examples = []
for dtype in ["category", object]:
for val in values:
examples.append(Series(val, dtype=dtype))

def roundtrip(s, encoding="latin-1"):
with tm.ensure_clean("test.json") as path:
s.to_json(path, encoding=encoding)
retr = read_json(StringIO(path), encoding=encoding)
tm.assert_series_equal(s, retr, check_categorical=False)

for s in examples:
roundtrip(s)
],
)
@pytest.mark.parametrize("dtype", ["category", object])
def test_latin_encoding(self, dtype, val):
# GH 13774
ser = Series(
[x.decode("latin-1") if isinstance(x, bytes) else x for x in val],
dtype=dtype,
)
encoding = "latin-1"
with tm.ensure_clean("test.json") as path:
ser.to_json(path, encoding=encoding)
retr = read_json(StringIO(path), encoding=encoding)
tm.assert_series_equal(ser, retr, check_categorical=False)

def test_data_frame_size_after_to_json(self):
# GH15344
Expand Down
7 changes: 3 additions & 4 deletions pandas/tests/series/accessors/test_cat_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,9 @@ def test_dt_accessor_api_for_categorical(self, idx):
]

func_defs = [(fname, (), {}) for fname in func_names]

for f_def in special_func_defs:
if f_def[0] in dir(ser.dt):
func_defs.append(f_def)
func_defs.extend(
f_def for f_def in special_func_defs if f_def[0] in dir(ser.dt)
)

for func, args, kwargs in func_defs:
with warnings.catch_warnings():
Expand Down
14 changes: 7 additions & 7 deletions pandas/tests/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,14 +297,14 @@ def test_int64_overflow_one_to_many_none_match(self, how, sort):

for k, rval in rdict.items():
if k not in ldict:
for rv in rval:
vals.append(
k
+ (
np.nan,
rv,
)
vals.extend(
k
+ (
np.nan,
rv,
)
for rv in rval
)

def align(df):
df = df.sort_values(df.columns.tolist())
Expand Down
14 changes: 11 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -240,12 +240,15 @@ select = [
# Ruff-specific rules
"RUF",
# flake8-bandit: exec-builtin
"S102"
"S102",
# Perflint
"PERF",
]

ignore = [
### Intentionally disabled
# space before : (needed for how black formats slicing)
# "E203", # not yet implemented
"E203",
# module level import not at top of file
"E402",
# do not assign a lambda expression, use a def
Expand Down Expand Up @@ -302,8 +305,13 @@ ignore = [
# "PYI027", # not yet implemented
# while int | float can be shortened to float, the former is more explicit
# "PYI041", # not yet implemented
# incorrect-dict-iterator, flags valid Series.items usage
"PERF102",
# try-except-in-loop, becomes useless in Python 3.11
"PERF203",

# Additional checks that don't pass yet

### TODO: Enable gradually
# Useless statement
"B018",
# Within an except clause, raise exceptions with ...
Expand Down
8 changes: 5 additions & 3 deletions scripts/no_bool_in_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ def visit(tree: ast.Module) -> dict[int, list[int]]:
if isinstance(value, ast.AST):
nodes.append((next_in_annotation, value))
elif isinstance(value, list):
for value in reversed(value):
if isinstance(value, ast.AST):
nodes.append((next_in_annotation, value))
nodes.extend(
(next_in_annotation, value)
for value in reversed(value)
if isinstance(value, ast.AST)
)

return to_replace

Expand Down
28 changes: 14 additions & 14 deletions scripts/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,15 +271,15 @@ def pandas_validate(func_name: str):
)

if doc.see_also:
for rel_name in doc.see_also:
if rel_name.startswith("pandas."):
result["errors"].append(
pandas_error(
"SA05",
reference_name=rel_name,
right_reference=rel_name[len("pandas.") :],
)
)
result["errors"].extend(
pandas_error(
"SA05",
reference_name=rel_name,
right_reference=rel_name[len("pandas.") :],
)
for rel_name in doc.see_also
if rel_name.startswith("pandas.")
)

result["examples_errs"] = ""
if doc.examples:
Expand All @@ -300,11 +300,11 @@ def pandas_validate(func_name: str):
)
)
examples_source_code = "".join(doc.examples_source_code)
for wrong_import in ("numpy", "pandas"):
if f"import {wrong_import}" in examples_source_code:
result["errors"].append(
pandas_error("EX04", imported_library=wrong_import)
)
result["errors"].extend(
pandas_error("EX04", imported_library=wrong_import)
for wrong_import in ("numpy", "pandas")
if f"import {wrong_import}" in examples_source_code
)

if doc.non_hyphenated_array_like():
result["errors"].append(pandas_error("GL05"))
Expand Down
8 changes: 5 additions & 3 deletions scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,11 @@ def nodefault_used_not_only_for_typing(file_obj: IO[str]) -> Iterable[tuple[int,
if isinstance(value, ast.AST):
nodes.append((next_in_annotation, value))
elif isinstance(value, list):
for value in reversed(value):
if isinstance(value, ast.AST):
nodes.append((next_in_annotation, value))
nodes.extend(
(next_in_annotation, value)
for value in reversed(value)
if isinstance(value, ast.AST)
)


def main(
Expand Down

0 comments on commit 2faf709

Please sign in to comment.