TEST-#6885: Switch to black>=24.1.0 #6887

Merged · 3 commits · Jan 26, 2024
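For context on the reformatting below: black 24.1.0 adopted the 2024 stable style, which wraps multi-line conditional (ternary) expressions in their own parentheses. A minimal sketch of the before/after shape (hypothetical example, not taken from this diff; the exact wrapping black produces still depends on line length):

# Hypothetical sketch of the style change driving most hunks in this PR.
total, chunk = 10, 4

# black < 24.1 left a split ternary bare inside the comprehension:
#     lengths = [
#         chunk
#         if i + chunk < total
#         else total - i
#         for i in range(0, total, chunk)
#     ]

# black >= 24.1 parenthesizes the conditional when it spans multiple lines:
lengths = [
    (chunk if i + chunk < total else total - i)
    for i in range(0, total, chunk)
]
print(lengths)  # [4, 4, 2]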
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -32,7 +32,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- uses: ./.github/actions/python-only
- run: pip install black isort>=5.12
- run: pip install black>=24.1.0 isort>=5.12
# NOTE: keep the black command here in sync with the pre-commit hook in
# /contributing/pre-commit
- run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py
2 changes: 1 addition & 1 deletion environment-dev.yml
@@ -54,7 +54,7 @@ dependencies:
- pytest-xdist>=3.2.0

# code linters
- black>=23.1.0
- black>=24.1.0
- flake8>=6.0.0
- flake8-no-implicit-concat>=0.3.4
- flake8-print>=5.0.0
16 changes: 9 additions & 7 deletions modin/config/pubsub.py
@@ -153,13 +153,15 @@ class ExactStr(str):
for key_value in value.split(",")
for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]]
},
normalize=lambda value: value
if isinstance(value, dict)
else {
key: int(val) if val.isdigit() else val
for key_value in str(value).split(",")
for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]]
},
normalize=lambda value: (
value
if isinstance(value, dict)
else {
key: int(val) if val.isdigit() else val
for key_value in str(value).split(",")
for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]]
}
),
verify=lambda value: isinstance(value, dict)
or (
isinstance(value, str)
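The normalize callback above turns a comma-separated "key=value" string into a dict, converting digit-only values to int; the test_envvars.py change below exercises exactly that path. A standalone re-implementation for illustration (local sketch, not imported from modin):

# Standalone sketch of the parsing performed by the normalize lambda above.
def parse_launch_params(value):
    if isinstance(value, dict):
        return value
    return {
        key: int(val) if val.isdigit() else val
        for key_value in str(value).split(",")
        for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]]
    }

print(parse_launch_params("enable_union=4,enable_thrift_logs=5"))
# {'enable_union': 4, 'enable_thrift_logs': 5}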
6 changes: 3 additions & 3 deletions modin/config/test/test_envvars.py
@@ -118,9 +118,9 @@ def test_hdk_envvar():
# This test is intended to check pyhdk internals. If pyhdk is not available, skip the version check test.
pass

os.environ[
cfg.HdkLaunchParameters.varname
] = "enable_union=4,enable_thrift_logs=5,enable_lazy_dict_materialization=6"
os.environ[cfg.HdkLaunchParameters.varname] = (
"enable_union=4,enable_thrift_logs=5,enable_lazy_dict_materialization=6"
)
del cfg.HdkLaunchParameters._value
params = cfg.HdkLaunchParameters.get()
assert params["enable_union"] == 4
36 changes: 22 additions & 14 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
@@ -1382,18 +1382,22 @@ def from_labels(self) -> "PandasDataframe":
new_row_labels = pandas.RangeIndex(len(self.index))
if self.index.nlevels > 1:
level_names = [
self.index.names[i]
if self.index.names[i] is not None
else "level_{}".format(i)
(
self.index.names[i]
if self.index.names[i] is not None
else "level_{}".format(i)
)
for i in range(self.index.nlevels)
]
else:
level_names = [
self.index.names[0]
if self.index.names[0] is not None
else "index"
if "index" not in self.columns
else "level_{}".format(0)
(
self.index.names[0]
if self.index.names[0] is not None
else (
"index" if "index" not in self.columns else "level_{}".format(0)
)
)
]
names = tuple(level_names) if len(level_names) > 1 else level_names[0]
new_dtypes = self.index.to_frame(name=names).dtypes
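As an aside on what the reformatted expression computes: unnamed index levels fall back to "level_{i}" names. A small standalone example (illustrative only, mirroring the multi-level branch above):

import pandas

# Illustrative: default names for unnamed MultiIndex levels.
index = pandas.MultiIndex.from_tuples([("a", 1), ("b", 2)], names=[None, "key"])
level_names = [
    (index.names[i] if index.names[i] is not None else "level_{}".format(i))
    for i in range(index.nlevels)
]
print(level_names)  # ['level_0', 'key']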
@@ -2924,9 +2928,11 @@ def apply_select_indices(
# `axis` given may have changed, we currently just recompute it.
# TODO Determine lengths from current lengths if `keep_remaining=False`
lengths_objs = {
axis: [len(apply_indices)]
if not keep_remaining
else [self.row_lengths, self.column_widths][axis],
axis: (
[len(apply_indices)]
if not keep_remaining
else [self.row_lengths, self.column_widths][axis]
),
axis ^ 1: [self.row_lengths, self.column_widths][axis ^ 1],
}
return self.__constructor__(
@@ -3891,9 +3897,11 @@ def join_cols(df, *cols):
# Getting futures for columns of non-empty partitions
cols = [
part.apply(
lambda df: None
if df.attrs.get(skip_on_aligning_flag, False)
else df.columns
lambda df: (
None
if df.attrs.get(skip_on_aligning_flag, False)
else df.columns
)
)._data
for part in result._partitions.flatten()
]
10 changes: 6 additions & 4 deletions modin/core/dataframe/pandas/dataframe/utils.py
@@ -406,11 +406,13 @@ def get_group(grp, key, df):
if len(non_na_rows) == 1:
groups = [
# taking an empty slice for an index's metadata
pandas.DataFrame(index=df.index[:0], columns=df.columns).astype(
df.dtypes
(
pandas.DataFrame(index=df.index[:0], columns=df.columns).astype(
df.dtypes
)
if key != groupby_codes[0]
else non_na_rows
)
if key != groupby_codes[0]
else non_na_rows
for key in group_keys
]
else:
12 changes: 7 additions & 5 deletions modin/core/dataframe/pandas/metadata/dtypes.py
@@ -498,9 +498,11 @@ def _merge_dtypes(
# otherwise, it may indicate missing columns that this 'val' has no info about,
# meaning that we shouldn't try computing a new dtype for this column,
# so marking it as 'unknown'
i: np.dtype(float)
if val._know_all_names and val._remaining_dtype is None
else "unknown"
i: (
np.dtype(float)
if val._know_all_names and val._remaining_dtype is None
else "unknown"
)
},
inplace=True,
)
@@ -732,8 +734,8 @@ def lazy_get(self, ids: list, numeric_index: bool = False) -> "ModinDtypes":
elif callable(self._value):
new_self = self.copy()
old_value = new_self._value
new_self._value = (
lambda: old_value().iloc[ids] if numeric_index else old_value()[ids]
new_self._value = lambda: (
old_value().iloc[ids] if numeric_index else old_value()[ids]
)
return new_self
ErrorMessage.catch_bugs_and_request_email(
88 changes: 53 additions & 35 deletions modin/core/dataframe/pandas/partitioning/partition_manager.py
@@ -383,13 +383,15 @@ def get_partitions(index):

new_partitions = np.array(
[
partitions_for_apply[i]
if i not in left_indices
else cls._apply_func_to_list_of_partitions_broadcast(
apply_func,
partitions_for_apply[i],
internal_indices=left_indices[i],
**get_partitions(i),
(
partitions_for_apply[i]
if i not in left_indices
else cls._apply_func_to_list_of_partitions_broadcast(
apply_func,
partitions_for_apply[i],
internal_indices=left_indices[i],
**get_partitions(i),
)
)
for i in range(len(partitions_for_apply))
if i in left_indices or keep_remaining
@@ -946,15 +948,19 @@ def update_bar(f):
return parts
else:
row_lengths = [
row_chunksize
if i + row_chunksize < len(df)
else len(df) % row_chunksize or row_chunksize
(
row_chunksize
if i + row_chunksize < len(df)
else len(df) % row_chunksize or row_chunksize
)
for i in range(0, len(df), row_chunksize)
]
col_widths = [
col_chunksize
if i + col_chunksize < len(df.columns)
else len(df.columns) % col_chunksize or col_chunksize
(
col_chunksize
if i + col_chunksize < len(df.columns)
else len(df.columns) % col_chunksize or col_chunksize
)
for i in range(0, len(df.columns), col_chunksize)
]
return parts, row_lengths, col_widths
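The row_lengths/col_widths expressions above give every chunk the full chunksize except the last, which gets the remainder (or a full chunk when the split is even). A standalone sketch with illustrative values:

# Standalone sketch of the chunk-length arithmetic above (illustrative values).
def chunk_lengths(total, chunksize):
    return [
        (chunksize if i + chunksize < total else total % chunksize or chunksize)
        for i in range(0, total, chunksize)
    ]

print(chunk_lengths(10, 4))  # [4, 4, 2] -- the last chunk holds the remainder
print(chunk_lengths(8, 4))   # [4, 4]    -- `or chunksize` keeps an even split from yielding 0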
@@ -1206,14 +1212,18 @@ def apply_func_to_select_indices(
else:
result = np.array(
[
partitions_for_apply[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
func,
partitions_for_apply[i],
func_dict={
idx: dict_func[idx] for idx in indices[i] if idx >= 0
},
(
partitions_for_apply[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
func,
partitions_for_apply[i],
func_dict={
idx: dict_func[idx]
for idx in indices[i]
if idx >= 0
},
)
)
for i in range(len(partitions_for_apply))
]
@@ -1239,10 +1249,14 @@ def apply_func_to_select_indices(
# remaining (non-updated) blocks in their original position.
result = np.array(
[
partitions_for_apply[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
func, partitions_for_apply[i], internal_indices=indices[i]
(
partitions_for_apply[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
func,
partitions_for_apply[i],
internal_indices=indices[i],
)
)
for i in range(len(partitions_for_apply))
]
@@ -1331,12 +1345,14 @@ def apply_func_to_select_indices_along_full_axis(
else:
result = np.array(
[
partitions_for_remaining[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
preprocessed_func,
partitions_for_apply[i],
func_dict={idx: dict_func[idx] for idx in indices[i]},
(
partitions_for_remaining[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
preprocessed_func,
partitions_for_apply[i],
func_dict={idx: dict_func[idx] for idx in indices[i]},
)
)
for i in range(len(partitions_for_apply))
]
@@ -1354,10 +1370,12 @@ def apply_func_to_select_indices_along_full_axis(
# See notes in `apply_func_to_select_indices`
result = np.array(
[
partitions_for_remaining[i]
if i not in indices
else partitions_for_apply[i].apply(
preprocessed_func, internal_indices=indices[i]
(
partitions_for_remaining[i]
if i not in indices
else partitions_for_apply[i].apply(
preprocessed_func, internal_indices=indices[i]
)
)
for i in range(len(partitions_for_remaining))
]
@@ -94,9 +94,11 @@ def deploy_splitting_func(
*partitions,
),
f_kwargs={"extract_metadata": extract_metadata},
num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits,
num_returns=(
num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits
),
pure=False,
)

Expand Down
@@ -110,9 +110,11 @@ def deploy_splitting_func(
extract_metadata=False,
):
return _deploy_ray_func.options(
num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits,
num_returns=(
num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits
),
).remote(
cls._get_deploy_split_func(),
*f_args,
@@ -112,9 +112,11 @@ def deploy_splitting_func(
extract_metadata=False,
):
return _deploy_unidist_func.options(
num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits,
num_returns=(
num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits
),
).remote(
cls._get_deploy_split_func(),
axis,
8 changes: 5 additions & 3 deletions modin/core/io/column_stores/column_store_dispatcher.py
@@ -135,9 +135,11 @@ def build_index(cls, partition_ids):
row_lengths = [index_len] + [0 for _ in range(num_partitions - 1)]
else:
row_lengths = [
index_chunksize
if (i + 1) * index_chunksize < index_len
else max(0, index_len - (index_chunksize * i))
(
index_chunksize
if (i + 1) * index_chunksize < index_len
else max(0, index_len - (index_chunksize * i))
)
for i in range(num_partitions)
]
return index, row_lengths
6 changes: 3 additions & 3 deletions modin/core/io/column_stores/parquet_dispatcher.py
@@ -848,9 +848,9 @@ def func(df, **kw): # pragma: no cover
"""
compression = kwargs["compression"]
partition_idx = kw["partition_idx"]
kwargs[
"path"
] = f"{output_path}/part-{partition_idx:04d}.{compression}.parquet"
kwargs["path"] = (
f"{output_path}/part-{partition_idx:04d}.{compression}.parquet"
)
df.to_parquet(**kwargs)
return pandas.DataFrame()

14 changes: 9 additions & 5 deletions modin/core/io/text/text_file_dispatcher.py
@@ -582,11 +582,15 @@ def _define_metadata(
# if num_splits == 4, len(column_names) == 80 and column_chunksize == 32,
# column_widths will be [32, 32, 16, 0]
column_widths = [
column_chunksize
if len(column_names) > (column_chunksize * (i + 1))
else 0
if len(column_names) < (column_chunksize * i)
else len(column_names) - (column_chunksize * i)
(
column_chunksize
if len(column_names) > (column_chunksize * (i + 1))
else (
0
if len(column_names) < (column_chunksize * i)
else len(column_names) - (column_chunksize * i)
)
)
for i in range(num_splits)
]

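The comment above gives num_splits == 4, len(column_names) == 80, and column_chunksize == 32 as an example; a standalone sketch reproducing it:

# Standalone check of the column_widths example from the comment above.
num_splits, n_cols, column_chunksize = 4, 80, 32
column_widths = [
    (
        column_chunksize
        if n_cols > (column_chunksize * (i + 1))
        else (0 if n_cols < (column_chunksize * i) else n_cols - (column_chunksize * i))
    )
    for i in range(num_splits)
]
print(column_widths)  # [32, 32, 16, 0]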