TEST-#6885: Switch to black>=24.1.0 #6887

Merged · 3 commits · Jan 26, 2024
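For context on the reformatting below: black 24.1.0 adopted the 2024 stable style, which wraps multi-line conditional (ternary) expressions in their own parentheses. A minimal sketch of the before/after shape (hypothetical example, not taken from this diff; the exact wrapping black produces still depends on line length):

# Hypothetical sketch of the style change driving most hunks in this PR.
total, chunk = 10, 4

# black < 24.1 left a split ternary bare inside the comprehension:
#     lengths = [
#         chunk
#         if i + chunk < total
#         else total - i
#         for i in range(0, total, chunk)
#     ]

# black >= 24.1 parenthesizes the conditional when it spans multiple lines:
lengths = [
    (chunk if i + chunk < total else total - i)
    for i in range(0, total, chunk)
]
print(lengths)  # [4, 4, 2]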
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -32,7 +32,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- uses: ./.github/actions/python-only
- run: pip install black isort>=5.12
- run: pip install black>=24.1.0 isort>=5.12
# NOTE: keep the black command here in sync with the pre-commit hook in
# /contributing/pre-commit
- run: black --check --diff modin/ asv_bench/benchmarks scripts/doc_checker.py
2 changes: 1 addition & 1 deletion environment-dev.yml
@@ -54,7 +54,7 @@ dependencies:
- pytest-xdist>=3.2.0

# code linters
- black>=23.1.0
- black>=24.1.0
- flake8>=6.0.0
- flake8-no-implicit-concat>=0.3.4
- flake8-print>=5.0.0
16 changes: 9 additions & 7 deletions modin/config/pubsub.py
@@ -153,13 +153,15 @@ class ExactStr(str):
for key_value in value.split(",")
for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]]
},
normalize=lambda value: value
if isinstance(value, dict)
else {
key: int(val) if val.isdigit() else val
for key_value in str(value).split(",")
for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]]
},
normalize=lambda value: (
value
if isinstance(value, dict)
else {
key: int(val) if val.isdigit() else val
for key_value in str(value).split(",")
for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]]
}
),
verify=lambda value: isinstance(value, dict)
or (
isinstance(value, str)
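The normalize callback above turns a comma-separated "key=value" string into a dict, converting digit-only values to int; the test_envvars.py change below exercises exactly that path. A standalone re-implementation for illustration (local sketch, not imported from modin):

# Standalone sketch of the parsing performed by the normalize lambda above.
def parse_launch_params(value):
    if isinstance(value, dict):
        return value
    return {
        key: int(val) if val.isdigit() else val
        for key_value in str(value).split(",")
        for key, val in [[v.strip() for v in key_value.split("=", maxsplit=1)]]
    }

print(parse_launch_params("enable_union=4,enable_thrift_logs=5"))
# {'enable_union': 4, 'enable_thrift_logs': 5}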
6 changes: 3 additions & 3 deletions modin/config/test/test_envvars.py
@@ -118,9 +118,9 @@ def test_hdk_envvar():
# This test is intended to check pyhdk internals. If pyhdk is not available, skip the version check test.
pass

os.environ[
cfg.HdkLaunchParameters.varname
] = "enable_union=4,enable_thrift_logs=5,enable_lazy_dict_materialization=6"
os.environ[cfg.HdkLaunchParameters.varname] = (
"enable_union=4,enable_thrift_logs=5,enable_lazy_dict_materialization=6"
)
del cfg.HdkLaunchParameters._value
params = cfg.HdkLaunchParameters.get()
assert params["enable_union"] == 4
36 changes: 22 additions & 14 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
@@ -1382,18 +1382,22 @@ def from_labels(self) -> "PandasDataframe":
new_row_labels = pandas.RangeIndex(len(self.index))
if self.index.nlevels > 1:
level_names = [
self.index.names[i]
if self.index.names[i] is not None
else "level_{}".format(i)
(
self.index.names[i]
if self.index.names[i] is not None
else "level_{}".format(i)
)
for i in range(self.index.nlevels)
]
else:
level_names = [
self.index.names[0]
if self.index.names[0] is not None
else "index"
if "index" not in self.columns
else "level_{}".format(0)
(
self.index.names[0]
if self.index.names[0] is not None
else (
"index" if "index" not in self.columns else "level_{}".format(0)
)
)
]
names = tuple(level_names) if len(level_names) > 1 else level_names[0]
new_dtypes = self.index.to_frame(name=names).dtypes
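As an aside on what the reformatted expression computes: unnamed index levels fall back to "level_{i}" names. A small standalone example (illustrative only, mirroring the multi-level branch above):

import pandas

# Illustrative: default names for unnamed MultiIndex levels.
index = pandas.MultiIndex.from_tuples([("a", 1), ("b", 2)], names=[None, "key"])
level_names = [
    (index.names[i] if index.names[i] is not None else "level_{}".format(i))
    for i in range(index.nlevels)
]
print(level_names)  # ['level_0', 'key']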
@@ -2924,9 +2928,11 @@ def apply_select_indices(
# `axis` given may have changed, we currently just recompute it.
# TODO Determine lengths from current lengths if `keep_remaining=False`
lengths_objs = {
axis: [len(apply_indices)]
if not keep_remaining
else [self.row_lengths, self.column_widths][axis],
axis: (
[len(apply_indices)]
if not keep_remaining
else [self.row_lengths, self.column_widths][axis]
),
axis ^ 1: [self.row_lengths, self.column_widths][axis ^ 1],
}
return self.__constructor__(
@@ -3891,9 +3897,11 @@ def join_cols(df, *cols):
# Getting futures for columns of non-empty partitions
cols = [
part.apply(
lambda df: None
if df.attrs.get(skip_on_aligning_flag, False)
else df.columns
lambda df: (
None
if df.attrs.get(skip_on_aligning_flag, False)
else df.columns
)
)._data
for part in result._partitions.flatten()
]
10 changes: 6 additions & 4 deletions modin/core/dataframe/pandas/dataframe/utils.py
@@ -406,11 +406,13 @@ def get_group(grp, key, df):
if len(non_na_rows) == 1:
groups = [
# taking an empty slice for an index's metadata
pandas.DataFrame(index=df.index[:0], columns=df.columns).astype(
df.dtypes
(
pandas.DataFrame(index=df.index[:0], columns=df.columns).astype(
df.dtypes
)
if key != groupby_codes[0]
else non_na_rows
)
if key != groupby_codes[0]
else non_na_rows
for key in group_keys
]
else:
12 changes: 7 additions & 5 deletions modin/core/dataframe/pandas/metadata/dtypes.py
@@ -498,9 +498,11 @@ def _merge_dtypes(
# otherwise, it may indicate missing columns that this 'val' has no info about,
# meaning that we shouldn't try computing a new dtype for this column,
# so marking it as 'unknown'
i: np.dtype(float)
if val._know_all_names and val._remaining_dtype is None
else "unknown"
i: (
np.dtype(float)
if val._know_all_names and val._remaining_dtype is None
else "unknown"
)
},
inplace=True,
)
@@ -732,8 +734,8 @@ def lazy_get(self, ids: list, numeric_index: bool = False) -> "ModinDtypes":
elif callable(self._value):
new_self = self.copy()
old_value = new_self._value
new_self._value = (
lambda: old_value().iloc[ids] if numeric_index else old_value()[ids]
new_self._value = lambda: (
old_value().iloc[ids] if numeric_index else old_value()[ids]
)
return new_self
ErrorMessage.catch_bugs_and_request_email(
88 changes: 53 additions & 35 deletions modin/core/dataframe/pandas/partitioning/partition_manager.py
@@ -383,13 +383,15 @@ def get_partitions(index):

new_partitions = np.array(
[
partitions_for_apply[i]
if i not in left_indices
else cls._apply_func_to_list_of_partitions_broadcast(
apply_func,
partitions_for_apply[i],
internal_indices=left_indices[i],
**get_partitions(i),
(
partitions_for_apply[i]
if i not in left_indices
else cls._apply_func_to_list_of_partitions_broadcast(
apply_func,
partitions_for_apply[i],
internal_indices=left_indices[i],
**get_partitions(i),
)
)
for i in range(len(partitions_for_apply))
if i in left_indices or keep_remaining
@@ -946,15 +948,19 @@ def update_bar(f):
return parts
else:
row_lengths = [
row_chunksize
if i + row_chunksize < len(df)
else len(df) % row_chunksize or row_chunksize
(
row_chunksize
if i + row_chunksize < len(df)
else len(df) % row_chunksize or row_chunksize
)
for i in range(0, len(df), row_chunksize)
]
col_widths = [
col_chunksize
if i + col_chunksize < len(df.columns)
else len(df.columns) % col_chunksize or col_chunksize
(
col_chunksize
if i + col_chunksize < len(df.columns)
else len(df.columns) % col_chunksize or col_chunksize
)
for i in range(0, len(df.columns), col_chunksize)
]
return parts, row_lengths, col_widths
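The row_lengths/col_widths expressions above give every chunk the full chunksize except the last, which gets the remainder (or a full chunk when the split is even). A standalone sketch with illustrative values:

# Standalone sketch of the chunk-length arithmetic above (illustrative values).
def chunk_lengths(total, chunksize):
    return [
        (chunksize if i + chunksize < total else total % chunksize or chunksize)
        for i in range(0, total, chunksize)
    ]

print(chunk_lengths(10, 4))  # [4, 4, 2] -- the last chunk holds the remainder
print(chunk_lengths(8, 4))   # [4, 4]    -- `or chunksize` keeps an even split from yielding 0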
@@ -1206,14 +1212,18 @@ def apply_func_to_select_indices(
else:
result = np.array(
[
partitions_for_apply[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
func,
partitions_for_apply[i],
func_dict={
idx: dict_func[idx] for idx in indices[i] if idx >= 0
},
(
partitions_for_apply[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
func,
partitions_for_apply[i],
func_dict={
idx: dict_func[idx]
for idx in indices[i]
if idx >= 0
},
)
)
for i in range(len(partitions_for_apply))
]
@@ -1239,10 +1249,14 @@ def apply_func_to_select_indices(
# remaining (non-updated) blocks in their original position.
result = np.array(
[
partitions_for_apply[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
func, partitions_for_apply[i], internal_indices=indices[i]
(
partitions_for_apply[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
func,
partitions_for_apply[i],
internal_indices=indices[i],
)
)
for i in range(len(partitions_for_apply))
]
@@ -1331,12 +1345,14 @@ def apply_func_to_select_indices_along_full_axis(
else:
result = np.array(
[
partitions_for_remaining[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
preprocessed_func,
partitions_for_apply[i],
func_dict={idx: dict_func[idx] for idx in indices[i]},
(
partitions_for_remaining[i]
if i not in indices
else cls._apply_func_to_list_of_partitions(
preprocessed_func,
partitions_for_apply[i],
func_dict={idx: dict_func[idx] for idx in indices[i]},
)
)
for i in range(len(partitions_for_apply))
]
@@ -1354,10 +1370,12 @@ def apply_func_to_select_indices_along_full_axis(
# See notes in `apply_func_to_select_indices`
result = np.array(
[
partitions_for_remaining[i]
if i not in indices
else partitions_for_apply[i].apply(
preprocessed_func, internal_indices=indices[i]
(
partitions_for_remaining[i]
if i not in indices
else partitions_for_apply[i].apply(
preprocessed_func, internal_indices=indices[i]
)
)
for i in range(len(partitions_for_remaining))
]
@@ -94,9 +94,11 @@ def deploy_splitting_func(
*partitions,
),
f_kwargs={"extract_metadata": extract_metadata},
num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits,
num_returns=(
num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits
),
pure=False,
)

Expand Down
@@ -110,9 +110,11 @@ def deploy_splitting_func(
extract_metadata=False,
):
return _deploy_ray_func.options(
num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits,
num_returns=(
num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits
),
).remote(
cls._get_deploy_split_func(),
*f_args,
@@ -112,9 +112,11 @@ def deploy_splitting_func(
extract_metadata=False,
):
return _deploy_unidist_func.options(
num_returns=num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits,
num_returns=(
num_splits * (1 + cls._PARTITIONS_METADATA_LEN)
if extract_metadata
else num_splits
),
).remote(
cls._get_deploy_split_func(),
axis,
8 changes: 5 additions & 3 deletions modin/core/io/column_stores/column_store_dispatcher.py
@@ -135,9 +135,11 @@ def build_index(cls, partition_ids):
row_lengths = [index_len] + [0 for _ in range(num_partitions - 1)]
else:
row_lengths = [
index_chunksize
if (i + 1) * index_chunksize < index_len
else max(0, index_len - (index_chunksize * i))
(
index_chunksize
if (i + 1) * index_chunksize < index_len
else max(0, index_len - (index_chunksize * i))
)
for i in range(num_partitions)
]
return index, row_lengths
6 changes: 3 additions & 3 deletions modin/core/io/column_stores/parquet_dispatcher.py
@@ -848,9 +848,9 @@ def func(df, **kw): # pragma: no cover
"""
compression = kwargs["compression"]
partition_idx = kw["partition_idx"]
kwargs[
"path"
] = f"{output_path}/part-{partition_idx:04d}.{compression}.parquet"
kwargs["path"] = (
f"{output_path}/part-{partition_idx:04d}.{compression}.parquet"
)
df.to_parquet(**kwargs)
return pandas.DataFrame()

14 changes: 9 additions & 5 deletions modin/core/io/text/text_file_dispatcher.py
@@ -582,11 +582,15 @@ def _define_metadata(
# if num_splits == 4, len(column_names) == 80 and column_chunksize == 32,
# column_widths will be [32, 32, 16, 0]
column_widths = [
column_chunksize
if len(column_names) > (column_chunksize * (i + 1))
else 0
if len(column_names) < (column_chunksize * i)
else len(column_names) - (column_chunksize * i)
(
column_chunksize
if len(column_names) > (column_chunksize * (i + 1))
else (
0
if len(column_names) < (column_chunksize * i)
else len(column_names) - (column_chunksize * i)
)
)
for i in range(num_splits)
]

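The comment above gives num_splits == 4, len(column_names) == 80, and column_chunksize == 32 as an example; a standalone sketch reproducing it:

# Standalone check of the column_widths example from the comment above.
num_splits, n_cols, column_chunksize = 4, 80, 32
column_widths = [
    (
        column_chunksize
        if n_cols > (column_chunksize * (i + 1))
        else (0 if n_cols < (column_chunksize * i) else n_cols - (column_chunksize * i))
    )
    for i in range(num_splits)
]
print(column_widths)  # [32, 32, 16, 0]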