REFACTOR-#6856: Rename read_pickle_distributed/to_pickle_distributed to read_pickle_glob/to_pickle_glob #6957

Merged
1 commit merged on Feb 23, 2024
4 changes: 2 additions & 2 deletions docs/flow/modin/experimental/pandas.rst
@@ -12,11 +12,11 @@ Experimental API Reference
.. autofunction:: read_sql
.. autofunction:: read_csv_glob
.. autofunction:: read_custom_text
.. autofunction:: read_pickle_distributed
.. autofunction:: read_pickle_glob
.. autofunction:: read_parquet_glob
.. autofunction:: read_json_glob
.. autofunction:: read_xml_glob
.. automethod:: modin.pandas.DataFrame.modin::to_pickle_distributed
.. automethod:: modin.pandas.DataFrame.modin::to_pickle_glob
.. automethod:: modin.pandas.DataFrame.modin::to_parquet_glob
.. automethod:: modin.pandas.DataFrame.modin::to_json_glob
.. automethod:: modin.pandas.DataFrame.modin::to_xml_glob
2 changes: 1 addition & 1 deletion docs/supported_apis/dataframe_supported.rst
@@ -433,7 +433,7 @@ default to pandas.
| ``to_period`` | `to_period`_ | D | |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_pickle`` | `to_pickle`_ | D | Experimental implementation: |
| | | | DataFrame.modin.to_pickle_distributed |
| | | | DataFrame.modin.to_pickle_glob |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
| ``to_records`` | `to_records`_ | D | |
+----------------------------+---------------------------+------------------------+----------------------------------------------------+
2 changes: 1 addition & 1 deletion docs/supported_apis/io_supported.rst
@@ -68,7 +68,7 @@ default to pandas.
| `read_sas`_ | D | |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_pickle`_ | D | Experimental implementation: |
| | | read_pickle_distributed |
| | | read_pickle_glob |
+-------------------+---------------------------------+--------------------------------------------------------+
| `read_sql`_ | Y | |
+-------------------+---------------------------------+--------------------------------------------------------+
4 changes: 2 additions & 2 deletions docs/usage_guide/advanced_usage/index.rst
@@ -41,11 +41,11 @@ Modin also supports these experimental APIs on top of pandas that are under acti
- :py:func:`~modin.experimental.pandas.read_csv_glob` -- read multiple files in a directory
- :py:func:`~modin.experimental.pandas.read_sql` -- add optional parameters for the database connection
- :py:func:`~modin.experimental.pandas.read_custom_text` -- read custom text data from file
- :py:func:`~modin.experimental.pandas.read_pickle_distributed` -- read multiple pickle files in a directory
- :py:func:`~modin.experimental.pandas.read_pickle_glob` -- read multiple pickle files in a directory
- :py:func:`~modin.experimental.pandas.read_parquet_glob` -- read multiple parquet files in a directory
- :py:func:`~modin.experimental.pandas.read_json_glob` -- read multiple json files in a directory
- :py:func:`~modin.experimental.pandas.read_xml_glob` -- read multiple xml files in a directory
- :py:meth:`~modin.pandas.DataFrame.modin.to_pickle_distributed` -- write to multiple pickle files in a directory
- :py:meth:`~modin.pandas.DataFrame.modin.to_pickle_glob` -- write to multiple pickle files in a directory
- :py:meth:`~modin.pandas.DataFrame.modin.to_parquet_glob` -- write to multiple parquet files in a directory
- :py:meth:`~modin.pandas.DataFrame.modin.to_json_glob` -- write to multiple json files in a directory
- :py:meth:`~modin.pandas.DataFrame.modin.to_xml_glob` -- write to multiple xml files in a directory
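For context, a minimal round-trip with the renamed pickle glob APIs might look like the sketch below. The output directory and column names are illustrative, and a configured execution engine (e.g. Ray) is assumed.

import os

import modin.experimental.pandas as pd

df = pd.DataFrame({"a": range(8), "b": range(8)})

out_dir = "/tmp/pickle_glob_demo"  # illustrative location
os.makedirs(out_dir, exist_ok=True)

# Write the DataFrame to multiple pickle files matching the glob pattern.
df.modin.to_pickle_glob(os.path.join(out_dir, "part*.pkl"))

# Read the files back into a single DataFrame.
restored = pd.read_pickle_glob(os.path.join(out_dir, "part*.pkl"))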
@@ -111,10 +111,10 @@ def __make_write(*classes, build_args=build_args):
ExperimentalGlobDispatcher,
build_args={**build_args, "base_write": BaseIO.to_xml},
)
read_pickle_distributed = __make_read(
read_pickle_glob = __make_read(
ExperimentalPandasPickleParser, ExperimentalGlobDispatcher
)
to_pickle_distributed = __make_write(
to_pickle_glob = __make_write(
ExperimentalGlobDispatcher,
build_args={**build_args, "base_write": BaseIO.to_pickle},
)
12 changes: 6 additions & 6 deletions modin/core/execution/dispatching/factories/dispatcher.py
@@ -197,9 +197,9 @@ def read_csv_glob(cls, **kwargs):
return cls.get_factory()._read_csv_glob(**kwargs)

@classmethod
@_inherit_docstrings(factories.PandasOnRayFactory._read_pickle_distributed)
def read_pickle_distributed(cls, **kwargs):
return cls.get_factory()._read_pickle_distributed(**kwargs)
@_inherit_docstrings(factories.PandasOnRayFactory._read_pickle_glob)
def read_pickle_glob(cls, **kwargs):
return cls.get_factory()._read_pickle_glob(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_json)
@@ -292,9 +292,9 @@ def to_pickle(cls, *args, **kwargs):
return cls.get_factory()._to_pickle(*args, **kwargs)

@classmethod
@_inherit_docstrings(factories.PandasOnRayFactory._to_pickle_distributed)
def to_pickle_distributed(cls, *args, **kwargs):
return cls.get_factory()._to_pickle_distributed(*args, **kwargs)
@_inherit_docstrings(factories.PandasOnRayFactory._to_pickle_glob)
def to_pickle_glob(cls, *args, **kwargs):
return cls.get_factory()._to_pickle_glob(*args, **kwargs)

@classmethod
@_inherit_docstrings(factories.PandasOnRayFactory._read_parquet_glob)
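The dispatcher changes above follow Modin's factory-dispatch pattern: a stable classmethod name on the dispatcher forwards to whichever factory backs the configured execution. A simplified sketch of that pattern — with hypothetical stand-in classes, not Modin's actual FactoryDispatcher or factories:

class _PickleGlobFactory:
    @classmethod
    def _read_pickle_glob(cls, **kwargs):
        # The real factory delegates to an engine-specific IO class here.
        return f"read with {kwargs}"


class _Dispatcher:
    @classmethod
    def get_factory(cls):
        # Modin selects the factory from the configured engine; hard-coded here.
        return _PickleGlobFactory

    @classmethod
    def read_pickle_glob(cls, **kwargs):
        # Stable public name that simply forwards to the active factory.
        return cls.get_factory()._read_pickle_glob(**kwargs)


print(_Dispatcher.read_pickle_glob(filepath_or_buffer="part*.pkl"))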
12 changes: 6 additions & 6 deletions modin/core/execution/dispatching/factories/factories.py
@@ -476,13 +476,13 @@ def _read_csv_glob(cls, **kwargs):
source="Pickle files",
params=_doc_io_method_kwargs_params,
)
def _read_pickle_distributed(cls, **kwargs):
def _read_pickle_glob(cls, **kwargs):
current_execution = get_current_execution()
if current_execution not in supported_executions:
raise NotImplementedError(
f"`_read_pickle_distributed()` is not implemented for {current_execution} execution."
f"`_read_pickle_glob()` is not implemented for {current_execution} execution."
)
return cls.io_cls.read_pickle_distributed(**kwargs)
return cls.io_cls.read_pickle_glob(**kwargs)

@classmethod
@doc(
@@ -526,7 +526,7 @@ def _read_custom_text(cls, **kwargs):
return cls.io_cls.read_custom_text(**kwargs)

@classmethod
def _to_pickle_distributed(cls, *args, **kwargs):
def _to_pickle_glob(cls, *args, **kwargs):
"""
Distributed pickle query compiler object.

@@ -540,9 +540,9 @@ def _to_pickle_distributed(cls, *args, **kwargs):
current_execution = get_current_execution()
if current_execution not in supported_executions:
raise NotImplementedError(
f"`_to_pickle_distributed()` is not implemented for {current_execution} execution."
f"`_to_pickle_glob()` is not implemented for {current_execution} execution."
)
return cls.io_cls.to_pickle_distributed(*args, **kwargs)
return cls.io_cls.to_pickle_glob(*args, **kwargs)

@classmethod
@doc(
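The renamed factory methods keep the existing guard: on an execution outside `supported_executions`, the glob entry points raise instead of silently defaulting to pandas. A hypothetical call under an unsupported engine would surface roughly as follows (the path is illustrative):

import modin.experimental.pandas as pd

try:
    df = pd.read_pickle_glob("/tmp/pickle_glob_demo/part*.pkl")
except NotImplementedError as err:
    # Per the factory above, the message reads:
    # `_read_pickle_glob()` is not implemented for <current execution> execution.
    print(err)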
@@ -113,10 +113,10 @@ def __make_write(*classes, build_args=build_args):
ExperimentalGlobDispatcher,
build_args={**build_args, "base_write": RayIO.to_xml},
)
read_pickle_distributed = __make_read(
read_pickle_glob = __make_read(
ExperimentalPandasPickleParser, ExperimentalGlobDispatcher
)
to_pickle_distributed = __make_write(
to_pickle_glob = __make_write(
ExperimentalGlobDispatcher,
build_args={**build_args, "base_write": RayIO.to_pickle},
)
@@ -113,10 +113,10 @@ def __make_write(*classes, build_args=build_args):
ExperimentalGlobDispatcher,
build_args={**build_args, "base_write": UnidistIO.to_xml},
)
read_pickle_distributed = __make_read(
read_pickle_glob = __make_read(
ExperimentalPandasPickleParser, ExperimentalGlobDispatcher
)
to_pickle_distributed = __make_write(
to_pickle_glob = __make_write(
ExperimentalGlobDispatcher,
build_args={**build_args, "base_write": UnidistIO.to_pickle},
)
10 changes: 9 additions & 1 deletion modin/experimental/pandas/__init__.py
@@ -33,13 +33,21 @@
"""

from modin.pandas import * # noqa F401, F403
from modin.utils import func_from_deprecated_location

from .io import ( # noqa F401
read_csv_glob,
read_custom_text,
read_json_glob,
read_parquet_glob,
read_pickle_distributed,
read_pickle_glob,
read_sql,
read_xml_glob,
)

read_pickle_distributed = func_from_deprecated_location(
"read_pickle_glob",
"modin.experimental.pandas.io",
"`modin.experimental.pandas.read_pickle_distributed` is deprecated and will be removed in a future version. "
+ "Please use `modin.experimental.pandas.to_pickle_glob` instead.",
)
10 changes: 5 additions & 5 deletions modin/experimental/pandas/io.py
@@ -303,7 +303,7 @@ def _read(**kwargs) -> DataFrame:


@expanduser_path_arg("filepath_or_buffer")
def read_pickle_distributed(
def read_pickle_glob(
filepath_or_buffer,
compression: Optional[str] = "infer",
storage_options: StorageOptions = None,
@@ -313,7 +313,7 @@ def read_pickle_distributed(

This experimental feature provides parallel reading from multiple pickle files which are
defined by glob pattern. The files must contain parts of one dataframe, which can be
obtained, for example, by `DataFrame.modin.to_pickle_distributed` function.
obtained, for example, by `DataFrame.modin.to_pickle_glob` function.

Parameters
----------
@@ -344,11 +344,11 @@

from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher

return DataFrame(query_compiler=FactoryDispatcher.read_pickle_distributed(**kwargs))
return DataFrame(query_compiler=FactoryDispatcher.read_pickle_glob(**kwargs))


@expanduser_path_arg("filepath_or_buffer")
def to_pickle_distributed(
def to_pickle_glob(
self,
filepath_or_buffer,
compression: CompressionOptions = "infer",
@@ -392,7 +392,7 @@ def to_pickle_distributed(

if isinstance(self, DataFrame):
obj = self._query_compiler
FactoryDispatcher.to_pickle_distributed(
FactoryDispatcher.to_pickle_glob(
obj,
filepath_or_buffer=filepath_or_buffer,
compression=compression,
12 changes: 7 additions & 5 deletions modin/experimental/pandas/test/test_io_exp.py
@@ -249,7 +249,7 @@ def _pandas_read_csv_glob(path, storage_options):
@pytest.mark.parametrize(
"filename", ["test_default_to_pickle.pkl", "test_to_pickle*.pkl"]
)
def test_distributed_pickling(tmp_path, filename, compression, pathlike):
@pytest.mark.parametrize("read_func", ["read_pickle_glob", "read_pickle_distributed"])
@pytest.mark.parametrize("to_func", ["to_pickle_glob", "to_pickle_distributed"])
def test_distributed_pickling(
tmp_path, filename, compression, pathlike, read_func, to_func
):
data = test_data["int_data"]
df = pd.DataFrame(data)

@@ -264,10 +268,8 @@ def test_distributed_pickling(tmp_path, filename, compression, pathlike):
if filename_param == "test_default_to_pickle.pkl"
else contextlib.nullcontext()
):
df.modin.to_pickle_distributed(
str(tmp_path / filename), compression=compression
)
pickled_df = pd.read_pickle_distributed(
getattr(df.modin, to_func)(str(tmp_path / filename), compression=compression)
pickled_df = getattr(pd, read_func)(
str(tmp_path / filename), compression=compression
)
df_equals(pickled_df, df)
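The test now exercises both the new and the deprecated names through getattr-based parametrization. To run just this test from a source checkout (pytest and a configured engine assumed), something like the following should work:

import pytest

# Select only the distributed/glob pickling cases from the experimental IO tests.
pytest.main(
    ["modin/experimental/pandas/test/test_io_exp.py", "-k", "distributed_pickling"]
)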
23 changes: 20 additions & 3 deletions modin/pandas/accessor.py
@@ -22,6 +22,7 @@
"""

import pickle
import warnings

import pandas
from pandas._typing import CompressionOptions, StorageOptions
@@ -209,7 +210,7 @@ class ExperimentalFunctions:
def __init__(self, data):
self._data = data

def to_pickle_distributed(
def to_pickle_glob(
self,
filepath_or_buffer,
compression: CompressionOptions = "infer",
@@ -248,16 +249,32 @@ def to_pickle_distributed(
this argument with a non-fsspec URL. See the fsspec and backend storage
implementation docs for the set of allowed keys and values.
"""
from modin.experimental.pandas.io import to_pickle_distributed
from modin.experimental.pandas.io import to_pickle_glob

to_pickle_distributed(
to_pickle_glob(
self._data,
filepath_or_buffer=filepath_or_buffer,
compression=compression,
protocol=protocol,
storage_options=storage_options,
)

def to_pickle_distributed(
self,
filepath_or_buffer,
compression: CompressionOptions = "infer",
protocol: int = pickle.HIGHEST_PROTOCOL,
storage_options: StorageOptions = None,
) -> None: # noqa
warnings.warn(
"`DataFrame.modin.to_pickle_distributed` is deprecated and will be removed in a future version. "
+ "Please use `DataFrame.modin.to_pickle_glob` instead.",
category=FutureWarning,
)
return self.to_pickle_glob(
filepath_or_buffer, compression, protocol, storage_options
)
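Together with the module-level alias added in modin/experimental/pandas/__init__.py, this shim keeps the old names importable while steering users to the glob names. A rough sketch of what callers should observe — warning text per the diff above; the exact category for the module-level alias is an assumption, and the path is illustrative:

import os
import warnings

import modin.experimental.pandas as pd

out_dir = "/tmp/pickle_glob_demo"  # illustrative location
os.makedirs(out_dir, exist_ok=True)
df = pd.DataFrame({"a": range(4)})

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # The old accessor name still works, but warns before delegating to to_pickle_glob.
    df.modin.to_pickle_distributed(os.path.join(out_dir, "part*.pkl"))
    # The old module-level reader is likewise a deprecated alias for read_pickle_glob.
    _ = pd.read_pickle_distributed(os.path.join(out_dir, "part*.pkl"))

assert any(issubclass(w.category, FutureWarning) for w in caught)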

def to_parquet_glob(
self,
path,
Expand Down