modin-project · YarShev · Jan 10, 2024 · Jan 10, 2024 · Jan 10, 2024
@@ -66,7 +66,6 @@ jobs:
             asv_bench/benchmarks/__init__.py asv_bench/benchmarks/io/__init__.py \
             asv_bench/benchmarks/scalability/__init__.py \
             modin/core/io \
-            modin/experimental/core/execution/ray/implementations/pyarrow_on_ray \
             modin/pandas/series.py \
             modin/core/execution/python \
             modin/pandas/dataframe.py \
@@ -90,7 +89,6 @@ jobs:
           python scripts/doc_checker.py modin/experimental/pandas/io.py \
             modin/experimental/pandas/__init__.py
       - run: python scripts/doc_checker.py modin/core/storage_formats/base
-      - run: python scripts/doc_checker.py modin/experimental/core/storage_formats/pyarrow
       - run: python scripts/doc_checker.py modin/core/storage_formats/pandas
       - run: |
           python scripts/doc_checker.py \

@@ -683,36 +683,6 @@ jobs:
       - run: python -m pytest modin/pandas/test/test_io.py --verbose
       - uses: ./.github/actions/upload-coverage
 
-  test-pyarrow:
-    needs: [lint-flake8, lint-black-isort]
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        shell: bash -l {0}
-    strategy:
-      matrix:
-        python-version: ["3.9"]
-    env:
-      MODIN_STORAGE_FORMAT: pyarrow
-      MODIN_EXPERIMENTAL: "True"
-    name: test (pyarrow, python ${{matrix.python-version}})
-    services:
-      moto:
-        image: motoserver/moto
-        ports:
-          - 5000:5000
-        env:
-          AWS_ACCESS_KEY_ID: foobar_key
-          AWS_SECRET_ACCESS_KEY: foobar_secret
-    steps:
-      - uses: actions/checkout@v3
-      - uses: ./.github/actions/mamba-env
-        with:
-          environment-file: environment-dev.yml
-          python-version: ${{matrix.python-version}}
-      - run: sudo apt update && sudo apt install -y libhdf5-dev
-      - run: python -m pytest modin/pandas/test/test_io.py::TestCsv --verbose
-
   test-spreadsheet:
     needs: [lint-flake8, lint-black-isort]
     runs-on: ubuntu-latest

@@ -47,4 +47,4 @@
 assert ASV_USE_IMPL in ("modin", "pandas")
 assert ASV_DATASET_SIZE in ("big", "small")
 assert ASV_USE_ENGINE in ("ray", "dask", "python", "native", "unidist")
-assert ASV_USE_STORAGE_FORMAT in ("pandas", "hdk", "pyarrow")
+assert ASV_USE_STORAGE_FORMAT in ("pandas", "hdk")
@@ -56,7 +56,7 @@ For the simplicity the other execution systems - Dask and MPI are omitted and on
  on a selected storage format and mapping or compiling the Dataframe Algebra DAG to an actual
   execution sequence.
 * Storage formats module is responsible for mapping the abstract operation to an actual executor call, e.g. pandas,
-  PyArrow, custom format.
+  HDK, custom format.
 * Orchestration subsystem is responsible for spawning and controlling the actual execution environment for the
   selected execution. It spawns the actual nodes, fires up the execution environment, e.g. Ray, monitors the state
   of executors and provides telemetry
@@ -228,10 +228,6 @@ documentation page on :doc:`contributing </development/contributing>`.
     - Uses HDK as an engine.
     - The storage format is `hdk` and the in-memory partition type is a pyarrow Table. When defaulting to pandas, the pandas DataFrame is used.
     - For more information on the execution path, see the :doc:`HDK on Native </flow/modin/experimental/core/execution/native/implementations/hdk_on_native/index>` page.
-- :doc:`Pyarrow on Ray </development/using_pyarrow_on_ray>` (experimental)
-    - Uses the Ray_ execution framework.
-    - The storage format is `pyarrow` and the in-memory partition type is a pyarrow Table.
-    - For more information on the execution path, see the :doc:`Pyarrow on Ray </flow/modin/experimental/core/execution/ray/implementations/pyarrow_on_ray>` page.
 - cuDF on Ray (experimental)
     - Uses the Ray_ execution framework.
     - The storage format is `cudf` and the in-memory partition type is a cuDF DataFrame.
@@ -252,7 +248,7 @@ following figure illustrates this concept.
    :align: center
 
 Currently, the main in-memory format of each partition is a `pandas DataFrame`_ (:doc:`pandas storage format </flow/modin/core/storage_formats/pandas/index>`).
-:doc:`HDK </flow/modin/experimental/core/storage_formats/hdk/index>`, :doc:`PyArrow </flow/modin/experimental/core/storage_formats/pyarrow/index>`
+:doc:`HDK </flow/modin/experimental/core/storage_formats/hdk/index>`
 and cuDF are also supported as experimental in-memory formats in Modin.
 
 
@@ -333,8 +329,7 @@ details. The documentation covers most modules, with more docs being added every
    │   │   │   │       └───implementations
    │   │   │   │           └─── :doc:`hdk_on_native </flow/modin/experimental/core/execution/native/implementations/hdk_on_native/index>`
    │   │   │   ├─── :doc:`storage_formats </flow/modin/experimental/core/storage_formats/index>`
-   |   │   │   |   ├─── :doc:`hdk </flow/modin/experimental/core/storage_formats/hdk/index>`
-   │   │   │   |   └─── :doc:`pyarrow </flow/modin/experimental/core/storage_formats/pyarrow/index>`
+   |   │   │   |   └─── :doc:`hdk </flow/modin/experimental/core/storage_formats/hdk/index>`
    |   |   |   └─── :doc:`io </flow/modin/experimental/core/io/index>`
    │   │   ├─── :doc:`pandas </flow/modin/experimental/pandas>`
    │   │   ├─── :doc:`sklearn </flow/modin/experimental/sklearn>`

@@ -12,7 +12,6 @@ Development
     using_pandas_on_python
     using_pandas_on_mpi
     using_hdk
-    using_pyarrow_on_ray
 
 .. meta::
     :description lang=en:

@@ -8,9 +8,8 @@ of objects that are stored in the partitions of the selected Core Modin Datafram
 The base storage format in Modin is pandas. In that format, Modin Dataframe operates with
 partitions that hold ``pandas.DataFrame`` objects. Pandas is the most natural storage format
 since high-level DataFrame objects mirror its API, however, Modin's storage formats are not
-limited to the objects that conform to pandas API. There are formats that are able to store
-``pyarrow.Table`` (:doc:`pyarrow storage format </flow/modin/experimental/core/storage_formats/pyarrow/index>`) or even instances of
-SQL-like databases (:doc:`HDK storage format </flow/modin/experimental/core/storage_formats/hdk/index>`)
+limited to the objects that conform to pandas API. There is a format that is able to store
+even instances of SQL-like databases (:doc:`HDK storage format </flow/modin/experimental/core/storage_formats/hdk/index>`)
 inside Modin Dataframe's partitions.
 
 The storage format + execution engine (Ray, Dask, etc.) form the execution backend. 

@@ -7,11 +7,9 @@ Experimental storage formats
 and provides a limited set of functionality:
 
 * :doc:`hdk <hdk/index>`
-* :doc:`pyarrow <pyarrow/index>`
 
 
 .. toctree::
     :hidden:
 
     hdk/index
-    pyarrow/index
@@ -266,7 +266,7 @@ class StorageFormat(EnvironmentVariable, type=str):
 
     varname = "MODIN_STORAGE_FORMAT"
     default = "Pandas"
-    choices = ("Pandas", "Hdk", "Pyarrow", "Cudf")
+    choices = ("Pandas", "Hdk", "Cudf")
 
 
 class IsExperimental(EnvironmentVariable, type=bool):

@@ -570,21 +570,6 @@ def prepare(cls):
 # that have little coverage of implemented functionality or are not stable enough.
 
 
-@doc(_doc_factory_class, execution_name="experimental PyarrowOnRay")
-class ExperimentalPyarrowOnRayFactory(BaseFactory):  # pragma: no cover
-    @classmethod
-    @doc(_doc_factory_prepare_method, io_module_name="experimental ``PyarrowOnRayIO``")
-    def prepare(cls):
-        from modin.experimental.core.execution.ray.implementations.pyarrow_on_ray.io import (
-            PyarrowOnRayIO,
-        )
-
-        if not IsExperimental.get():
-            raise ValueError("'PyarrowOnRay' only works in experimental mode.")
-
-        cls.io_cls = PyarrowOnRayIO
-
-
 @doc(_doc_factory_class, execution_name="experimental HdkOnNative")
 class ExperimentalHdkOnNativeFactory(BaseFactory):
     @classmethod