Move use_pyarrow args to top-level functions
olivier-lacroix authored and ritchie46 committed Jul 25, 2021
1 parent 5003bd1 commit 2003042
Showing 2 changed files with 30 additions and 30 deletions.
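In short, this commit moves backend selection out of the `DataFrame.read_parquet` / `DataFrame.read_ipc` static methods and into the top-level `pl.read_parquet` / `pl.read_ipc` functions in `polars/io.py`. A minimal sketch of the resulting call sites (file paths are hypothetical; the `use_pyarrow=True` default for `read_parquet` is taken from this diff):

```python
import polars as pl

# Native Rust parquet reader; stop_after_n_rows is only valid on this path.
df = pl.read_parquet("data.parquet", use_pyarrow=False, stop_after_n_rows=1000)

# pyarrow parquet reader (the default after this commit); columns and
# memory_map are only forwarded on this path.
df = pl.read_parquet("data.parquet", use_pyarrow=True, columns=["a", "b"])

# IPC / Feather: the backend is likewise chosen per call.
df = pl.read_ipc("data.arrow", use_pyarrow=True)
```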
py-polars/polars/eager/frame.py (19 changes: 1 addition & 18 deletions)
@@ -23,7 +23,6 @@
 
 import numpy as np
 import pyarrow as pa
-import pyarrow.feather
 import pyarrow.parquet
 
 import polars as pl
@@ -545,7 +544,6 @@ def read_csv(
     def read_parquet(
         file: Union[str, BinaryIO],
         stop_after_n_rows: Optional[int] = None,
-        use_pyarrow: bool = False,
     ) -> "DataFrame":
         """
         Read into a DataFrame from a parquet file.
@@ -556,40 +554,25 @@ def read_parquet(
             Path to a file or a file like object. Any valid filepath can be used.
         stop_after_n_rows
             Only read specified number of rows of the dataset. After `n` stops reading.
-        use_pyarrow
-            Use pyarrow instead of the rust native parquet reader. The pyarrow reader is more stable.
         """
-        if use_pyarrow:
-            if stop_after_n_rows:
-                raise ValueError(
-                    "stop_after_n_rows can not be used with 'use_pyarrow==True'."
-                )
-            tbl = pa.parquet.read_table(file)
-            return DataFrame.from_arrow(tbl)
         self = DataFrame.__new__(DataFrame)
         self._df = PyDataFrame.read_parquet(file, stop_after_n_rows)
         return self
 
     @staticmethod
-    def read_ipc(file: Union[str, BinaryIO], use_pyarrow: bool = True) -> "DataFrame":
+    def read_ipc(file: Union[str, BinaryIO]) -> "DataFrame":
         """
         Read into a DataFrame from Arrow IPC stream format. This is also called the feather format.
 
         Parameters
         ----------
         file
             Path to a file or a file like object.
-        use_pyarrow
-            Use pyarrow or rust arrow backend.
 
         Returns
         -------
         DataFrame
         """
-        if use_pyarrow:
-            tbl = pa.feather.read_table(file)
-            return DataFrame.from_arrow(tbl)
-
         self = DataFrame.__new__(DataFrame)
         self._df = PyDataFrame.read_ipc(file)
         return self
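With the pyarrow branches deleted here, the static methods always use the native Rust readers; the pyarrow path now lives in `polars/io.py` and converts through `DataFrame.from_arrow`. A hand-written sketch of that equivalence, assuming a hypothetical local file:

```python
import pyarrow.feather
import polars as pl

# Roughly what pl.read_ipc(..., use_pyarrow=True) now does internally:
tbl = pyarrow.feather.read_table("data.arrow")  # hypothetical path
df = pl.DataFrame.from_arrow(tbl)               # pyarrow Table -> polars DataFrame

# The slimmed-down static method goes straight to the Rust IPC reader:
df2 = pl.DataFrame.read_ipc("data.arrow")
```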
py-polars/polars/io.py (41 changes: 29 additions & 12 deletions)
@@ -19,6 +19,7 @@
 
 import pyarrow as pa
 import pyarrow.csv
+import pyarrow.feather
 import pyarrow.parquet
 
 import polars as pl
@@ -400,7 +401,7 @@ def read_ipc(
         Path to a file or a file like object.
         If ``fsspec`` is installed, it will be used to open remote files
     use_pyarrow
-        Use pyarrow or rust arrow backend.
+        Use pyarrow or the native rust reader.
     storage_options
         Extra options that make sense for ``fsspec.open()`` or a particular storage connection, e.g. host, port, username, password, etc.
 
@@ -410,11 +411,15 @@ def read_ipc(
     """
     storage_options = storage_options or {}
     with _prepare_file_arg(file, **storage_options) as data:
-        return pl.DataFrame.read_ipc(data, use_pyarrow)
+        if use_pyarrow:
+            tbl = pa.feather.read_table(data)
+            return pl.DataFrame.from_arrow(tbl)
+        return pl.DataFrame.read_ipc(data)
 
 
 def read_parquet(
     source: Union[str, List[str], Path, BinaryIO],
+    use_pyarrow: bool = True,
     stop_after_n_rows: Optional[int] = None,
     memory_map: bool = True,
     columns: Optional[List[str]] = None,
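Since `file` is typed `Union[str, BinaryIO]`, both backends also accept in-memory buffers. A round-trip sketch, written with pyarrow so it assumes no particular polars writer API (uncompressed Feather, to stay readable by either backend):

```python
import io

import pyarrow as pa
import pyarrow.feather
import polars as pl

# Build an IPC/Feather payload in memory with pyarrow...
buf = io.BytesIO()
pa.feather.write_feather(
    pa.table({"a": [1, 2, 3]}), buf, compression="uncompressed"
)
buf.seek(0)

# ...then read it back through the native Rust reader (or pyarrow).
df = pl.read_ipc(buf, use_pyarrow=False)
```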
@@ -427,16 +432,20 @@ def read_parquet(
 
     Parameters
     ----------
     source
-        Path to a file | list of files, or a file like object. If the path is a directory, that directory will be used
+        Path to a file, list of files, or a file like object. If the path is a directory, that directory will be used
         as partition aware scan.
         If ``fsspec`` is installed, it will be used to open remote files
+    use_pyarrow
+        Use pyarrow instead of the rust native parquet reader. The pyarrow reader is more stable.
     stop_after_n_rows
-        After n rows are read from the parquet, it stops reading. Note: this cannot be used in partition aware parquet
-        reads.
+        After n rows are read from the parquet, it stops reading.
+        Only valid when 'use_pyarrow==False'
     memory_map
         Memory map underlying file. This will likely increase performance.
+        Only used when 'use_pyarrow==True'
     columns
         Columns to project/ select.
+        Only valid when 'use_pyarrow==True'
     storage_options
         Extra options that make sense for ``fsspec.open()`` or a particular storage connection, e.g. host, port, username, password, etc.
     **kwargs
@@ -446,16 +455,24 @@ def read_parquet(
     Returns
     -------
     DataFrame
     """
+    if use_pyarrow:
+        if stop_after_n_rows:
+            raise ValueError(
+                "'stop_after_n_rows' cannot be used with 'use_pyarrow==True'."
+            )
+    else:
+        if columns:
+            raise ValueError("'columns' cannot be used with 'use_pyarrow==False'.")
     storage_options = storage_options or {}
     with _prepare_file_arg(source, **storage_options) as source_prep:
-        if stop_after_n_rows is not None:
-            return pl.DataFrame.read_parquet(
-                source_prep, stop_after_n_rows=stop_after_n_rows
-            )
-        return from_arrow(  # type: ignore[return-value]
-            pa.parquet.read_table(
-                source_prep, memory_map=memory_map, columns=columns, **kwargs
-            )
-        )
+        if use_pyarrow:
+            return from_arrow(  # type: ignore[return-value]
+                pa.parquet.read_table(
+                    source_prep, memory_map=memory_map, columns=columns, **kwargs
+                )
+            )
+        return pl.DataFrame.read_parquet(
+            source_prep, stop_after_n_rows=stop_after_n_rows
+        )
 


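The new guards in `read_parquet` run before any file is opened, so incompatible argument combinations fail fast. A sketch of both error paths (the path is hypothetical and never touched):

```python
import polars as pl

try:
    pl.read_parquet("data.parquet", use_pyarrow=True, stop_after_n_rows=10)
except ValueError as exc:
    print(exc)  # 'stop_after_n_rows' cannot be used with 'use_pyarrow==True'.

try:
    pl.read_parquet("data.parquet", use_pyarrow=False, columns=["a"])
except ValueError as exc:
    print(exc)  # 'columns' cannot be used with 'use_pyarrow==False'.
```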
