Skip to content

Commit

Permalink
python: prefer pyarrow when we can memory map the file (#4182)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jul 29, 2022
1 parent 08f6f73 commit 3e665fd
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
10 changes: 9 additions & 1 deletion py-polars/polars/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,14 @@ def scan_ipc(
# Map legacy arguments to current ones and remove them from kwargs.
n_rows = kwargs.pop("stop_after_n_rows", n_rows)

if (
_PYARROW_AVAILABLE
and row_count_name is None
and row_count_offset is None
and "*" not in file
):
read_ipc(file=file, n_rows=n_rows, use_pyarrow=True, cache=False, rechunk=False)

return LazyFrame.scan_ipc(
file=file,
n_rows=n_rows,
Expand Down Expand Up @@ -729,7 +737,7 @@ def read_ipc(
file: str | BinaryIO | BytesIO | Path | bytes,
columns: list[int] | list[str] | None = None,
n_rows: int | None = None,
use_pyarrow: bool = False,
use_pyarrow: bool = _PYARROW_AVAILABLE,
memory_map: bool = True,
storage_options: dict | None = None,
row_count_name: str | None = None,
Expand Down
8 changes: 4 additions & 4 deletions py-polars/tests/io/test_ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_from_to_buffer(df: pl.DataFrame, compressions: list[str]) -> None:
buf = io.BytesIO()
df.write_ipc(buf, compression=compression) # type: ignore[arg-type]
buf.seek(0)
read_df = pl.read_ipc(buf)
read_df = pl.read_ipc(buf, use_pyarrow=False)
assert_frame_equal_local_categoricals(df, read_df)


Expand All @@ -35,7 +35,7 @@ def test_from_to_file(
for compression in compressions:
for f in (str(f_ipc), Path(f_ipc)):
df.write_ipc(f, compression=compression) # type: ignore[arg-type]
df_read = pl.read_ipc(f) # type: ignore[arg-type]
df_read = pl.read_ipc(f, use_pyarrow=False) # type: ignore[arg-type]
assert_frame_equal_local_categoricals(df, df_read)


Expand Down Expand Up @@ -134,7 +134,7 @@ def test_glob_ipc(io_test_dir: str) -> None:
if os.name != "nt":
path = os.path.join(io_test_dir, "small*.ipc")
assert pl.scan_ipc(path).collect().shape == (3, 12)
assert pl.read_ipc(path).shape == (3, 12)
assert pl.read_ipc(path, use_pyarrow=False).shape == (3, 12)


def test_from_float16() -> None:
Expand All @@ -143,4 +143,4 @@ def test_from_float16() -> None:
f = io.BytesIO()
pandas_df.to_feather(f)
f.seek(0)
assert pl.read_ipc(f).dtypes == [pl.Float32]
assert pl.read_ipc(f, use_pyarrow=False).dtypes == [pl.Float32]

0 comments on commit 3e665fd

Please sign in to comment.