Skip to content

Commit

Permalink
expose 'rechunk' param from "read_ipc" for consistency; default behav…
Browse files Browse the repository at this point in the history
…iour unchanged (#3088)
  • Loading branch information
alexander-beedie committed Apr 8, 2022
1 parent ca5208f commit 837a548
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 5 deletions.
5 changes: 4 additions & 1 deletion py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,7 @@ def _read_ipc(
n_rows: Optional[int] = None,
row_count_name: Optional[str] = None,
row_count_offset: int = 0,
rechunk: bool = True,
) -> DF:
"""
Read into a DataFrame from Arrow IPC file format. This is also called the Feather (v2) format.
Expand All @@ -686,6 +687,8 @@ def _read_ipc(
Columns to select. Accepts a list of column indices (starting at zero) or a list of column names.
n_rows
Stop reading from IPC file after reading ``n_rows``.
rechunk
Make sure that all data is contiguous.
Returns
-------
Expand All @@ -700,7 +703,7 @@ def _read_ipc(
scan = scan_ipc(
file,
n_rows=n_rows,
rechunk=True,
rechunk=rechunk,
row_count_name=row_count_name,
row_count_offset=row_count_offset,
)
Expand Down
12 changes: 8 additions & 4 deletions py-polars/polars/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ def _prepare_file_arg(
Utility for read_[csv, parquet]. (not to be used by scan_[csv, parquet]).
Returned value is always usable as a context.
A `StringIO`, `BytesIO` file is returned as a `BytesIO`
A local path is returned as a string
An http url is read into a buffer and returned as a `BytesIO`
A `StringIO`, `BytesIO` file is returned as a `BytesIO`.
A local path is returned as a string.
An http URL is read into a buffer and returned as a `BytesIO`.
When fsspec is installed, remote file(s) is (are) opened with
`fsspec.open(file, **kwargs)` or `fsspec.open_files(file, **kwargs)`.
Expand Down Expand Up @@ -742,6 +742,7 @@ def read_ipc(
storage_options: Optional[Dict] = None,
row_count_name: Optional[str] = None,
row_count_offset: int = 0,
rechunk: bool = True,
**kwargs: Any,
) -> DataFrame:
"""
Expand All @@ -768,6 +769,8 @@ def read_ipc(
If not None, this will insert a row count column with the given name into the DataFrame.
row_count_offset
Offset to start the row_count column (only use if the name is set)
rechunk
Make sure that all data is contiguous.
Returns
-------
Expand Down Expand Up @@ -797,14 +800,15 @@ def read_ipc(
)

tbl = pa.feather.read_table(data, memory_map=memory_map, columns=columns)
return DataFrame._from_arrow(tbl)
return DataFrame._from_arrow(tbl, rechunk=rechunk)

return DataFrame._read_ipc(
data,
columns=columns,
n_rows=n_rows,
row_count_name=row_count_name,
row_count_offset=row_count_offset,
rechunk=rechunk,
)


Expand Down

0 comments on commit 837a548

Please sign in to comment.