Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat(python): Avoid importing interchange module by default #12927

Merged
merged 1 commit into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from polars.config import Config
from polars.convert import (
from_arrow,
from_dataframe,
from_dict,
from_dicts,
from_numpy,
Expand Down Expand Up @@ -160,7 +161,6 @@
when,
zeros,
)
from polars.interchange.from_dataframe import from_dataframe
from polars.io import (
read_avro,
read_csv,
Expand Down
54 changes: 54 additions & 0 deletions py-polars/polars/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
if TYPE_CHECKING:
from polars import DataFrame, Series
from polars.dependencies import numpy as np
from polars.interchange.protocol import SupportsInterchange
from polars.type_aliases import Orientation, SchemaDefinition, SchemaDict


Expand Down Expand Up @@ -730,3 +731,56 @@ def from_pandas(
raise TypeError(
f"expected pandas DataFrame or Series, got {type(data).__name__!r}"
)


def from_dataframe(df: SupportsInterchange, *, allow_copy: bool = True) -> DataFrame:
    """
    Create a Polars DataFrame from an object implementing the interchange protocol.

    Parameters
    ----------
    df
        Any object that supports the dataframe interchange protocol, meaning it
        implements the `__dataframe__` method.
    allow_copy
        Whether memory may be copied in order to perform the conversion. When set
        to False, a conversion that cannot be done zero-copy fails instead.

    Notes
    -----
    The Python dataframe interchange protocol is described at:
    https://data-apis.org/dataframe-protocol/latest/index.html

    A dedicated function such as :func:`from_pandas` or :func:`from_arrow` is a
    more efficient way to convert, and should be preferred where one exists.

    `from_dataframe` currently depends on pyarrow's implementation of the
    dataframe interchange protocol, so pyarrow>=11.0.0 is required for this
    function to work.

    Polars cannot currently guarantee zero-copy conversion from Arrow for
    categorical columns, so `allow_copy=False` will not work if the dataframe
    contains categorical data.

    Examples
    --------
    Convert a pandas dataframe to Polars through the interchange protocol.

    >>> import pandas as pd
    >>> df_pd = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["x", "y"]})
    >>> dfi = df_pd.__dataframe__()
    >>> pl.from_dataframe(dfi)
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ a   ┆ b   ┆ c   │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ f64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3.0 ┆ x   │
    │ 2   ┆ 4.0 ┆ y   │
    └─────┴─────┴─────┘

    """
    # Imported at call time rather than at module level so that the interchange
    # module is not loaded by default when this module is imported.
    from polars.interchange.from_dataframe import (
        from_dataframe as _interchange_from_dataframe,
    )

    converted = _interchange_from_dataframe(df, allow_copy=allow_copy)
    return converted
35 changes: 0 additions & 35 deletions py-polars/polars/interchange/from_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,41 +26,6 @@ def from_dataframe(df: SupportsInterchange, *, allow_copy: bool = True) -> DataF
allow_copy
Allow memory to be copied to perform the conversion. If set to False, causes
conversions that are not zero-copy to fail.

Notes
-----
Details on the Python dataframe interchange protocol:
https://data-apis.org/dataframe-protocol/latest/index.html

Using a dedicated function like :func:`from_pandas` or :func:`from_arrow` is a more
efficient method of conversion.

Polars currently relies on pyarrow's implementation of the dataframe interchange
protocol for `from_dataframe`. Therefore, pyarrow>=11.0.0 is required for this
function to work.

Because Polars can not currently guarantee zero-copy conversion from Arrow for
categorical columns, `allow_copy=False` will not work if the dataframe contains
categorical data.

Examples
--------
Convert a pandas dataframe to Polars through the interchange protocol.

>>> import pandas as pd
>>> df_pd = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["x", "y"]})
>>> dfi = df_pd.__dataframe__()
>>> pl.from_dataframe(dfi)
shape: (2, 3)
┌─────┬─────┬─────┐
│ a ┆ b ┆ c │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞═════╪═════╪═════╡
│ 1 ┆ 3.0 ┆ x │
│ 2 ┆ 4.0 ┆ y │
└─────┴─────┴─────┘

"""
if isinstance(df, pl.DataFrame):
return df
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/interchange/test_from_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def test_from_dataframe_pyarrow_min_version(monkeypatch: Any) -> None:
dfi = pl.DataFrame({"a": [1, 2]}).to_arrow().__dataframe__()

monkeypatch.setattr(
pl.convert.pa, # type: ignore[attr-defined]
pl.interchange.from_dataframe.pa, # type: ignore[attr-defined]
"__version__",
"10.0.0",
)
Expand Down