plotly · alexcjohnson · Jun 30, 2023 · Sep 16, 2021 · Sep 30, 2022 · Jun 12, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
 
 ### Updated
 - Updated Plotly.js from version 2.24.1 to version 2.24.2. See the [plotly.js CHANGELOG](https://github.com/plotly/plotly.js/blob/master/CHANGELOG.md#2242----2023-06-09) for more information. These changes are reflected in the auto-generated `plotly.graph_objects` module.
+- `px` methods now accept data-frame-like objects that support the [dataframe interchange protocol](https://data-apis.org/dataframe-protocol/latest/index.html), such as polars, vaex, modin, etc. This protocol takes priority over the `to_pandas` call, but is only used if pandas>=2.0.2 is installed in the environment.
 
 ## [5.15.0] - 2023-06-08
 

diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py
@@ -7,6 +7,7 @@
 from _plotly_utils.basevalidators import ColorscaleValidator
 from plotly.colors import qualitative, sequential
 import math
+from packaging import version
 import pandas as pd
 import numpy as np
 
@@ -1307,7 +1308,25 @@ def build_dataframe(args, constructor):
     # Cast data_frame argument to DataFrame (it could be a numpy array, dict etc.)
     df_provided = args["data_frame"] is not None
     if df_provided and not isinstance(args["data_frame"], pd.DataFrame):
-        if hasattr(args["data_frame"], "to_pandas"):
+        if hasattr(args["data_frame"], "__dataframe__") and version.parse(
+            pd.__version__
+        ) >= version.parse("2.0.2"):
+            import pandas.api.interchange
+
+            df_not_pandas = args["data_frame"]
+            try:
+                df_pandas = pandas.api.interchange.from_dataframe(df_not_pandas)
+            except (ImportError, NotImplementedError) as exc:
+                # Temporary workaround: fall back to `to_pandas` when the interchange fails.
+                # Ideally third-party libraries would handle this fallback themselves, e.g.:
+                # def __dataframe__(self, ...):
+                #   if not some_condition:
+                #     self.to_pandas(...)
+                if not hasattr(df_not_pandas, "to_pandas"):
+                    raise exc
+                df_pandas = df_not_pandas.to_pandas()
+            args["data_frame"] = df_pandas
+        elif hasattr(args["data_frame"], "to_pandas"):
             args["data_frame"] = args["data_frame"].to_pandas()
         else:
             args["data_frame"] = pd.DataFrame(args["data_frame"])

diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py
@@ -3,9 +3,25 @@
 import numpy as np
 import pandas as pd
 import pytest
+from packaging import version
+import unittest.mock as mock
 from plotly.express._core import build_dataframe
 from pandas.testing import assert_frame_equal
 
+# Fixtures
+# --------
+@pytest.fixture
+def add_interchange_module_for_old_pandas():  # yields once; stubs `pd.api.interchange` if pandas lacks it
+    if not hasattr(pd.api, "interchange"):
+        pd.api.interchange = mock.MagicMock()  # NOTE(review): never deleted afterwards, so it outlives the test
+        # Register the stub in `sys.modules` while the test runs so `import pandas.api.interchange` works
+        with mock.patch.dict(
+            "sys.modules", {"pandas.api.interchange": pd.api.interchange}
+        ):
+            yield
+    else:
+        yield  # pandas already exposes `pd.api.interchange`; nothing to patch
+
 
 def test_numpy():
     fig = px.scatter(x=[1, 2, 3], y=[2, 3, 4], color=[1, 3, 9])
@@ -233,6 +249,47 @@ def test_build_df_with_index():
     assert_frame_equal(tips.reset_index()[out["data_frame"].columns], out["data_frame"])
 
 
+def test_build_df_using_interchange_protocol_mock(
+    add_interchange_module_for_old_pandas,
+):
+    class CustomDataFrame:  # minimal non-pandas object: exposes only `__dataframe__`, no `to_pandas`
+        def __dataframe__(self):
+            pass
+
+    input_dataframe = CustomDataFrame()
+    args = dict(data_frame=input_dataframe, x="petal_width", y="sepal_length")
+
+    iris_pandas = px.data.iris()
+    # Report pandas as 2.0.2 (the version build_dataframe gates on) and stub out `from_dataframe`
+    with mock.patch("pandas.__version__", "2.0.2"):
+        with mock.patch(
+            "pandas.api.interchange.from_dataframe", return_value=iris_pandas
+        ) as mock_from_dataframe:
+            build_dataframe(args, go.Scatter)
+        mock_from_dataframe.assert_called_once_with(input_dataframe)  # the original object is passed through
+
+
+@pytest.mark.skipif(
+    version.parse(pd.__version__) < version.parse("2.0.2"),
+    reason="plotly doesn't use a dataframe interchange protocol for pandas < 2.0.2",
+)
+@pytest.mark.parametrize("test_lib", ["vaex", "polars"])
+def test_build_df_from_vaex_and_polars(test_lib):
+    if test_lib == "vaex":
+        import vaex as lib
+    else:
+        import polars as lib
+
+    # Keep two numeric columns only; 'species' is dropped since the vaex implementation does not cover strings yet
+    iris_pandas = px.data.iris()[["petal_width", "sepal_length"]]
+    iris_vaex = lib.from_pandas(iris_pandas)  # despite the name, a vaex or polars frame depending on `test_lib`
+    args = dict(data_frame=iris_vaex, x="petal_width", y="sepal_length")
+    out = build_dataframe(args, go.Scatter)
+    assert_frame_equal(
+        iris_pandas.reset_index()[out["data_frame"].columns], out["data_frame"]
+    )
+
+
 def test_timezones():
     df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]})
     df["date"] = pd.to_datetime(df["date"])

diff --git a/packages/python/plotly/test_requirements/requirements_39_pandas_2_optional.txt b/packages/python/plotly/test_requirements/requirements_39_pandas_2_optional.txt
@@ -1,6 +1,6 @@
 requests==2.25.1
 tenacity==6.2.0
-pandas==2.0.1
+pandas==2.0.2
 numpy==1.20.3
 xarray==0.17.0
 statsmodels
@@ -19,3 +19,5 @@ matplotlib==2.2.3
 scikit-image==0.18.1
 psutil==5.7.0
 kaleido
+vaex
+polars