plotly · alexcjohnson · Aug 15, 2023 · Aug 13, 2023 · Aug 14, 2023 · Aug 15, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 All notable changes to this project will be documented in this file.
 This project adheres to [Semantic Versioning](http://semver.org/).
 
+## UNRELEASED
+
+### Fixed
+- Fixed issue where necessary columns referenced by complex arguments (e.g. `hover_data`) were dropped when interchanging dataframes [[#4324](https://github.com/plotly/plotly.py/pull/4324)]
 
 ## [5.16.0] - 2023-08-11
 

diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py
@@ -1419,9 +1419,13 @@ def build_dataframe(args, constructor):
             else:
                 # Save precious resources by only interchanging columns that are
                 # actually going to be plotted.
-                columns = [
+                necessary_columns = {
                     i for i in args.values() if isinstance(i, str) and i in columns
-                ]
+                }
+                for field in args:
+                    if args[field] is not None and field in array_attrables:
+                        necessary_columns.update(i for i in args[field] if i in columns)
+                columns = list(necessary_columns)
                 args["data_frame"] = pd.api.interchange.from_dataframe(
                     args["data_frame"].select_columns_by_name(columns)
                 )

diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py
@@ -8,6 +8,7 @@
 from plotly.express._core import build_dataframe
 from pandas.testing import assert_frame_equal
 
+
 # Fixtures
 # --------
 @pytest.fixture
@@ -292,9 +293,10 @@ def __dataframe__(self):
         ) as mock_from_dataframe:
             build_dataframe(args, go.Scatter)
         mock_from_dataframe.assert_called_once_with(interchange_dataframe_reduced)
-        interchange_dataframe.select_columns_by_name.assert_called_with(
-            ["petal_width", "sepal_length"]
-        )
+        assert set(interchange_dataframe.select_columns_by_name.call_args[0][0]) == {
+            "petal_width",
+            "sepal_length",
+        }
 
         args = dict(data_frame=input_dataframe_reduced, color=None)
         with mock.patch(
@@ -327,6 +329,35 @@ def test_build_df_from_vaex_and_polars(test_lib):
     )
 
 
+@pytest.mark.skipif(
+    version.parse(pd.__version__) < version.parse("2.0.2"),
+    reason="plotly doesn't use a dataframe interchange protocol for pandas < 2.0.2",
+)
+@pytest.mark.parametrize("test_lib", ["vaex", "polars"])
+@pytest.mark.parametrize(
+    "hover_data", [["sepal_width"], {"sepal_length": False, "sepal_width": ":.2f"}]
+)
+def test_build_df_with_hover_data_from_vaex_and_polars(test_lib, hover_data):
+    if test_lib == "vaex":
+        import vaex as lib
+    else:
+        import polars as lib
+
+    # take out the 'species' column since the vaex implementation does not cover strings yet
+    iris_pandas = px.data.iris()[["petal_width", "sepal_length", "sepal_width"]]
+    iris_vaex = lib.from_pandas(iris_pandas)
+    args = dict(
+        data_frame=iris_vaex,
+        x="petal_width",
+        y="sepal_length",
+        hover_data=hover_data,
+    )
+    out = build_dataframe(args, go.Scatter)
+    assert_frame_equal(
+        iris_pandas.reset_index()[out["data_frame"].columns], out["data_frame"]
+    )
+
+
 def test_timezones():
     df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]})
     df["date"] = pd.to_datetime(df["date"])