Skip to content

Commit

Permalink
Merge pull request #4324 from guyrosin/fix-necessary-columns
Browse files Browse the repository at this point in the history
Consider necessary columns from complex arguments when interchanging dataframes
  • Loading branch information
alexcjohnson committed Aug 15, 2023
2 parents 91060d3 + 1625d35 commit 402c9b4
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 5 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).

## UNRELEASED

### Fixed
- Fixed an issue where necessary columns referenced by complex arguments were dropped when interchanging dataframes [[#4324](https://github.com/plotly/plotly.py/pull/4324)]

## [5.16.0] - 2023-08-11

Expand Down
8 changes: 6 additions & 2 deletions packages/python/plotly/plotly/express/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1419,9 +1419,13 @@ def build_dataframe(args, constructor):
else:
# Save precious resources by only interchanging columns that are
# actually going to be plotted.
columns = [
necessary_columns = {
i for i in args.values() if isinstance(i, str) and i in columns
]
}
for field in args:
if args[field] is not None and field in array_attrables:
necessary_columns.update(i for i in args[field] if i in columns)
columns = list(necessary_columns)
args["data_frame"] = pd.api.interchange.from_dataframe(
args["data_frame"].select_columns_by_name(columns)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from plotly.express._core import build_dataframe
from pandas.testing import assert_frame_equal


# Fixtures
# --------
@pytest.fixture
Expand Down Expand Up @@ -292,9 +293,10 @@ def __dataframe__(self):
) as mock_from_dataframe:
build_dataframe(args, go.Scatter)
mock_from_dataframe.assert_called_once_with(interchange_dataframe_reduced)
interchange_dataframe.select_columns_by_name.assert_called_with(
["petal_width", "sepal_length"]
)
assert set(interchange_dataframe.select_columns_by_name.call_args[0][0]) == {
"petal_width",
"sepal_length",
}

args = dict(data_frame=input_dataframe_reduced, color=None)
with mock.patch(
Expand Down Expand Up @@ -327,6 +329,35 @@ def test_build_df_from_vaex_and_polars(test_lib):
)


@pytest.mark.skipif(
    version.parse(pd.__version__) < version.parse("2.0.2"),
    reason="plotly doesn't use a dataframe interchange protocol for pandas < 2.0.2",
)
@pytest.mark.parametrize("test_lib", ["vaex", "polars"])
@pytest.mark.parametrize(
    "hover_data", [["sepal_width"], {"sepal_length": False, "sepal_width": ":.2f"}]
)
def test_build_df_with_hover_data_from_vaex_and_polars(test_lib, hover_data):
    """Check build_dataframe output when hover_data names extra columns.

    The iris data is converted with the selected library's ``from_pandas``
    and passed to ``build_dataframe`` with ``x``, ``y`` and ``hover_data``
    given as either a list or a dict. The resulting ``data_frame`` must equal
    the same-named columns of the original pandas frame.
    """
    # Import lazily so only the library under test needs to be importable.
    if test_lib != "vaex":
        import polars as lib
    else:
        import vaex as lib

    # Leave out the 'species' column since the vaex implementation does not
    # cover strings yet.
    numeric_columns = ["petal_width", "sepal_length", "sepal_width"]
    iris_pandas = px.data.iris()[numeric_columns]
    foreign_frame = lib.from_pandas(iris_pandas)

    args = {
        "data_frame": foreign_frame,
        "x": "petal_width",
        "y": "sepal_length",
        "hover_data": hover_data,
    }
    out = build_dataframe(args, go.Scatter)

    result = out["data_frame"]
    # Compare against the pandas source restricted to the columns that
    # build_dataframe actually returned.
    expected = iris_pandas.reset_index()[result.columns]
    assert_frame_equal(expected, result)


def test_timezones():
df = pd.DataFrame({"date": ["2015-04-04 19:31:30+1:00"], "value": [3]})
df["date"] = pd.to_datetime(df["date"])
Expand Down

0 comments on commit 402c9b4

Please sign in to comment.