Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add user-friendly exception if dataframe exceeds max config of Pandas Styler #7497

Merged
merged 3 commits into from Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
27 changes: 19 additions & 8 deletions lib/streamlit/elements/lib/pandas_styler_utils.py
Expand Up @@ -14,9 +14,10 @@

from typing import TYPE_CHECKING, Any, List, Mapping, TypeVar

from pandas import DataFrame
import pandas as pd

from streamlit import type_util
from streamlit.errors import StreamlitAPIException
from streamlit.proto.Arrow_pb2 import Arrow as ArrowProto

if TYPE_CHECKING:
Expand All @@ -38,6 +39,16 @@ def marshall_styler(proto: ArrowProto, styler: "Styler", default_uuid: str) -> N
If pandas.Styler uuid is not provided, this value will be used.

"""
styler_data_df: pd.DataFrame = styler.data
if styler_data_df.size > int(pd.options.styler.render.max_elements):
raise StreamlitAPIException(
f"The dataframe has `{styler_data_df.size}` cells, but the maximum number "
"of cells allowed to be rendered by Pandas Styler is configured to "
f"`{pd.options.styler.render.max_elements}`. To allow more cells to be "
'styled, you can change the `"styler.render.max_elements"` config. For example: '
f'`pd.set_option("styler.render.max_elements", {styler_data_df.size})`'
)

# pandas.Styler uuid should be set before _compute is called.
_marshall_uuid(proto, styler, default_uuid)

Expand All @@ -49,7 +60,7 @@ def marshall_styler(proto: ArrowProto, styler: "Styler", default_uuid: str) -> N

_marshall_caption(proto, styler)
_marshall_styles(proto, styler, pandas_styles)
_marshall_display_values(proto, styler.data, pandas_styles)
_marshall_display_values(proto, styler_data_df, pandas_styles)


def _marshall_uuid(proto: ArrowProto, styler: "Styler", default_uuid: str) -> None:
Expand Down Expand Up @@ -204,7 +215,7 @@ def _pandas_style_to_css(


def _marshall_display_values(
proto: ArrowProto, df: DataFrame, styles: Mapping[str, Any]
proto: ArrowProto, df: pd.DataFrame, styles: Mapping[str, Any]
) -> None:
"""Marshall pandas.Styler display values into an Arrow proto.

Expand All @@ -224,7 +235,7 @@ def _marshall_display_values(
proto.styler.display_values = type_util.data_frame_to_bytes(new_df)


def _use_display_values(df: DataFrame, styles: Mapping[str, Any]) -> DataFrame:
def _use_display_values(df: pd.DataFrame, styles: Mapping[str, Any]) -> pd.DataFrame:
"""Create a new pandas.DataFrame where display values are used instead of original ones.

Parameters
Expand All @@ -248,9 +259,9 @@ def _use_display_values(df: DataFrame, styles: Mapping[str, Any]) -> DataFrame:
rows = styles["body"]
for row in rows:
for cell in row:
match = cell_selector_regex.match(cell["id"])
if match:
r, c = map(int, match.groups())
new_df.iat[r, c] = str(cell["display_value"])
if "id" in cell:
if match := cell_selector_regex.match(cell["id"]):
r, c = map(int, match.groups())
new_df.iat[r, c] = str(cell["display_value"])

return new_df
10 changes: 10 additions & 0 deletions lib/tests/streamlit/elements/arrow_dataframe_test.py
Expand Up @@ -25,6 +25,7 @@

import streamlit as st
from streamlit.elements.lib.column_config_utils import INDEX_IDENTIFIER
from streamlit.errors import StreamlitAPIException
from streamlit.type_util import bytes_to_data_frame, pyarrow_table_to_bytes
from tests.delta_generator_test_case import DeltaGeneratorTestCase
from tests.testutil import create_snowpark_session
Expand Down Expand Up @@ -151,6 +152,15 @@ def test_display_values(self):
bytes_to_data_frame(proto.styler.display_values), expected
)

def test_throw_exception_if_data_exceeds_styler_config(self):
    """Test that an exception is thrown if the dataframe exceeds the styler.render.max_elements config."""
    # Scope the option with a context manager so the previous value is
    # restored even if the assertion below fails; otherwise the modified
    # global pandas option would leak into subsequently executed tests.
    with pd.option_context("styler.render.max_elements", 5000):
        # One cell more than the explicitly configured limit of 5000.
        df = pd.DataFrame(list(range(5001)))
        with self.assertRaises(StreamlitAPIException):
            st.dataframe(df.style.format("{:03d}"))

@patch(
"streamlit.type_util.is_pandas_version_less_than",
MagicMock(return_value=False),
Expand Down