Skip to content

Commit

Permalink
Support for non-string column names in st.data_editor (#7485)
Browse files Browse the repository at this point in the history
* Fix non-string column names with data editor

* Fix test

* Apply additional fixes

* Change back

* Put into function

* Fix stringifying headers

* Migrate data editor input data test to playwright

* Update snapshots

* Add unit test to check column names

* Clean test script
  • Loading branch information
LukasMasuch committed Oct 4, 2023
1 parent 8291419 commit c9af91b
Show file tree
Hide file tree
Showing 179 changed files with 133 additions and 68 deletions.
33 changes: 0 additions & 33 deletions e2e/scripts/st_data_editor_input_data.py

This file was deleted.

35 changes: 0 additions & 35 deletions e2e/specs/st_data_editor_input_data.spec.js

This file was deleted.

Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
56 changes: 56 additions & 0 deletions e2e_playwright/st_data_editor_input_data.py
@@ -0,0 +1,56 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import random

import numpy as np
import pandas as pd

import streamlit as st
from streamlit.type_util import DataFormat
from tests.streamlit.data_mocks import SHARED_TEST_CASES, TestCaseMetadata

# Seed both random number generators so that generated values are identical
# on every run of this script.
random.seed(0)
np.random.seed(0)

st.set_page_config(layout="wide")

# Toggles controlling how the editors below are rendered.
activate_dynamic_editing = st.toggle("Activate dynamic editing")
show_return_data = st.toggle("Show return data")

# Extra test case on top of the shared ones: a dataframe whose column headers
# are a two-level MultiIndex.
multiindex_columns_df = pd.DataFrame(
    np.random.randn(3, 3),
    columns=pd.MultiIndex.from_tuples(
        [("A", "foo"), ("A", "bar"), ("B", "foo")]
    ),
)

# Work on a copy so that the shared list of test cases is not mutated.
TEST_CASES = SHARED_TEST_CASES.copy()
TEST_CASES.append(
    (
        multiindex_columns_df,
        TestCaseMetadata(0, 2, DataFormat.PANDAS_DATAFRAME),
    )
)

# The row mode is the same for every editor, so resolve it once up front.
num_rows_mode = "dynamic" if activate_dynamic_editing else "fixed"

# Render every test case with st.data_editor, each under a subheader naming
# the expected data format of that case.
for case_index, test_case in enumerate(TEST_CASES):
    input_data = test_case[0]
    case_metadata = test_case[1]
    st.subheader(str(case_metadata.expected_data_format))
    edited_data = st.data_editor(
        input_data,
        key=f"data_editor-{case_index}",
        num_rows=num_rows_mode,
    )
    if show_return_data:
        # Optionally display what the editor returned for this case.
        st.dataframe(edited_data)
31 changes: 31 additions & 0 deletions e2e_playwright/st_data_editor_input_data_test.py
@@ -0,0 +1,31 @@
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from playwright.sync_api import Page, expect

from e2e_playwright.conftest import ImageCompareFunction


def test_data_editor_input_format_rendering(
    app: Page, assert_snapshot: ImageCompareFunction
):
    """Snapshot-test how st.data_editor renders each supported input format.

    Locates every element with the ``stDataFrame`` test id, waits until the
    expected number of them is present, and then compares each one against
    its stored snapshot.
    """
    editors = app.get_by_test_id("stDataFrame")
    # NOTE(review): 35 presumably matches the number of test cases rendered by
    # the companion app script - confirm when adding or removing cases.
    expect(editors).to_have_count(35)

    # Give the canvas-based editor a little extra time to finish rendering
    # before taking the snapshots.
    app.wait_for_timeout(250)

    for position, editor in enumerate(editors.all()):
        snapshot_name = f"st_data_editor-input_data_{position}"
        assert_snapshot(editor, name=snapshot_name)
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
Diff not rendered.
24 changes: 24 additions & 0 deletions lib/streamlit/elements/widgets/data_editor.py
Expand Up @@ -397,13 +397,35 @@ def _is_supported_index(df_index: pd.Index) -> bool:
)


def _fix_column_headers(data_df: pd.DataFrame) -> None:
    """Fix the column headers of the provided dataframe in place so that they
    work correctly for data editing.

    Two normalizations are applied:

    * Hierarchical (``pd.MultiIndex``) column headers are flattened to a single
      level by joining the stringified level values with ``_``.
    * Column names that are not all strings are converted to strings.

    Parameters
    ----------
    data_df : pd.DataFrame
        The dataframe whose column headers should be fixed. It is modified
        in place.

    Returns
    -------
    None
    """

    if isinstance(data_df.columns, pd.MultiIndex):
        # Flatten hierarchical column headers to a single level:
        data_df.columns = [
            "_".join(map(str, header)) for header in data_df.columns.to_flat_index()
        ]
    elif pd.api.types.infer_dtype(data_df.columns) != "string":
        # If the column names are not all strings, we need to convert them to strings
        # to avoid issues with editing.
        # The new index is built directly instead of going through a
        # ``rename`` mapping: a mapping is looked up by label equality, which
        # never matches NaN labels of a numeric column index, so those labels
        # would be left unconverted. The name of the column axis is kept.
        data_df.columns = pd.Index(
            [str(column) for column in data_df.columns],
            name=data_df.columns.name,
        )


def _check_column_names(data_df: pd.DataFrame):
"""Check if the column names in the provided dataframe are valid.
It's not allowed to have duplicate column names or column names that are
named ``_index``. If the column names are not valid, a ``StreamlitAPIException``
is raised.
"""

if data_df.columns.empty:
return

# Check if the column names are unique and raise an exception if not.
# Add the names of the duplicated columns to the exception message.
duplicated_columns = data_df.columns[data_df.columns.duplicated()]
Expand Down Expand Up @@ -760,6 +782,8 @@ def data_editor(
column_config_mapping, data_df, data_format, check_arrow_compatibility=True
)

# Fix the column headers to work correctly for data editing:
_fix_column_headers(data_df)
# Temporary workaround: We hide range indices if num_rows is dynamic.
# since the current way of handling this index during editing is a bit confusing.
if isinstance(data_df.index, pd.RangeIndex) and num_rows == "dynamic":
Expand Down
22 changes: 22 additions & 0 deletions lib/tests/streamlit/elements/data_editor_test.py
Expand Up @@ -557,6 +557,20 @@ def test_with_old_supported_index(self):
return_df = st.data_editor(df)
self.assertIsInstance(return_df, pd.DataFrame)

def test_works_with_multiindex_column_headers(self):
    """Check that a dataframe with two-level column headers can be edited.

    The returned dataframe must equal the data serialized into the proto,
    and its column headers must be flattened to ``<level0>_<level1>``.
    """
    header_levels = [[2, 3, 4], ["c1", "c2", "c3"]]
    values = np.arange(6).reshape(2, 3)
    df = pd.DataFrame(values, index=[0, 1], columns=header_levels)

    return_df = st.data_editor(df)

    # Compare what was sent to the frontend with what was returned.
    element = self.get_delta_from_queue().new_element
    sent_df = bytes_to_data_frame(element.arrow_data_frame.data)
    pd.testing.assert_frame_equal(sent_df, return_df)

    expected_headers = ["2_c1", "3_c2", "4_c3"]
    self.assertEqual(return_df.columns.to_list(), expected_headers)

def test_pandas_styler_support(self):
"""Test that it supports Pandas styler styles."""
df = pd.DataFrame(
Expand Down Expand Up @@ -598,6 +612,14 @@ def test_duplicate_column_names_raise_exception(self):
with self.assertRaises(StreamlitAPIException):
_check_column_names(df)

def test_non_string_column_names_are_converted_to_string(self):
    """Check that st.data_editor returns string column names for a dataframe
    whose column names are integers."""
    # Build a dataframe filled with zeros that uses integer column names.
    row_labels = ["John", "Sarah", "Jane"]
    df = pd.DataFrame(0, index=row_labels, columns=[1, 2, 3])

    # Sanity check: the input columns are not inferred as strings.
    input_kind = pd.api.types.infer_dtype(df.columns)
    self.assertNotEqual(input_kind, "string")

    return_df = st.data_editor(df)

    output_kind = pd.api.types.infer_dtype(return_df.columns)
    self.assertEqual(output_kind, "string")

def test_index_column_name_raises_exception(self):
"""Test that an index column name raises an exception."""
# create a dataframe with a column named "_index"
Expand Down

0 comments on commit c9af91b

Please sign in to comment.