From 555c60f36d0af6f03d507c13415a77d05acf8d64 Mon Sep 17 00:00:00 2001 From: Doris Lee Date: Tue, 17 Nov 2020 22:17:35 +0800 Subject: [PATCH 1/3] Improve warning message when values specified as attributes (#142) --- lux/core/frame.py | 1 + lux/processor/Validator.py | 32 +++++++++++++++++++++----------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 3c6b3977..fe8b9b97 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -266,6 +266,7 @@ def set_intent(self, intent: List[Union[str, Clause]]): self._parse_validate_compile_intent() def _parse_validate_compile_intent(self): + self.maintain_metadata() from lux.processor.Parser import Parser from lux.processor.Validator import Validator diff --git a/lux/processor/Validator.py b/lux/processor/Validator.py index a497045a..ce7d4a02 100644 --- a/lux/processor/Validator.py +++ b/lux/processor/Validator.py @@ -53,23 +53,28 @@ def validate_intent(intent: List[Clause], ldf: LuxDataFrame) -> None: """ def validate_clause(clause): + warn_msg = "" if not ( (clause.attribute and clause.attribute == "?") or (clause.value and clause.value == "?") ): if isinstance(clause.attribute, list): for attr in clause.attribute: if attr not in list(ldf.columns): - warnings.warn( - f"The input attribute '{attr}' does not exist in the DataFrame." - ) + warn_msg = f"\nThe input attribute '{attr}' does not exist in the DataFrame." else: if clause.attribute != "Record": # we don't value check datetime since datetime can take filter values that don't exactly match the exact TimeStamp representation if clause.attribute and not is_datetime_string(clause.attribute): if not clause.attribute in list(ldf.columns): - warnings.warn( - f"The input attribute '{clause.attribute}' does not exist in the DataFrame." - ) + search_val = clause.attribute + match_attr = False + for attr, val_list in ldf.unique_values.items(): + if search_val in val_list: + match_attr = attr + if match_attr: + warn_msg = f"\n- The input '{search_val}' looks like a value that belongs to the '{match_attr}' attribute. \n Please specify the value fully, as something like {match_attr}={search_val}." + else: + warn_msg = f"\n- The input attribute '{clause.attribute}' does not exist in the DataFrame. \n Please check your input intent for typos." if clause.value and clause.attribute and clause.filter_op == "=": series = ldf[clause.attribute] if not is_datetime_series(series): @@ -80,13 +85,18 @@ def validate_clause(clause): for val in vals: # (not series.str.contains(val).any()): if val not in series.values: - warnings.warn( - f"The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame." - ) + warn_msg = f"\nThe input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame." + return warn_msg + warn_msg = "" for clause in intent: if type(clause) is list: for s in clause: - validate_clause(s) + warn_msg += validate_clause(s) else: - validate_clause(clause) + warn_msg += validate_clause(clause) + if warn_msg != "": + warnings.warn( + "\nThe following issues are ecountered when validating the parsed intent:" + warn_msg, + stacklevel=2, + ) \ No newline at end of file From 00a2c3cbefbbf5babd4b5e8039206e95cae81cd8 Mon Sep 17 00:00:00 2001 From: Doris Lee Date: Tue, 17 Nov 2020 23:05:35 +0800 Subject: [PATCH 2/3] added test, ran black --- lux/processor/Validator.py | 9 ++++---- tests/test_error_warning.py | 4 ++-- tests/test_parser.py | 41 +++++++++++++++++++++++++++++++------ 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/lux/processor/Validator.py b/lux/processor/Validator.py index ce7d4a02..cb48d297 100644 --- a/lux/processor/Validator.py +++ b/lux/processor/Validator.py @@ -60,7 +60,9 @@ def validate_clause(clause): if isinstance(clause.attribute, list): for attr in clause.attribute: if attr not in list(ldf.columns): - warn_msg = f"\nThe input attribute '{attr}' does not exist in the DataFrame." + warn_msg = ( + f"\n- The input attribute '{attr}' does not exist in the DataFrame." + ) else: if clause.attribute != "Record": # we don't value check datetime since datetime can take filter values that don't exactly match the exact TimeStamp representation @@ -83,9 +85,8 @@ def validate_clause(clause): else: vals = [clause.value] for val in vals: - # (not series.str.contains(val).any()): if val not in series.values: - warn_msg = f"\nThe input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame." + warn_msg = f"\n- The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame." return warn_msg warn_msg = "" @@ -99,4 +100,4 @@ def validate_clause(clause): warnings.warn( "\nThe following issues are ecountered when validating the parsed intent:" + warn_msg, stacklevel=2, - ) \ No newline at end of file + ) diff --git a/tests/test_error_warning.py b/tests/test_error_warning.py index a34b349f..6ad1f69c 100644 --- a/tests/test_error_warning.py +++ b/tests/test_error_warning.py @@ -18,10 +18,10 @@ from lux.vis.Vis import Vis # Test suite for checking if the expected errors and warnings are showing up correctly -def test_context_str_error(): +def test_intent_str_error(): df = pd.read_csv("lux/data/college.csv") with pytest.raises(TypeError, match="Input intent must be a list"): - df.set_intent("bad string input") + df.intent = "bad string input" def test_export_b4_widget_created(): diff --git a/tests/test_parser.py b/tests/test_parser.py index 67021583..b37e5db7 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -14,6 +14,7 @@ import pandas as pd import lux +import pytest def test_case1(): @@ -80,9 +81,37 @@ def test_case6(): assert ldf._intent[1].value == ["USA", "Japan", "Europe"] -# TODO: Need to support this case -""" -lux.set_intent(["Horsepower","MPG","Acceleration"],"Origin") - lux.set_intent("Horsepower/MPG/Acceleration", "Origin") - --> [Clause(attr= ["Horsepower","MPG","Acceleration"], type= "attributeGroup")] -""" +def test_case7(): + df = pd.read_csv("lux/data/car.csv") + df.intent = [["Horsepower", "MilesPerGal", "Acceleration"], "Origin"] + df._repr_html_() + assert len(df.current_vis) == 3 + + +def test_validator_invalid_value(): + df = pd.read_csv("lux/data/college.csv") + with pytest.warns( + UserWarning, + match="The input value 'bob' does not exist for the attribute 'Region' for the DataFrame.", + ): + df.intent = ["Region=bob"] + + +def test_validator_invalid_filter(): + df = pd.read_csv("lux/data/college.csv") + + with pytest.raises(KeyError, match="'New England'"): + with pytest.warns( + UserWarning, + match="The input 'New England' looks like a value that belongs to the 'Region' attribute.", + ): + df.intent = ["New England", "Southeast", "Far West"] + + +def test_validator_invalid_attribute(): + df = pd.read_csv("lux/data/college.csv") + with pytest.raises(KeyError, match="'blah'"): + with pytest.warns( + UserWarning, match="The input attribute 'blah' does not exist in the DataFrame." + ): + df.intent = ["blah"] From fc1695a69ac643be882175d14eac1f591c389bc6 Mon Sep 17 00:00:00 2001 From: Doris Lee Date: Tue, 17 Nov 2020 23:14:23 +0800 Subject: [PATCH 3/3] bugfix test --- tests/test_error_warning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_error_warning.py b/tests/test_error_warning.py index 6ad1f69c..f114ec68 100644 --- a/tests/test_error_warning.py +++ b/tests/test_error_warning.py @@ -20,7 +20,7 @@ # Test suite for checking if the expected errors and warnings are showing up correctly def test_intent_str_error(): df = pd.read_csv("lux/data/college.csv") - with pytest.raises(TypeError, match="Input intent must be a list"): + with pytest.raises(TypeError, match="Input intent must be either a list"): df.intent = "bad string input"