From 7430e011d30c234bba9b81eebf92912a15775970 Mon Sep 17 00:00:00 2001 From: Doris Lee Date: Tue, 17 Nov 2020 23:24:08 +0800 Subject: [PATCH] Improve warning message when values specified as attributes (#143) * Improve warning message when values specified as attributes (#142) * added test, ran black * bugfix test --- lux/core/frame.py | 1 + lux/processor/Validator.py | 33 +++++++++++++++++++---------- tests/test_error_warning.py | 6 +++--- tests/test_parser.py | 41 +++++++++++++++++++++++++++++++------ 4 files changed, 61 insertions(+), 20 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 3c6b3977..fe8b9b97 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -266,6 +266,7 @@ def set_intent(self, intent: List[Union[str, Clause]]): self._parse_validate_compile_intent() def _parse_validate_compile_intent(self): + self.maintain_metadata() from lux.processor.Parser import Parser from lux.processor.Validator import Validator diff --git a/lux/processor/Validator.py b/lux/processor/Validator.py index a497045a..cb48d297 100644 --- a/lux/processor/Validator.py +++ b/lux/processor/Validator.py @@ -53,23 +53,30 @@ def validate_intent(intent: List[Clause], ldf: LuxDataFrame) -> None: """ def validate_clause(clause): + warn_msg = "" if not ( (clause.attribute and clause.attribute == "?") or (clause.value and clause.value == "?") ): if isinstance(clause.attribute, list): for attr in clause.attribute: if attr not in list(ldf.columns): - warnings.warn( - f"The input attribute '{attr}' does not exist in the DataFrame." + warn_msg = ( + f"\n- The input attribute '{attr}' does not exist in the DataFrame." 
) else: if clause.attribute != "Record": # we don't value check datetime since datetime can take filter values that don't exactly match the exact TimeStamp representation if clause.attribute and not is_datetime_string(clause.attribute): if not clause.attribute in list(ldf.columns): - warnings.warn( - f"The input attribute '{clause.attribute}' does not exist in the DataFrame." - ) + search_val = clause.attribute + match_attr = False + for attr, val_list in ldf.unique_values.items(): + if search_val in val_list: + match_attr = attr + if match_attr: + warn_msg = f"\n- The input '{search_val}' looks like a value that belongs to the '{match_attr}' attribute. \n Please specify the value fully, as something like {match_attr}={search_val}." + else: + warn_msg = f"\n- The input attribute '{clause.attribute}' does not exist in the DataFrame. \n Please check your input intent for typos." if clause.value and clause.attribute and clause.filter_op == "=": series = ldf[clause.attribute] if not is_datetime_series(series): @@ -78,15 +85,19 @@ def validate_clause(clause): else: vals = [clause.value] for val in vals: - # (not series.str.contains(val).any()): if val not in series.values: - warnings.warn( - f"The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame." - ) + warn_msg = f"\n- The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame." 
+ return warn_msg + warn_msg = "" for clause in intent: if type(clause) is list: for s in clause: - validate_clause(s) + warn_msg += validate_clause(s) else: - validate_clause(clause) + warn_msg += validate_clause(clause) + if warn_msg != "": + warnings.warn( + "\nThe following issues are encountered when validating the parsed intent:" + warn_msg, + stacklevel=2, + ) diff --git a/tests/test_error_warning.py b/tests/test_error_warning.py index a34b349f..f114ec68 100644 --- a/tests/test_error_warning.py +++ b/tests/test_error_warning.py @@ -18,10 +18,10 @@ from lux.vis.Vis import Vis # Test suite for checking if the expected errors and warnings are showing up correctly -def test_context_str_error(): +def test_intent_str_error(): df = pd.read_csv("lux/data/college.csv") - with pytest.raises(TypeError, match="Input intent must be a list"): - df.set_intent("bad string input") + with pytest.raises(TypeError, match="Input intent must be either a list"): + df.intent = "bad string input" def test_export_b4_widget_created(): diff --git a/tests/test_parser.py b/tests/test_parser.py index 67021583..b37e5db7 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -14,6 +14,7 @@ import pandas as pd import lux +import pytest def test_case1(): @@ -80,9 +81,37 @@ def test_case6(): assert ldf._intent[1].value == ["USA", "Japan", "Europe"] -# TODO: Need to support this case -""" -lux.set_intent(["Horsepower","MPG","Acceleration"],"Origin") - lux.set_intent("Horsepower/MPG/Acceleration", "Origin") - --> [Clause(attr= ["Horsepower","MPG","Acceleration"], type= "attributeGroup")] -""" +def test_case7(): + df = pd.read_csv("lux/data/car.csv") + df.intent = [["Horsepower", "MilesPerGal", "Acceleration"], "Origin"] + df._repr_html_() + assert len(df.current_vis) == 3 + + +def test_validator_invalid_value(): + df = pd.read_csv("lux/data/college.csv") + with pytest.warns( + UserWarning, + match="The input value 'bob' does not exist for the attribute 'Region' for the DataFrame.", + ): 
+ df.intent = ["Region=bob"] + + +def test_validator_invalid_filter(): + df = pd.read_csv("lux/data/college.csv") + + with pytest.raises(KeyError, match="'New England'"): + with pytest.warns( + UserWarning, + match="The input 'New England' looks like a value that belongs to the 'Region' attribute.", + ): + df.intent = ["New England", "Southeast", "Far West"] + + +def test_validator_invalid_attribute(): + df = pd.read_csv("lux/data/college.csv") + with pytest.raises(KeyError, match="'blah'"): + with pytest.warns( + UserWarning, match="The input attribute 'blah' does not exist in the DataFrame." + ): + df.intent = ["blah"]