Skip to content

Commit

Permalink
Improve warning message when values specified as attributes (#143)
Browse files Browse the repository at this point in the history
* Improve warning message when values specified as attributes (#142)

* added test, ran black

* bugfix test
  • Loading branch information
dorisjlee committed Nov 17, 2020
1 parent ea20833 commit 7430e01
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 20 deletions.
1 change: 1 addition & 0 deletions lux/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ def set_intent(self, intent: List[Union[str, Clause]]):
self._parse_validate_compile_intent()

def _parse_validate_compile_intent(self):
self.maintain_metadata()
from lux.processor.Parser import Parser
from lux.processor.Validator import Validator

Expand Down
33 changes: 22 additions & 11 deletions lux/processor/Validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,23 +53,30 @@ def validate_intent(intent: List[Clause], ldf: LuxDataFrame) -> None:
"""

def validate_clause(clause):
warn_msg = ""
if not (
(clause.attribute and clause.attribute == "?") or (clause.value and clause.value == "?")
):
if isinstance(clause.attribute, list):
for attr in clause.attribute:
if attr not in list(ldf.columns):
warnings.warn(
f"The input attribute '{attr}' does not exist in the DataFrame."
warn_msg = (
f"\n- The input attribute '{attr}' does not exist in the DataFrame."
)
else:
if clause.attribute != "Record":
# we don't value check datetime since datetime can take filter values that don't exactly match the exact TimeStamp representation
if clause.attribute and not is_datetime_string(clause.attribute):
if not clause.attribute in list(ldf.columns):
warnings.warn(
f"The input attribute '{clause.attribute}' does not exist in the DataFrame."
)
search_val = clause.attribute
match_attr = False
for attr, val_list in ldf.unique_values.items():
if search_val in val_list:
match_attr = attr
if match_attr:
warn_msg = f"\n- The input '{search_val}' looks like a value that belongs to the '{match_attr}' attribute. \n Please specify the value fully, as something like {match_attr}={search_val}."
else:
warn_msg = f"\n- The input attribute '{clause.attribute}' does not exist in the DataFrame. \n Please check your input intent for typos."
if clause.value and clause.attribute and clause.filter_op == "=":
series = ldf[clause.attribute]
if not is_datetime_series(series):
Expand All @@ -78,15 +85,19 @@ def validate_clause(clause):
else:
vals = [clause.value]
for val in vals:
# (not series.str.contains(val).any()):
if val not in series.values:
warnings.warn(
f"The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame."
)
warn_msg = f"\n- The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame."
return warn_msg

warn_msg = ""
for clause in intent:
if type(clause) is list:
for s in clause:
validate_clause(s)
warn_msg += validate_clause(s)
else:
validate_clause(clause)
warn_msg += validate_clause(clause)
if warn_msg != "":
warnings.warn(
"\nThe following issues are ecountered when validating the parsed intent:" + warn_msg,
stacklevel=2,
)
6 changes: 3 additions & 3 deletions tests/test_error_warning.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
from lux.vis.Vis import Vis

# Test suite for checking if the expected errors and warnings are showing up correctly
def test_context_str_error():
def test_intent_str_error():
df = pd.read_csv("lux/data/college.csv")
with pytest.raises(TypeError, match="Input intent must be a list"):
df.set_intent("bad string input")
with pytest.raises(TypeError, match="Input intent must be either a list"):
df.intent = "bad string input"


def test_export_b4_widget_created():
Expand Down
41 changes: 35 additions & 6 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import pandas as pd
import lux
import pytest


def test_case1():
Expand Down Expand Up @@ -80,9 +81,37 @@ def test_case6():
assert ldf._intent[1].value == ["USA", "Japan", "Europe"]


# TODO: Need to support this case
"""
lux.set_intent(["Horsepower","MPG","Acceleration"],"Origin")
lux.set_intent("Horsepower/MPG/Acceleration", "Origin")
--> [Clause(attr= ["Horsepower","MPG","Acceleration"], type= "attributeGroup")]
"""
def test_case7():
    """Intent made of a three-attribute list plus "Origin" should leave three entries in current_vis."""
    cars = pd.read_csv("lux/data/car.csv")
    attribute_group = ["Horsepower", "MilesPerGal", "Acceleration"]
    cars.intent = [attribute_group, "Origin"]
    # NOTE(review): rendering presumably populates current_vis -- confirm against LuxDataFrame.
    cars._repr_html_()
    vis_count = len(cars.current_vis)
    assert vis_count == 3


def test_validator_invalid_value():
    """Filtering 'Region' on a value absent from the column should emit a UserWarning."""
    expected_msg = (
        "The input value 'bob' does not exist for the attribute 'Region' for the DataFrame."
    )
    college = pd.read_csv("lux/data/college.csv")
    with pytest.warns(UserWarning, match=expected_msg):
        college.intent = ["Region=bob"]


def test_validator_invalid_filter():
    """Bare values passed where attributes are expected should warn and then raise KeyError.

    The intent lists values ('New England', ...) without naming their attribute.
    The test expects a UserWarning pointing at the 'Region' attribute and a
    KeyError for the unknown column name.
    """
    df = pd.read_csv("lux/data/college.csv")

    # pytest.warns is deliberately the OUTER context. When an exception
    # propagates through pytest.warns, pytest releases before 8.0 skip the
    # "did it warn?" check, so the warning assertion would never run. With
    # pytest.raises inside, the KeyError is consumed first and pytest.warns
    # exits cleanly, so the warning is verified.
    with pytest.warns(
        UserWarning,
        match="The input 'New England' looks like a value that belongs to the 'Region' attribute.",
    ):
        with pytest.raises(KeyError, match="'New England'"):
            df.intent = ["New England", "Southeast", "Far West"]


def test_validator_invalid_attribute():
    """An attribute name that is not in the DataFrame should warn and then raise KeyError."""
    df = pd.read_csv("lux/data/college.csv")

    # pytest.warns must wrap pytest.raises, not the other way round. If the
    # KeyError escapes through pytest.warns, pytest releases before 8.0 skip
    # the warning check and the test would pass without any warning emitted.
    with pytest.warns(
        UserWarning, match="The input attribute 'blah' does not exist in the DataFrame."
    ):
        with pytest.raises(KeyError, match="'blah'"):
            df.intent = ["blah"]

0 comments on commit 7430e01

Please sign in to comment.