Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve warning message when values specified as attributes #143

Merged
merged 3 commits into from
Nov 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lux/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ def set_intent(self, intent: List[Union[str, Clause]]):
self._parse_validate_compile_intent()

def _parse_validate_compile_intent(self):
self.maintain_metadata()
from lux.processor.Parser import Parser
from lux.processor.Validator import Validator

Expand Down
33 changes: 22 additions & 11 deletions lux/processor/Validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,23 +53,30 @@ def validate_intent(intent: List[Clause], ldf: LuxDataFrame) -> None:
"""

def validate_clause(clause):
warn_msg = ""
if not (
(clause.attribute and clause.attribute == "?") or (clause.value and clause.value == "?")
):
if isinstance(clause.attribute, list):
for attr in clause.attribute:
if attr not in list(ldf.columns):
warnings.warn(
f"The input attribute '{attr}' does not exist in the DataFrame."
warn_msg = (
f"\n- The input attribute '{attr}' does not exist in the DataFrame."
)
else:
if clause.attribute != "Record":
# we don't value check datetime since datetime can take filter values that don't exactly match the exact TimeStamp representation
if clause.attribute and not is_datetime_string(clause.attribute):
if not clause.attribute in list(ldf.columns):
warnings.warn(
f"The input attribute '{clause.attribute}' does not exist in the DataFrame."
)
search_val = clause.attribute
match_attr = False
for attr, val_list in ldf.unique_values.items():
if search_val in val_list:
match_attr = attr
if match_attr:
warn_msg = f"\n- The input '{search_val}' looks like a value that belongs to the '{match_attr}' attribute. \n Please specify the value fully, as something like {match_attr}={search_val}."
else:
warn_msg = f"\n- The input attribute '{clause.attribute}' does not exist in the DataFrame. \n Please check your input intent for typos."
if clause.value and clause.attribute and clause.filter_op == "=":
series = ldf[clause.attribute]
if not is_datetime_series(series):
Expand All @@ -78,15 +85,19 @@ def validate_clause(clause):
else:
vals = [clause.value]
for val in vals:
# (not series.str.contains(val).any()):
if val not in series.values:
warnings.warn(
f"The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame."
)
warn_msg = f"\n- The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame."
return warn_msg

warn_msg = ""
for clause in intent:
if type(clause) is list:
for s in clause:
validate_clause(s)
warn_msg += validate_clause(s)
else:
validate_clause(clause)
warn_msg += validate_clause(clause)
# Emit a single consolidated warning only when at least one clause reported a problem.
# Each clause-level message already begins with "\n- ", so it is appended directly
# after the header line.
if warn_msg != "":
    warnings.warn(
        "\nThe following issues are encountered when validating the parsed intent:" + warn_msg,
        # stacklevel=2 attributes the warning to the caller's frame rather than to this line.
        stacklevel=2,
    )
6 changes: 3 additions & 3 deletions tests/test_error_warning.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
from lux.vis.Vis import Vis

# Test suite for checking if the expected errors and warnings are showing up correctly
def test_context_str_error():
def test_intent_str_error():
df = pd.read_csv("lux/data/college.csv")
with pytest.raises(TypeError, match="Input intent must be a list"):
df.set_intent("bad string input")
with pytest.raises(TypeError, match="Input intent must be either a list"):
df.intent = "bad string input"


def test_export_b4_widget_created():
Expand Down
41 changes: 35 additions & 6 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import pandas as pd
import lux
import pytest


def test_case1():
Expand Down Expand Up @@ -80,9 +81,37 @@ def test_case6():
assert ldf._intent[1].value == ["USA", "Japan", "Europe"]


# TODO: Need to support this case
"""
lux.set_intent(["Horsepower","MPG","Acceleration"],"Origin")
lux.set_intent("Horsepower/MPG/Acceleration", "Origin")
--> [Clause(attr= ["Horsepower","MPG","Acceleration"], type= "attributeGroup")]
"""
def test_case7():
    """An intent made of a nested attribute list plus "Origin" should give three visualizations."""
    attribute_group = ["Horsepower", "MilesPerGal", "Acceleration"]
    cars = pd.read_csv("lux/data/car.csv")
    cars.intent = [attribute_group, "Origin"]
    # The HTML repr is invoked before inspecting current_vis
    # (presumably this is what populates it; confirm against LuxDataFrame).
    cars._repr_html_()
    vis_count = len(cars.current_vis)
    assert vis_count == 3


def test_validator_invalid_value():
    """Setting the intent to "Region=bob" should emit a UserWarning about the missing value."""
    expected_warning = (
        "The input value 'bob' does not exist for the attribute 'Region' for the DataFrame."
    )
    colleges = pd.read_csv("lux/data/college.csv")
    with pytest.warns(UserWarning, match=expected_warning):
        colleges.intent = ["Region=bob"]


def test_validator_invalid_filter():
    """Bare values given as attributes should warn with a hint and then raise KeyError."""
    expected_warning = (
        "The input 'New England' looks like a value that belongs to the 'Region' attribute."
    )
    bare_values = ["New England", "Southeast", "Far West"]
    colleges = pd.read_csv("lux/data/college.csv")

    # NOTE(review): on older pytest releases, pytest.warns may skip its check when the
    # block exits via an exception, so the warning assertion might not actually run
    # here. Confirm against the pinned pytest version.
    with pytest.raises(KeyError, match="'New England'"):
        with pytest.warns(UserWarning, match=expected_warning):
            colleges.intent = bare_values


def test_validator_invalid_attribute():
    """An unknown attribute name should warn that it does not exist and then raise KeyError."""
    expected_warning = "The input attribute 'blah' does not exist in the DataFrame."
    colleges = pd.read_csv("lux/data/college.csv")
    # NOTE(review): on older pytest releases, pytest.warns may skip its check when the
    # block exits via an exception. Confirm the warning assertion is really exercised.
    with pytest.raises(KeyError, match="'blah'"):
        with pytest.warns(UserWarning, match=expected_warning):
            colleges.intent = ["blah"]