Skip to content

Commit

Permalink
Supporting dataframe with integer columns (#203)
Browse files Browse the repository at this point in the history
* bugfix for describe and convert_dtypes

* added back metadata series test

* black

* default to pandas display when df.dtypes printed

* various fixes to support int columns
  • Loading branch information
dorisjlee committed Jan 7, 2021
1 parent 459b4bf commit 3393b9f
Show file tree
Hide file tree
Showing 18 changed files with 172 additions and 134 deletions.
4 changes: 2 additions & 2 deletions lux/action/enhance.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def enhance(ldf):
# Collect variables that already exist in the intent
attr_specs = list(filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent))
fltr_str = [fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters]
attr_str = [clause.attribute for clause in attr_specs]
intended_attrs = '<p class="highlight-intent">' + ", ".join(attr_str + fltr_str) + "</p>"
attr_str = [str(clause.attribute) for clause in attr_specs]
intended_attrs = f'<p class="highlight-intent">{", ".join(attr_str + fltr_str)}</p>'
if len(attr_specs) == 1:
recommendation = {
"action": "Enhance",
Expand Down
2 changes: 1 addition & 1 deletion lux/action/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def get_complementary_ops(fltr_op):
else:
intended_attrs = ", ".join(
[
clause.attribute
str(clause.attribute)
for clause in ldf._intent
if clause.value == "" and clause.attribute != "Record"
]
Expand Down
6 changes: 3 additions & 3 deletions lux/action/generalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def generalize(ldf):
filters = utils.get_filter_specs(ldf._intent)

fltr_str = [fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters]
attr_str = [clause.attribute for clause in attributes]
intended_attrs = '<p class="highlight-intent">' + ", ".join(attr_str + fltr_str) + "</p>"
attr_str = [str(clause.attribute) for clause in attributes]
intended_attrs = f'<p class="highlight-intent">{", ".join(attr_str + fltr_str)}</p>'

recommendation = {
"action": "Generalize",
Expand All @@ -66,7 +66,7 @@ def generalize(ldf):
temp_vis.remove_column_from_spec(column, remove_first=True)
excluded_columns.append(column)
output.append(temp_vis)
elif type(columns) == str:
else:
if columns not in excluded_columns:
temp_vis = Vis(ldf.copy_intent(), score=1)
temp_vis.remove_column_from_spec(columns, remove_first=True)
Expand Down
6 changes: 3 additions & 3 deletions lux/executor/PandasExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ def execute(vislist: VisList, ldf: LuxDataFrame):
# Select relevant data based on attribute information
attributes = set([])
for clause in vis._inferred_intent:
if clause.attribute:
if clause.attribute != "Record":
attributes.add(clause.attribute)
if clause.attribute != "Record":
attributes.add(clause.attribute)
# TODO: Add some type of cap size on Nrows ?
vis._vis_data = vis.data[list(attributes)]

if vis.mark == "bar" or vis.mark == "line":
PandasExecutor.execute_aggregate(vis, isFiltered=filter_executed)
elif vis.mark == "histogram":
Expand Down
59 changes: 30 additions & 29 deletions lux/processor/Parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]:
)
import re

# intent = ldf.get_context()
new_context = []
# checks for and converts users' string inputs into lux specifications
for clause in intent:
Expand All @@ -59,37 +58,40 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]:
valid_values.append(v)
temp_spec = Clause(attribute=valid_values)
new_context.append(temp_spec)
elif isinstance(clause, str):
# case where user specifies a filter
if "=" in clause:
eqInd = clause.index("=")
var = clause[0:eqInd]
if "|" in clause:
values = clause[eqInd + 1 :].split("|")
for v in values:
# if v in ldf.unique_values[var]: #TODO: Move validation check to Validator
valid_values.append(v)
elif isinstance(clause, Clause):
new_context.append(clause)
else:
if isinstance(clause, str):
# case where user specifies a filter
if "=" in clause:
eqInd = clause.index("=")
var = clause[0:eqInd]
if "|" in clause:
values = clause[eqInd + 1 :].split("|")
for v in values:
# if v in ldf.unique_values[var]: #TODO: Move validation check to Validator
valid_values.append(v)
else:
valid_values = clause[eqInd + 1 :]
# if var in list(ldf.columns): #TODO: Move validation check to Validator
temp_spec = Clause(attribute=var, filter_op="=", value=valid_values)
new_context.append(temp_spec)
# case where user specifies a variable
else:
valid_values = clause[eqInd + 1 :]
# if var in list(ldf.columns): #TODO: Move validation check to Validator
temp_spec = Clause(attribute=var, filter_op="=", value=valid_values)
new_context.append(temp_spec)
# case where user specifies a variable
if "|" in clause:
values = clause.split("|")
for v in values:
# if v in list(ldf.columns): #TODO: Move validation check to Validator
valid_values.append(v)
else:
valid_values = clause
temp_spec = Clause(attribute=valid_values)
new_context.append(temp_spec)
else:
if "|" in clause:
values = clause.split("|")
for v in values:
# if v in list(ldf.columns): #TODO: Move validation check to Validator
valid_values.append(v)
else:
valid_values = clause
temp_spec = Clause(attribute=valid_values)
temp_spec = Clause(attribute=clause)
new_context.append(temp_spec)
elif type(clause) is Clause:
new_context.append(clause)
intent = new_context
# ldf._intent = new_context

intent = new_context
for clause in intent:
if clause.description:
# TODO: Move validation check to Validator
Expand All @@ -112,4 +114,3 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]:
else: # then it is probably a value
clause.value = clause.description
return intent
# ldf._intent = intent
12 changes: 5 additions & 7 deletions lux/processor/Validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,7 @@ def validate_intent(intent: List[Clause], ldf: LuxDataFrame) -> None:

def validate_clause(clause):
warn_msg = ""
if not (
(clause.attribute and clause.attribute == "?") or (clause.value and clause.value == "?")
):
if not (clause.attribute == "?" or clause.value == "?" or clause.attribute == ""):
if isinstance(clause.attribute, list):
for attr in clause.attribute:
if attr not in list(ldf.columns):
Expand All @@ -69,7 +67,9 @@ def validate_clause(clause):
else:
if clause.attribute != "Record":
# we don't value check datetime since datetime can take filter values that don't exactly match the exact TimeStamp representation
if clause.attribute and not is_datetime_string(clause.attribute):
if isinstance(clause.attribute, str) and not is_datetime_string(
clause.attribute
):
if not clause.attribute in list(ldf.columns):
search_val = clause.attribute
match_attr = False
Expand All @@ -80,9 +80,7 @@ def validate_clause(clause):
warn_msg = f"\n- The input '{search_val}' looks like a value that belongs to the '{match_attr}' attribute. \n Please specify the value fully, as something like {match_attr}={search_val}."
else:
warn_msg = f"\n- The input attribute '{clause.attribute}' does not exist in the DataFrame. \n Please check your input intent for typos."
if clause.value and clause.attribute and clause.filter_op == "=":
import math

if clause.value != "" and clause.attribute != "" and clause.filter_op == "=":
# Skip check for NaN filter values
if not lux.utils.utils.like_nan(clause.value):
series = ldf[clause.attribute]
Expand Down
15 changes: 8 additions & 7 deletions lux/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,17 @@ def check_import_lux_widget():


def get_agg_title(clause):
attr = str(clause.attribute)
if clause.aggregation is None:
if len(clause.attribute) > 25:
return clause.attribute[:15] + "..." + clause.attribute[-10:]
return f"{clause.attribute}"
elif clause.attribute == "Record":
if len(attr) > 25:
return attr[:15] + "..." + attr[-10:]
return f"{attr}"
elif attr == "Record":
return f"Number of Records"
else:
if len(clause.attribute) > 15:
return f"{clause._aggregation_name.capitalize()} of {clause.attribute[:15]}..."
return f"{clause._aggregation_name.capitalize()} of {clause.attribute}"
if len(attr) > 15:
return f"{clause._aggregation_name.capitalize()} of {attr[:15]}..."
return f"{clause._aggregation_name.capitalize()} of {attr}"


def check_if_id_like(df, attribute):
Expand Down
20 changes: 10 additions & 10 deletions lux/vis/Clause.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def to_string(self):
if isinstance(self.attribute, list):
clauseStr = "|".join(self.attribute)
elif self.value == "":
clauseStr = self.attribute
clauseStr = str(self.attribute)
else:
clauseStr = f"{self.attribute}{self.filter_op}{self.value}"
return clauseStr
Expand All @@ -126,23 +126,23 @@ def __repr__(self):
if self.description != "":
attributes.append(f" description: {self.description}")
if self.channel != "":
attributes.append(" channel: " + self.channel)
if len(self.attribute) != 0:
attributes.append(" attribute: " + str(self.attribute))
attributes.append(f" channel: {self.channel}")
if self.attribute != "":
attributes.append(f" attribute: {str(self.attribute)}")
if self.filter_op != "=":
attributes.append(f" filter_op: {str(self.filter_op)}")
if self.aggregation != "" and self.aggregation is not None:
attributes.append(" aggregation: " + self._aggregation_name)
if self.value != "" or len(self.value) != 0:
attributes.append(" value: " + str(self.value))
attributes.append(f" value: {str(self.value)}")
if self.data_model != "":
attributes.append(" data_model: " + self.data_model)
attributes.append(f" data_model: {self.data_model}")
if len(self.data_type) != 0:
attributes.append(" data_type: " + str(self.data_type))
if self.bin_size != None:
attributes.append(" bin_size: " + str(self.bin_size))
attributes.append(f" data_type: {str(self.data_type)}")
if self.bin_size != 0:
attributes.append(f" bin_size: {str(self.bin_size)}")
if len(self.exclude) != 0:
attributes.append(" exclude: " + str(self.exclude))
attributes.append(f" exclude: {str(self.exclude)}")
attributes[0] = "<Clause" + attributes[0][7:]
attributes[len(attributes) - 1] += " >"
return ",\n".join(attributes)
11 changes: 7 additions & 4 deletions lux/vis/Vis.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ def __repr__(self):
if hasattr(clause, "attribute"):
if clause.attribute != "":
if clause.aggregation != "" and clause.aggregation is not None:
attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")"
attribute = f"{clause._aggregation_name.upper()}({clause.attribute})"
elif clause.bin_size > 0:
attribute = "BIN(" + clause.attribute + ")"
attribute = f"BIN({clause.attribute})"
else:
attribute = clause.attribute
if clause.channel == "x":
Expand All @@ -64,7 +64,7 @@ def __repr__(self):
channels.extend(additional_channels)
str_channels = ""
for channel in channels:
str_channels += channel[0] + ": " + channel[1] + ", "
str_channels += f"{channel[0]}: {channel[1]}, "

if filter_intents:
return f"<Vis ({str_channels[:-2]} -- [{filter_intents.attribute}{filter_intents.filter_op}{filter_intents.value}]) mark: {self._mark}, score: {self.score} >"
Expand Down Expand Up @@ -324,5 +324,8 @@ def check_not_vislist_intent(self):

for i in range(len(self._intent)):
clause = self._intent[i]
if type(clause) != Clause and ("|" in clause or type(clause) == list or "?" in clause):
if isinstance(clause, str):
if "|" in clause or "?" in clause:
raise TypeError(syntaxMsg)
if isinstance(clause, list):
raise TypeError(syntaxMsg)
23 changes: 12 additions & 11 deletions lux/vis/VisList.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,16 +133,17 @@ def __repr__(self):
for vis in self._collection:
filter_intents = None
for clause in vis._inferred_intent:
attr = str(clause.attribute)
if clause.value != "":
filter_intents = clause

if clause.aggregation != "" and clause.aggregation is not None:
attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")"
attribute = clause._aggregation_name.upper() + f"({attr})"
elif clause.bin_size > 0:
attribute = "BIN(" + clause.attribute + ")"
attribute = f"BIN({attr})"
else:
attribute = clause.attribute

attribute = attr
attribute = str(attribute)
if clause.channel == "x" and len(x_channel) < len(attribute):
x_channel = attribute
if clause.channel == "y" and len(y_channel) < len(attribute):
Expand All @@ -151,9 +152,9 @@ def __repr__(self):
largest_mark = len(vis.mark)
if (
filter_intents
and len(str(filter_intents.value)) + len(filter_intents.attribute) > largest_filter
and len(str(filter_intents.value)) + len(str(filter_intents.attribute)) > largest_filter
):
largest_filter = len(str(filter_intents.value)) + len(filter_intents.attribute)
largest_filter = len(str(filter_intents.value)) + len(str(filter_intents.attribute))
vis_repr = []
largest_x_length = len(x_channel)
largest_y_length = len(y_channel)
Expand All @@ -164,16 +165,16 @@ def __repr__(self):
y_channel = ""
additional_channels = []
for clause in vis._inferred_intent:
attr = str(clause.attribute)
if clause.value != "":
filter_intents = clause

if clause.aggregation != "" and clause.aggregation is not None and vis.mark != "scatter":
attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")"
attribute = clause._aggregation_name.upper() + f"({attr})"
elif clause.bin_size > 0:
attribute = "BIN(" + clause.attribute + ")"
attribute = f"BIN({attr})"
else:
attribute = clause.attribute

attribute = attr
if clause.channel == "x":
x_channel = attribute.ljust(largest_x_length)
elif clause.channel == "y":
Expand All @@ -197,7 +198,7 @@ def __repr__(self):
if filter_intents:
aligned_filter = (
" -- ["
+ filter_intents.attribute
+ str(filter_intents.attribute)
+ filter_intents.filter_op
+ str(filter_intents.value)
+ "]"
Expand Down
14 changes: 12 additions & 2 deletions lux/vislib/altair/AltairChart.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,17 @@ def encode_color(self):
timeUnit = compute_date_granularity(self.vis.data[color_attr_name])
self.chart = self.chart.encode(
color=alt.Color(
color_attr_name,
str(color_attr_name),
type=color_attr_type,
timeUnit=timeUnit,
title=color_attr_name,
)
)
self.code += f"chart = chart.encode(color=alt.Color('{color_attr_name}',type='{color_attr_type}',timeUnit='{timeUnit}',title='{color_attr_name}'))"
else:
self.chart = self.chart.encode(color=alt.Color(color_attr_name, type=color_attr_type))
self.chart = self.chart.encode(
color=alt.Color(str(color_attr_name), type=color_attr_type)
)
self.code += f"chart = chart.encode(color=alt.Color('{color_attr_name}',type='{color_attr_type}'))\n"
elif len(color_attr) > 1:
raise ValueError(
Expand All @@ -111,3 +113,11 @@ def add_title(self):

def initialize_chart(self):
return NotImplemented

@classmethod
def sanitize_dataframe(cls, df):
    """Return a copy of ``df`` whose column labels are all strings.

    Altair cannot visualize columns with non-string labels (for example
    integer column names), so every label is converted with ``str``.
    Labels that are already strings are left unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        The data that is about to be handed to Altair.

    Returns
    -------
    pandas.DataFrame
        A new frame with stringified column labels; ``df`` itself is not
        modified.
    """
    # A single rename over every label avoids re-copying the whole frame
    # once per column, which the previous per-column loop did.
    return df.rename(columns=str)
11 changes: 6 additions & 5 deletions lux/vislib/altair/AltairRenderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,12 @@ def create_vis(self, vis, standalone=True):
vis.data[attr].iloc[0], pd.Interval
):
vis.data[attr] = vis.data[attr].astype(str)
if "." in attr:
attr_clause = vis.get_attr_by_attr_name(attr)[0]
# Suppress special character ".", not displayable in Altair
# attr_clause.attribute = attr_clause.attribute.replace(".", "")
vis._vis_data = vis.data.rename(columns={attr: attr.replace(".", "")})
if isinstance(attr, str):
if "." in attr:
attr_clause = vis.get_attr_by_attr_name(attr)[0]
# Suppress special character ".", not displayable in Altair
# attr_clause.attribute = attr_clause.attribute.replace(".", "")
vis._vis_data = vis.data.rename(columns={attr: attr.replace(".", "")})
if vis.mark == "histogram":
chart = Histogram(vis)
elif vis.mark == "bar":
Expand Down

0 comments on commit 3393b9f

Please sign in to comment.