Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Inline comments breaking to new lines #137

Merged
merged 29 commits into from
Nov 16, 2020
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions lux/action/column_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def column_group(ldf):
ldf_flat = ldf
if isinstance(ldf.columns, pd.DatetimeIndex):
ldf_flat.columns = ldf_flat.columns.format()
ldf_flat = (
ldf_flat.reset_index()
) # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis

# use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
ldf_flat = ldf_flat.reset_index()
if ldf.index.nlevels == 1:
if ldf.index.name:
index_column_name = ldf.index.name
Expand Down
5 changes: 2 additions & 3 deletions lux/action/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
"description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes.",
}
ignore_rec_flag = False
if (
len(ldf) < 5
): # Doesn't make sense to compute correlation if less than 4 data values
# Doesn't make sense to compute correlation if there are fewer than 5 data values
if len(ldf) < 5:
ignore_rec_flag = True
# Then use the data populated in the vis list to compute score
for vis in vlist:
Expand Down
5 changes: 2 additions & 3 deletions lux/action/enhance.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@ def enhance(ldf):
"action": "Enhance",
"description": f"Further breaking down current {intended_attrs} intent by additional attribute.",
}
elif (
len(attr_specs) > 2
): # if there are too many column attributes, return don't generate Enhance recommendations
# if there are too many column attributes, don't generate Enhance recommendations
elif len(attr_specs) > 2:
recommendation = {"action": "Enhance"}
recommendation["collection"] = []
return recommendation
Expand Down
4 changes: 2 additions & 2 deletions lux/action/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ def get_complementary_ops(fltr_op):
new_spec.append(new_filter)
temp_vis = Vis(new_spec, score=1)
output.append(temp_vis)

else: # if no existing filters, create filters using unique values from all categorical variables in the dataset
# if no existing filters, create filters using unique values from all categorical variables in the dataset
else:
intended_attrs = ", ".join(
[
clause.attribute
Expand Down
10 changes: 4 additions & 6 deletions lux/action/univariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,8 @@ def univariate(ldf, *args):
"action": "Distribution",
"description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p> attributes.",
}
if (
len(ldf) < 5
): # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
# Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
if len(ldf) < 5:
ignore_rec_flag = True
elif data_type_constraint == "nominal":
intent = [lux.Clause("?", data_type="nominal")]
Expand All @@ -76,9 +75,8 @@ def univariate(ldf, *args):
"action": "Temporal",
"description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
}
if (
len(ldf) < 3
): # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
# Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
if len(ldf) < 3:
ignore_rec_flag = True
if ignore_rec_flag:
recommendation["collection"] = []
Expand Down
10 changes: 4 additions & 6 deletions lux/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,9 +482,8 @@ def maintain_recs(self):
)
rec_df._prev = None # reset _prev

if (
not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh
): # Check that recs has not yet been computed
# Check that recs has not yet been computed
if not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh:
rec_infolist = []
from lux.action.custom import custom
from lux.action.custom import custom_actions
Expand Down Expand Up @@ -550,9 +549,8 @@ def maintain_recs(self):
rec_df.recommendation[action_type] = vlist
rec_df._rec_info = rec_infolist
self._widget = rec_df.render_widget()
elif (
show_prev
): # re-render widget for the current dataframe if previous rec is not recomputed
# re-render widget for the current dataframe if previous rec is not recomputed
elif show_prev:
self._widget = rec_df.render_widget()
self._recs_fresh = True

Expand Down
12 changes: 6 additions & 6 deletions lux/executor/PandasExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,8 @@ def execute(vislist: VisList, ldf: LuxDataFrame):
"""
PandasExecutor.execute_sampling(ldf)
for vis in vislist:
vis._vis_data = (
ldf._sampled
) # The vis data starts off being original or sampled dataframe
# The vis data starts off being original or sampled dataframe
vis._vis_data = ldf._sampled
filter_executed = PandasExecutor.execute_filter(vis)
# Select relevant data based on attribute information
attributes = set([])
Expand Down Expand Up @@ -220,9 +219,10 @@ def execute_aggregate(vis: Vis, isFiltered=True):
) == N_unique_vals * len(
color_attr_vals
), f"Aggregated data missing values compared to original range of values of `{groupby_attr.attribute, color_attr.attribute}`."
vis._vis_data = vis.data.iloc[
:, :3
] # Keep only the three relevant columns not the *_right columns resulting from merge

# Keep only the three relevant columns not the *_right columns resulting from merge
vis._vis_data = vis.data.iloc[:, :3]

else:
df = pd.DataFrame({columns[0]: attr_unique_vals})

Expand Down
63 changes: 26 additions & 37 deletions lux/processor/Compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,13 @@ def __repr__(self):
@staticmethod
def compile_vis(ldf: LuxDataFrame, vis: Vis) -> VisList:
if vis:
vis_collection = Compiler.populate_data_type_model(
ldf, [vis]
) # autofill data type/model information
vis_collection = Compiler.remove_all_invalid(
vis_collection
) # remove invalid visualizations from collection
# autofill data type/model information
vis_collection = Compiler.populate_data_type_model(ldf, [vis])
# remove invalid visualizations from collection
vis_collection = Compiler.remove_all_invalid(vis_collection)
for vis in vis_collection:
Compiler.determine_encoding(
ldf, vis
) # autofill viz related information
# autofill viz related information
Compiler.determine_encoding(ldf, vis)
ldf._compiled = True
return vis_collection

Expand All @@ -72,17 +69,14 @@ def compile_intent(ldf: LuxDataFrame, _inferred_intent: List[Clause]) -> VisList
"""
if _inferred_intent:
vis_collection = Compiler.enumerate_collection(_inferred_intent, ldf)
vis_collection = Compiler.populate_data_type_model(
ldf, vis_collection
) # autofill data type/model information
# autofill data type/model information
vis_collection = Compiler.populate_data_type_model(ldf, vis_collection)
# remove invalid visualizations from collection
if len(vis_collection) >= 1:
vis_collection = Compiler.remove_all_invalid(
vis_collection
) # remove invalid visualizations from collection
vis_collection = Compiler.remove_all_invalid(vis_collection)
for vis in vis_collection:
Compiler.determine_encoding(
ldf, vis
) # autofill viz related information
# autofill viz related information
Compiler.determine_encoding(ldf, vis)
ldf._compiled = True
return vis_collection

Expand Down Expand Up @@ -121,9 +115,8 @@ def combine(col_attrs, accum):
for i in range(n):
column_list = copy.deepcopy(accum + [col_attrs[0][i]])
if last:
if (
len(filters) > 0
): # if we have filters, generate combinations for each row.
# if we have filters, generate combinations for each row.
if len(filters) > 0:
for row in filters:
_inferred_intent = copy.deepcopy(column_list + [row])
vis = Vis(_inferred_intent)
Expand Down Expand Up @@ -164,19 +157,17 @@ def populate_data_type_model(ldf, vis_collection) -> VisList:
if clause.description == "?":
clause.description = ""
# TODO: Note that "and not is_datetime_string(clause.attribute))" is a temporary hack and breaks the `test_row_column_group` example
if (
clause.attribute != "" and clause.attribute != "Record"
): # and not is_datetime_string(clause.attribute):
# and not is_datetime_string(clause.attribute):
if clause.attribute != "" and clause.attribute != "Record":
if clause.data_type == "":
clause.data_type = ldf.data_type_lookup[clause.attribute]
if clause.data_type == "id":
clause.data_type = "nominal"
if clause.data_model == "":
clause.data_model = ldf.data_model_lookup[clause.attribute]
if clause.value != "":
if (
vis.title == ""
): # If user provided title for Vis, then don't override.
# If user provided title for Vis, then don't override.
if vis.title == "":
if isinstance(clause.value, np.datetime64):
chart_title = date_utils.date_formatter(clause.value, ldf)
else:
Expand Down Expand Up @@ -303,10 +294,9 @@ def line_or_bar(ldf, dimension: Clause, measure: Clause):
dimension = d2
color_attr = d1
else:
# if same attribute then remove_column_from_spec will remove both dims, we only want to remove one
if d1.attribute == d2.attribute:
vis._inferred_intent.pop(
0
) # if same attribute then remove_column_from_spec will remove both dims, we only want to remove one
vis._inferred_intent.pop(0)
else:
vis.remove_column_from_spec(d2.attribute)
dimension = d1
Expand Down Expand Up @@ -380,12 +370,10 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]):
ValueError
Ensures no more than one attribute is placed in the same channel.
"""
result_dict = (
{}
) # result of enforcing specified channel will be stored in result_dict
specified_dict = (
{}
) # specified_dict={"x":[],"y":[list of Dobj with y specified as channel]}
# result of enforcing specified channel will be stored in result_dict
result_dict = {}
# specified_dict={"x":[],"y":[list of Dobj with y specified as channel]}
specified_dict = {}
# create a dictionary of specified channels in the given dobj
for val in auto_channel.keys():
specified_dict[val] = vis.get_attr_by_channel(val)
Expand All @@ -395,9 +383,10 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]):
if len(sAttr) == 1: # if specified in dobj
# remove the specified channel from auto_channel (matching by value, since channel key may not be same)
for i in list(auto_channel.keys()):
# need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name)
if (auto_channel[i].attribute == sAttr[0].attribute) and (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we rewrite this to be inline?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't able to make it inline, but I split it up so that the two conditions are on separate lines

auto_channel[i].channel == sVal
): # need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name)
):
auto_channel.pop(i)
break
sAttr[0].channel = sVal
Expand Down
10 changes: 4 additions & 6 deletions lux/processor/Parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,8 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]:
if isinstance(clause, list):
valid_values = []
for v in clause:
if (
type(v) is str
): # and v in list(ldf.columns): #TODO: Move validation check to Validator
# and v in list(ldf.columns): #TODO: Move validation check to Validator
if type(v) is str:
valid_values.append(v)
temp_spec = Clause(attribute=valid_values)
new_context.append(temp_spec)
Expand Down Expand Up @@ -95,9 +94,8 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]:
if clause.description:
# TODO: Move validation check to Validator
# if ((clause.description in list(ldf.columns)) or clause.description == "?"):# if clause.description in the list of attributes
if any(
ext in [">", "<", "=", "!="] for ext in clause.description
): # clause.description contain ">","<". or "="
# clause.description contain ">","<". or "="
if any(ext in [">", "<", "=", "!="] for ext in clause.description):
# then parse it and assign to clause.attribute, clause.filter_op, clause.values
clause.filter_op = re.findall(
r"/.*/|>|=|<|>=|<=|!=", clause.description
Expand Down
5 changes: 2 additions & 3 deletions lux/processor/Validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,8 @@ def validate_clause(clause):
else:
vals = [clause.value]
for val in vals:
if (
val not in series.values
): # (not series.str.contains(val).any()):
# (not series.str.contains(val).any()):
if val not in series.values:
warnings.warn(
f"The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame."
)
Expand Down
18 changes: 7 additions & 11 deletions lux/utils/date_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ def date_formatter(time_stamp, ldf):
"""
datetime = pd.to_datetime(time_stamp)
if ldf.data_type["temporal"]:
date_column = ldf[
ldf.data_type["temporal"][0]
] # assumes only one temporal column, may need to change this function to recieve multiple temporal columns in the future
# assumes only one temporal column, may need to change this function to receive multiple temporal columns in the future
date_column = ldf[ldf.data_type["temporal"][0]]

granularity = compute_date_granularity(date_column)
date_str = ""
if granularity == "year":
Expand Down Expand Up @@ -80,16 +80,12 @@ def compute_date_granularity(date_column: pd.core.series.Series):
field: str
A str specifying the granularity of dates for the inspected temporal column
"""
date_fields = [
"day",
"month",
"year",
] # supporting a limited set of Vega-Lite TimeUnit (https://vega.github.io/vega-lite/docs/timeunit.html)
# supporting a limited set of Vega-Lite TimeUnit (https://vega.github.io/vega-lite/docs/timeunit.html)
date_fields = ["day", "month", "year"]
date_index = pd.DatetimeIndex(date_column)
for field in date_fields:
if (
hasattr(date_index, field) and len(getattr(date_index, field).unique()) != 1
): # can be changed to sum(getattr(date_index, field)) != 0
# can be changed to sum(getattr(date_index, field)) != 0
if hasattr(date_index, field) and len(getattr(date_index, field).unique()) != 1:
return field
return "year" # if none, then return year by default

Expand Down
5 changes: 2 additions & 3 deletions lux/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,8 @@ def check_if_id_like(df, attribute):
import re

# Strong signals
high_cardinality = (
df.cardinality[attribute] > 500
) # so that aggregated reset_index fields don't get misclassified
# so that aggregated reset_index fields don't get misclassified
high_cardinality = df.cardinality[attribute] > 500
attribute_contain_id = re.search(r"id", str(attribute)) is not None
almost_all_vals_unique = df.cardinality[attribute] >= 0.98 * len(df)
is_string = pd.api.types.is_string_dtype(df[attribute])
Expand Down
6 changes: 3 additions & 3 deletions lux/vis/Vis.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,9 +322,9 @@ def refresh_source(self, ldf): # -> Vis:
from lux.processor.Parser import Parser
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make lines 55-59 of Vis.py a single line?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, lines 80-82 in Vis.py

from lux.processor.Validator import Validator
from lux.processor.Compiler import Compiler
from lux.executor.PandasExecutor import (
PandasExecutor,
) # TODO: temporary (generalize to executor)
from lux.executor.PandasExecutor import PandasExecutor

# TODO: temporary (generalize to executor)

self.check_not_vislist_intent()

Expand Down
12 changes: 4 additions & 8 deletions lux/vis/VisList.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,8 @@ def __repr__(self):
y_channel = ""
largest_mark = 0
largest_filter = 0
for (
vis
) in self._collection: # finds longest x attribute among all visualizations
# finds longest x attribute among all visualizations
for vis in self._collection:
filter_intents = None
for clause in vis._inferred_intent:
if clause.value != "":
Expand Down Expand Up @@ -163,11 +162,8 @@ def __repr__(self):
vis_repr = []
largest_x_length = len(x_channel)
largest_y_length = len(y_channel)
for (
vis
) in (
self._collection
): # pads the shorter visualizations with spaces before the y attribute
# pads the shorter visualizations with spaces before the y attribute
for vis in self._collection:
filter_intents = None
x_channel = ""
y_channel = ""
Expand Down
10 changes: 4 additions & 6 deletions lux/vislib/altair/BarChart.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,12 @@ def add_text(self):
self.chart = self.chart + self.text
self.code += self._topkcode

def encode_color(
self,
): # override encode_color in AltairChart to enforce add_text occurs afterwards
# override encode_color in AltairChart to enforce add_text occurs afterwards
def encode_color(self):
AltairChart.encode_color(self)
self.add_text()
self.chart = self.chart.configure_mark(
tooltip=alt.TooltipContent("encoding")
) # Setting tooltip as non-null
# Setting tooltip as non-null
self.chart = self.chart.configure_mark(tooltip=alt.TooltipContent("encoding"))
self.code += (
f"""chart = chart.configure_mark(tooltip=alt.TooltipContent('encoding'))"""
)
5 changes: 2 additions & 3 deletions lux/vislib/altair/Heatmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,8 @@ def initialize_chart(self):
)
)
chart = chart.configure_scale(minOpacity=0.1, maxOpacity=1)
chart = chart.configure_mark(
tooltip=alt.TooltipContent("encoding")
) # Setting tooltip as non-null
# Setting tooltip as non-null
chart = chart.configure_mark(tooltip=alt.TooltipContent("encoding"))
chart = chart.interactive() # Enable Zooming and Panning

####################################
Expand Down
Loading