lux-org · dorisjlee · Nov 16, 2020 · Oct 27, 2020 · Oct 27, 2020 · Oct 29, 2020
diff --git a/lux/action/column_group.py b/lux/action/column_group.py
@@ -31,9 +31,9 @@ def column_group(ldf):
     ldf_flat = ldf
     if isinstance(ldf.columns, pd.DatetimeIndex):
         ldf_flat.columns = ldf_flat.columns.format()
-    ldf_flat = (
-        ldf_flat.reset_index()
-    )  # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
+
+    # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
+    ldf_flat = ldf_flat.reset_index()
     if ldf.index.nlevels == 1:
         if ldf.index.name:
             index_column_name = ldf.index.name

diff --git a/lux/action/correlation.py b/lux/action/correlation.py
@@ -53,9 +53,8 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
         "description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes.",
     }
     ignore_rec_flag = False
-    if (
-        len(ldf) < 5
-    ):  # Doesn't make sense to compute correlation if less than 4 data values
+    # Doesn't make sense to compute correlation with fewer than 5 data values
+    if len(ldf) < 5:
         ignore_rec_flag = True
     # Then use the data populated in the vis list to compute score
     for vis in vlist:

diff --git a/lux/action/enhance.py b/lux/action/enhance.py
@@ -53,9 +53,8 @@ def enhance(ldf):
             "action": "Enhance",
             "description": f"Further breaking down current {intended_attrs} intent by additional attribute.",
         }
-    elif (
-        len(attr_specs) > 2
-    ):  # if there are too many column attributes, return don't generate Enhance recommendations
+    # if there are too many column attributes, don't generate Enhance recommendations (return an empty collection)
+    elif len(attr_specs) > 2:
         recommendation = {"action": "Enhance"}
         recommendation["collection"] = []
         return recommendation

diff --git a/lux/action/filter.py b/lux/action/filter.py
@@ -86,8 +86,8 @@ def get_complementary_ops(fltr_op):
             new_spec.append(new_filter)
             temp_vis = Vis(new_spec, score=1)
             output.append(temp_vis)
-
-    else:  # if no existing filters, create filters using unique values from all categorical variables in the dataset
+    # if no existing filters, create filters using unique values from all categorical variables in the dataset
+    else:
         intended_attrs = ", ".join(
             [
                 clause.attribute

diff --git a/lux/action/univariate.py b/lux/action/univariate.py
@@ -58,9 +58,8 @@ def univariate(ldf, *args):
             "action": "Distribution",
             "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p>  attributes.",
         }
-        if (
-            len(ldf) < 5
-        ):  # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
+        # Doesn't make sense to generate a histogram if there are fewer than 5 datapoints (pre-aggregated)
+        if len(ldf) < 5:
             ignore_rec_flag = True
     elif data_type_constraint == "nominal":
         intent = [lux.Clause("?", data_type="nominal")]
@@ -76,9 +75,8 @@ def univariate(ldf, *args):
             "action": "Temporal",
             "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
         }
-        if (
-            len(ldf) < 3
-        ):  # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
+        # Doesn't make sense to generate a line chart if there are fewer than 3 datapoints (pre-aggregated)
+        if len(ldf) < 3:
             ignore_rec_flag = True
     if ignore_rec_flag:
         recommendation["collection"] = []

diff --git a/lux/core/frame.py b/lux/core/frame.py
@@ -482,9 +482,8 @@ def maintain_recs(self):
             )
         rec_df._prev = None  # reset _prev
 
-        if (
-            not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh
-        ):  # Check that recs has not yet been computed
+        # Check that recs has not yet been computed
+        if not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh:
             rec_infolist = []
             from lux.action.custom import custom
             from lux.action.custom import custom_actions
@@ -550,9 +549,8 @@ def maintain_recs(self):
                     rec_df.recommendation[action_type] = vlist
             rec_df._rec_info = rec_infolist
             self._widget = rec_df.render_widget()
-        elif (
-            show_prev
-        ):  # re-render widget for the current dataframe if previous rec is not recomputed
+        # re-render widget for the current dataframe if previous rec is not recomputed
+        elif show_prev:
             self._widget = rec_df.render_widget()
         self._recs_fresh = True
 

diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py
@@ -80,9 +80,8 @@ def execute(vislist: VisList, ldf: LuxDataFrame):
         """
         PandasExecutor.execute_sampling(ldf)
         for vis in vislist:
-            vis._vis_data = (
-                ldf._sampled
-            )  # The vis data starts off being original or sampled dataframe
+            # The vis data starts off being original or sampled dataframe
+            vis._vis_data = ldf._sampled
             filter_executed = PandasExecutor.execute_filter(vis)
             # Select relevant data based on attribute information
             attributes = set([])
@@ -220,9 +219,10 @@ def execute_aggregate(vis: Vis, isFiltered=True):
                         ) == N_unique_vals * len(
                             color_attr_vals
                         ), f"Aggregated data missing values compared to original range of values of `{groupby_attr.attribute, color_attr.attribute}`."
-                        vis._vis_data = vis.data.iloc[
-                            :, :3
-                        ]  # Keep only the three relevant columns not the *_right columns resulting from merge
+
+                        # Keep only the three relevant columns not the *_right columns resulting from merge
+                        vis._vis_data = vis.data.iloc[:, :3]
+
                     else:
                         df = pd.DataFrame({columns[0]: attr_unique_vals})
 

diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py
@@ -37,16 +37,13 @@ def __repr__(self):
     @staticmethod
     def compile_vis(ldf: LuxDataFrame, vis: Vis) -> VisList:
         if vis:
-            vis_collection = Compiler.populate_data_type_model(
-                ldf, [vis]
-            )  # autofill data type/model information
-            vis_collection = Compiler.remove_all_invalid(
-                vis_collection
-            )  # remove invalid visualizations from collection
+            # autofill data type/model information
+            vis_collection = Compiler.populate_data_type_model(ldf, [vis])
+            # remove invalid visualizations from collection
+            vis_collection = Compiler.remove_all_invalid(vis_collection)
             for vis in vis_collection:
-                Compiler.determine_encoding(
-                    ldf, vis
-                )  # autofill viz related information
+                # autofill viz related information
+                Compiler.determine_encoding(ldf, vis)
             ldf._compiled = True
             return vis_collection
 
@@ -72,17 +69,14 @@ def compile_intent(ldf: LuxDataFrame, _inferred_intent: List[Clause]) -> VisList
         """
         if _inferred_intent:
             vis_collection = Compiler.enumerate_collection(_inferred_intent, ldf)
-            vis_collection = Compiler.populate_data_type_model(
-                ldf, vis_collection
-            )  # autofill data type/model information
+            # autofill data type/model information
+            vis_collection = Compiler.populate_data_type_model(ldf, vis_collection)
+            # remove invalid visualizations from collection
             if len(vis_collection) >= 1:
-                vis_collection = Compiler.remove_all_invalid(
-                    vis_collection
-                )  # remove invalid visualizations from collection
+                vis_collection = Compiler.remove_all_invalid(vis_collection)
             for vis in vis_collection:
-                Compiler.determine_encoding(
-                    ldf, vis
-                )  # autofill viz related information
+                # autofill viz related information
+                Compiler.determine_encoding(ldf, vis)
             ldf._compiled = True
             return vis_collection
 
@@ -121,9 +115,8 @@ def combine(col_attrs, accum):
             for i in range(n):
                 column_list = copy.deepcopy(accum + [col_attrs[0][i]])
                 if last:
-                    if (
-                        len(filters) > 0
-                    ):  # if we have filters, generate combinations for each row.
+                    # if we have filters, generate combinations for each row.
+                    if len(filters) > 0:
                         for row in filters:
                             _inferred_intent = copy.deepcopy(column_list + [row])
                             vis = Vis(_inferred_intent)
@@ -164,19 +157,17 @@ def populate_data_type_model(ldf, vis_collection) -> VisList:
                 if clause.description == "?":
                     clause.description = ""
                 # TODO: Note that "and not is_datetime_string(clause.attribute))" is a temporary hack and breaks the `test_row_column_group` example
-                if (
-                    clause.attribute != "" and clause.attribute != "Record"
-                ):  # and not is_datetime_string(clause.attribute):
+                # disabled extra condition: and not is_datetime_string(clause.attribute)
+                if clause.attribute != "" and clause.attribute != "Record":
                     if clause.data_type == "":
                         clause.data_type = ldf.data_type_lookup[clause.attribute]
                     if clause.data_type == "id":
                         clause.data_type = "nominal"
                     if clause.data_model == "":
                         clause.data_model = ldf.data_model_lookup[clause.attribute]
                 if clause.value != "":
-                    if (
-                        vis.title == ""
-                    ):  # If user provided title for Vis, then don't override.
+                    # If user provided title for Vis, then don't override.
+                    if vis.title == "":
                         if isinstance(clause.value, np.datetime64):
                             chart_title = date_utils.date_formatter(clause.value, ldf)
                         else:
@@ -303,10 +294,9 @@ def line_or_bar(ldf, dimension: Clause, measure: Clause):
                 dimension = d2
                 color_attr = d1
             else:
+                # if same attribute then remove_column_from_spec will remove both dims, we only want to remove one
                 if d1.attribute == d2.attribute:
-                    vis._inferred_intent.pop(
-                        0
-                    )  # if same attribute then remove_column_from_spec will remove both dims, we only want to remove one
+                    vis._inferred_intent.pop(0)
                 else:
                     vis.remove_column_from_spec(d2.attribute)
                 dimension = d1
@@ -380,12 +370,10 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]):
         ValueError
                 Ensures no more than one attribute is placed in the same channel.
         """
-        result_dict = (
-            {}
-        )  # result of enforcing specified channel will be stored in result_dict
-        specified_dict = (
-            {}
-        )  # specified_dict={"x":[],"y":[list of Dobj with y specified as channel]}
+        # result of enforcing specified channel will be stored in result_dict
+        result_dict = {}
+        # specified_dict={"x":[],"y":[list of Dobj with y specified as channel]}
+        specified_dict = {}
         # create a dictionary of specified channels in the given dobj
         for val in auto_channel.keys():
             specified_dict[val] = vis.get_attr_by_channel(val)
@@ -395,9 +383,10 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]):
             if len(sAttr) == 1:  # if specified in dobj
                 # remove the specified channel from auto_channel (matching by value, since channel key may not be same)
                 for i in list(auto_channel.keys()):
+                    # need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name)
                     if (auto_channel[i].attribute == sAttr[0].attribute) and (
                         auto_channel[i].channel == sVal
-                    ):  # need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name)
+                    ):
                         auto_channel.pop(i)
                         break
                 sAttr[0].channel = sVal

diff --git a/lux/processor/Parser.py b/lux/processor/Parser.py
@@ -54,9 +54,8 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]:
             if isinstance(clause, list):
                 valid_values = []
                 for v in clause:
-                    if (
-                        type(v) is str
-                    ):  # and v in list(ldf.columns): #TODO: Move validation check to Validator
+                    # TODO: Move validation check to Validator (disabled extra condition: and v in list(ldf.columns))
+                    if type(v) is str:
                         valid_values.append(v)
                 temp_spec = Clause(attribute=valid_values)
                 new_context.append(temp_spec)
@@ -95,9 +94,8 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]:
             if clause.description:
                 # TODO: Move validation check to Validator
                 # if ((clause.description in list(ldf.columns)) or clause.description == "?"):# if clause.description in the list of attributes
-                if any(
-                    ext in [">", "<", "=", "!="] for ext in clause.description
-                ):  # clause.description contain ">","<". or "="
+                # clause.description contains ">", "<", or "="
+                if any(ext in [">", "<", "=", "!="] for ext in clause.description):
                     # then parse it and assign to clause.attribute, clause.filter_op, clause.values
                     clause.filter_op = re.findall(
                         r"/.*/|>|=|<|>=|<=|!=", clause.description

diff --git a/lux/processor/Validator.py b/lux/processor/Validator.py
@@ -85,9 +85,8 @@ def validate_clause(clause):
                                 else:
                                     vals = [clause.value]
                                 for val in vals:
-                                    if (
-                                        val not in series.values
-                                    ):  # (not series.str.contains(val).any()):
+                                    # disabled alternative check: (not series.str.contains(val).any())
+                                    if val not in series.values:
                                         warnings.warn(
                                             f"The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame."
                                         )

diff --git a/lux/utils/date_utils.py b/lux/utils/date_utils.py
@@ -40,9 +40,9 @@ def date_formatter(time_stamp, ldf):
     """
     datetime = pd.to_datetime(time_stamp)
     if ldf.data_type["temporal"]:
-        date_column = ldf[
-            ldf.data_type["temporal"][0]
-        ]  # assumes only one temporal column, may need to change this function to recieve multiple temporal columns in the future
+        # assumes only one temporal column, may need to change this function to receive multiple temporal columns in the future
+        date_column = ldf[ldf.data_type["temporal"][0]]
+
     granularity = compute_date_granularity(date_column)
     date_str = ""
     if granularity == "year":
@@ -80,16 +80,12 @@ def compute_date_granularity(date_column: pd.core.series.Series):
     field: str
             A str specifying the granularity of dates for the inspected temporal column
     """
-    date_fields = [
-        "day",
-        "month",
-        "year",
-    ]  # supporting a limited set of Vega-Lite TimeUnit (https://vega.github.io/vega-lite/docs/timeunit.html)
+    # supporting a limited set of Vega-Lite TimeUnit (https://vega.github.io/vega-lite/docs/timeunit.html)
+    date_fields = ["day", "month", "year"]
     date_index = pd.DatetimeIndex(date_column)
     for field in date_fields:
-        if (
-            hasattr(date_index, field) and len(getattr(date_index, field).unique()) != 1
-        ):  # can be changed to sum(getattr(date_index, field)) != 0
+        # can be changed to sum(getattr(date_index, field)) != 0
+        if hasattr(date_index, field) and len(getattr(date_index, field).unique()) != 1:
             return field
     return "year"  # if none, then return year by default
 

diff --git a/lux/utils/utils.py b/lux/utils/utils.py
@@ -69,9 +69,8 @@ def check_if_id_like(df, attribute):
     import re
 
     # Strong signals
-    high_cardinality = (
-        df.cardinality[attribute] > 500
-    )  # so that aggregated reset_index fields don't get misclassified
+    # so that aggregated reset_index fields don't get misclassified
+    high_cardinality = df.cardinality[attribute] > 500
     attribute_contain_id = re.search(r"id", str(attribute)) is not None
     almost_all_vals_unique = df.cardinality[attribute] >= 0.98 * len(df)
     is_string = pd.api.types.is_string_dtype(df[attribute])

diff --git a/lux/vis/Vis.py b/lux/vis/Vis.py
@@ -322,9 +322,9 @@ def refresh_source(self, ldf):  # -> Vis:
             from lux.processor.Parser import Parser
             from lux.processor.Validator import Validator
             from lux.processor.Compiler import Compiler
-            from lux.executor.PandasExecutor import (
-                PandasExecutor,
-            )  # TODO: temporary (generalize to executor)
+            from lux.executor.PandasExecutor import PandasExecutor
+
+            # TODO: the PandasExecutor import above is temporary (generalize to executor)
 
             self.check_not_vislist_intent()
 

diff --git a/lux/vis/VisList.py b/lux/vis/VisList.py
@@ -129,9 +129,8 @@ def __repr__(self):
         y_channel = ""
         largest_mark = 0
         largest_filter = 0
-        for (
-            vis
-        ) in self._collection:  # finds longest x attribute among all visualizations
+        # finds longest x attribute among all visualizations
+        for vis in self._collection:
             filter_intents = None
             for clause in vis._inferred_intent:
                 if clause.value != "":
@@ -163,11 +162,8 @@ def __repr__(self):
         vis_repr = []
         largest_x_length = len(x_channel)
         largest_y_length = len(y_channel)
-        for (
-            vis
-        ) in (
-            self._collection
-        ):  # pads the shorter visualizations with spaces before the y attribute
+        # pads the shorter visualizations with spaces before the y attribute
+        for vis in self._collection:
             filter_intents = None
             x_channel = ""
             y_channel = ""

diff --git a/lux/vislib/altair/BarChart.py b/lux/vislib/altair/BarChart.py
@@ -120,14 +120,12 @@ def add_text(self):
             self.chart = self.chart + self.text
             self.code += self._topkcode
 
-    def encode_color(
-        self,
-    ):  # override encode_color in AltairChart to enforce add_text occurs afterwards
+    # override encode_color in AltairChart to enforce add_text occurs afterwards
+    def encode_color(self):
         AltairChart.encode_color(self)
         self.add_text()
-        self.chart = self.chart.configure_mark(
-            tooltip=alt.TooltipContent("encoding")
-        )  # Setting tooltip as non-null
+        # Setting tooltip as non-null
+        self.chart = self.chart.configure_mark(tooltip=alt.TooltipContent("encoding"))
         self.code += (
             f"""chart = chart.configure_mark(tooltip=alt.TooltipContent('encoding'))"""
         )
diff --git a/lux/vislib/altair/Heatmap.py b/lux/vislib/altair/Heatmap.py
@@ -66,9 +66,8 @@ def initialize_chart(self):
             )
         )
         chart = chart.configure_scale(minOpacity=0.1, maxOpacity=1)
-        chart = chart.configure_mark(
-            tooltip=alt.TooltipContent("encoding")
-        )  # Setting tooltip as non-null
+        # Setting tooltip as non-null
+        chart = chart.configure_mark(tooltip=alt.TooltipContent("encoding"))
         chart = chart.interactive()  # Enable Zooming and Panning
 
         ####################################