code cleanup

* making LuxDataFrame pickle-able * various simplifications and rewrites of Compiler and Vis * changing `render_VSpec` to `to_code` * bugfix in filter (the `map` object was lazily evaluated; it is now materialized with `list(...)`)
lux-org · Nov 25, 2020 · 8991206 · 8991206
1 parent 873629c
commit 8991206
Show file tree

Hide file tree

Showing 9 changed files with 59 additions and 50 deletions.
diff --git a/doc/source/reference/gen/lux.vis.Vis.Vis.rst b/doc/source/reference/gen/lux.vis.Vis.Vis.rst
@@ -22,7 +22,7 @@ lux.vis.Vis.Vis
       ~Vis.refresh_source
       ~Vis.remove_column_from_spec
       ~Vis.remove_filter_from_spec
-      ~Vis.render_VSpec
+      ~Vis.to_code
       ~Vis.set_intent
       ~Vis.to_Altair
       ~Vis.to_VegaLite

diff --git a/lux/action/filter.py b/lux/action/filter.py
@@ -38,8 +38,8 @@ def filter(ldf):
     filter_values = []
     output = []
     # if fltr is specified, create visualizations where data is filtered by all values of the fltr's categorical variable
-    column_spec = utils.get_attrs_specs(ldf.current_vis[0]._inferred_intent)
-    column_spec_attr = map(lambda x: x.attribute, column_spec)
+    column_spec = utils.get_attrs_specs(ldf.current_vis[0].intent)
+    column_spec_attr = list(map(lambda x: x.attribute, column_spec))
     if len(filters) == 1:
         # get unique values for all categorical values specified and creates corresponding filters
         fltr = filters[0]
@@ -106,9 +106,9 @@ def get_complementary_ops(fltr_op):
                 categorical_vars.append(col)
         for cat in categorical_vars:
             unique_values = ldf.unique_values[cat]
-            for i in range(0, len(unique_values)):
+            for val in unique_values:
                 new_spec = column_spec.copy()
-                new_filter = lux.Clause(attribute=cat, filter_op="=", value=unique_values[i])
+                new_filter = lux.Clause(attribute=cat, filter_op="=", value=val)
                 new_spec.append(new_filter)
                 temp_vis = Vis(new_spec)
                 output.append(temp_vis)

diff --git a/lux/core/frame.py b/lux/core/frame.py
@@ -40,14 +40,18 @@ class LuxDataFrame(pd.DataFrame):
         "unique_values",
         "cardinality",
         "_rec_info",
-        "_pandas_only",
         "_min_max",
         "_current_vis",
         "_widget",
         "_recommendation",
         "_prev",
         "_history",
         "_saved_export",
+        "_sampled",
+        "_toggle_pandas_display",
+        "_message",
+        "_pandas_only",
+        "pre_aggregated",
     ]
 
     def __init__(self, *args, **kw):
@@ -67,7 +71,6 @@ def __init__(self, *args, **kw):
         self.table_name = ""
 
         self._sampled = None
-        self._default_pandas_display = True
         self._toggle_pandas_display = True
         self._message = Message()
         self._pandas_only = False
@@ -771,7 +774,7 @@ def current_vis_to_JSON(vlist, input_current_vis=""):
         current_vis_spec = {}
         numVC = len(vlist)  # number of visualizations in the vis list
         if numVC == 1:
-            current_vis_spec = vlist[0].render_VSpec()
+            current_vis_spec = vlist[0].to_code(prettyOutput=False)
         elif numVC > 1:
             pass
         return current_vis_spec
@@ -786,10 +789,10 @@ def rec_to_JSON(recs):
             if len(rec["collection"]) > 0:
                 rec["vspec"] = []
                 for vis in rec["collection"]:
-                    chart = vis.render_VSpec()
+                    chart = vis.to_code(prettyOutput=False)
                     rec["vspec"].append(chart)
                 rec_lst.append(rec)
-                # delete DataObjectCollection since not JSON serializable
+                # delete since not JSON serializable
                 del rec_lst[idx]["collection"]
         return rec_lst
 

diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py
@@ -37,17 +37,29 @@ def __repr__(self):
         return f"<Compiler>"
 
     @staticmethod
-    def compile_vis(ldf: LuxDataFrame, vis: Vis) -> VisList:
+    def compile_vis(ldf: LuxDataFrame, vis: Vis) -> Vis:
+        """
+        Root method for compiling visualizations
+
+        Parameters
+        ----------
+        ldf : LuxDataFrame
+        vis : Vis
+
+        Returns
+        -------
+        Vis
+            Compiled Vis object
+        """
         if vis:
             # autofill data type/model information
-            vis_collection = Compiler.populate_data_type_model(ldf, [vis])
+            Compiler.populate_data_type_model(ldf, [vis])
             # remove invalid visualizations from collection
-            vis_collection = Compiler.remove_all_invalid(vis_collection)
-            for vis in vis_collection:
-                # autofill viz related information
-                Compiler.determine_encoding(ldf, vis)
+            Compiler.remove_all_invalid([vis])
+            # autofill viz related information
+            Compiler.determine_encoding(ldf, vis)
             ldf._compiled = True
-            return vis_collection
+            return vis
 
     @staticmethod
     def compile_intent(ldf: LuxDataFrame, _inferred_intent: List[Clause]) -> VisList:
@@ -72,7 +84,7 @@ def compile_intent(ldf: LuxDataFrame, _inferred_intent: List[Clause]) -> VisList
         if _inferred_intent:
             vis_collection = Compiler.enumerate_collection(_inferred_intent, ldf)
             # autofill data type/model information
-            vis_collection = Compiler.populate_data_type_model(ldf, vis_collection)
+            Compiler.populate_data_type_model(ldf, vis_collection)
             # remove invalid visualizations from collection
             if len(vis_collection) >= 1:
                 vis_collection = Compiler.remove_all_invalid(vis_collection)
@@ -131,7 +143,7 @@ def combine(col_attrs, accum):
         return VisList(collection)
 
     @staticmethod
-    def populate_data_type_model(ldf, vis_collection) -> VisList:
+    def populate_data_type_model(ldf, vlist):
         """
         Given a underspecified Clause, populate the data_type and data_model information accordingly
 
@@ -142,16 +154,10 @@ def populate_data_type_model(ldf, vis_collection) -> VisList:
 
         vis_collection : list[lux.vis.Vis]
                 List of lux.Vis objects that will have their underspecified Clause details filled out.
-        Returns
-        -------
-        vlist: VisList
-                vis list with compiled lux.Vis objects.
         """
         # TODO: copy might not be neccesary
         from lux.utils.date_utils import is_datetime_string
-        import copy
 
-        vlist = copy.deepcopy(vis_collection)  # Preserve the original dobj
         for vis in vlist:
             for clause in vis._inferred_intent:
                 if clause.description == "?":
@@ -173,7 +179,6 @@ def populate_data_type_model(ldf, vis_collection) -> VisList:
                         else:
                             chart_title = clause.value
                         vis.title = f"{clause.attribute} {clause.filter_op} {chart_title}"
-        return vlist
 
     @staticmethod
     def remove_all_invalid(vis_collection: VisList) -> VisList:

diff --git a/lux/vis/Clause.py b/lux/vis/Clause.py
@@ -124,7 +124,7 @@ def to_string(self):
     def __repr__(self):
         attributes = []
         if self.description != "":
-            attributes.append("         description: " + self.description)
+            attributes.append(f"         description: {self.description}")
         if self.channel != "":
             attributes.append("         channel: " + self.channel)
         if len(self.attribute) != 0:

diff --git a/lux/vis/Vis.py b/lux/vis/Vis.py
@@ -257,9 +257,24 @@ def to_VegaLite(self, prettyOutput=True) -> Union[dict, str]:
         else:
             return self._code
 
-    def render_VSpec(self, renderer="altair"):
-        if renderer == "altair":
-            return self.to_VegaLite(prettyOutput=False)
+    def to_code(self, language="vegalite", **kwargs):
+        """
+        Export Vis object to code specification
+
+        Parameters
+        ----------
+        language : str, optional
+            choice of target language to produce the visualization code in, by default "vegalite"
+
+        Returns
+        -------
+        spec:
+            visualization specification corresponding to the Vis object
+        """
+        if language == "vegalite":
+            return self.to_VegaLite(**kwargs)
+        elif language == "altair":
+            return self.to_Altair(**kwargs)
 
     def refresh_source(self, ldf):  # -> Vis:
         """
@@ -288,26 +303,15 @@ def refresh_source(self, ldf):  # -> Vis:
             from lux.processor.Parser import Parser
             from lux.processor.Validator import Validator
             from lux.processor.Compiler import Compiler
-            from lux.executor.PandasExecutor import PandasExecutor
-
-            # TODO: temporary (generalize to executor)
 
             self.check_not_vislist_intent()
 
             ldf.maintain_metadata()
             self._source = ldf
             self._inferred_intent = Parser.parse(self._intent)
             Validator.validate_intent(self._inferred_intent, ldf)
-            vlist = Compiler.compile_vis(ldf, self)
-            ldf.executor.execute(vlist, ldf)
-            # Copying properties over since we can not redefine `self` within class function
-            if len(vlist) > 0:
-                vis = vlist[0]
-                self.title = vis.title
-                self._mark = vis._mark
-                self._inferred_intent = vis._inferred_intent
-                self._vis_data = vis.data
-                self._min_max = vis._min_max
+            Compiler.compile_vis(ldf, self)
+            ldf.executor.execute([self], ldf)
 
     def check_not_vislist_intent(self):
         import sys

diff --git a/lux/vis/VisList.py b/lux/vis/VisList.py
@@ -303,10 +303,8 @@ def refresh_source(self, ldf):
                     for vis in self._collection:
                         vis._inferred_intent = Parser.parse(vis._intent)
                         Validator.validate_intent(vis._inferred_intent, ldf)
-                        vislist = Compiler.compile_vis(ldf, vis)
-                        if len(vislist) > 0:
-                            vis = vislist[0]
-                            compiled_collection.append(vis)
+                        Compiler.compile_vis(ldf, vis)
+                        compiled_collection.append(vis)
                     self._collection = compiled_collection
                 else:
                     self._inferred_intent = Parser.parse(self._intent)

diff --git a/lux/vislib/altair/AltairRenderer.py b/lux/vislib/altair/AltairRenderer.py
@@ -35,7 +35,7 @@ def __repr__(self):
 
     def create_vis(self, vis, standalone=True):
         """
-        Input DataObject and return a visualization specification
+        Input Vis object and return a visualization specification
 
         Parameters
         ----------

diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py
@@ -432,8 +432,7 @@ def check_metadata_equal(df1, df2):
                     elif key in y_info:
                         assert x_info[key] == y_info[key]
 
-        elif attr != "_widget":
-            print(attr)
+        elif attr != "_widget" and attr != "_sampled" and attr != "_message":
             assert getattr(df1, attr) == getattr(df2, attr)