Merge pull request #237 from thyneb19/Database-Executor

Merged in changes from master branch
lux-org · Jan 21, 2021 · 56ff766 · 56ff766
2 parents 395dfd6 + 02906f9
commit 56ff766
Show file tree

Hide file tree

Showing 18 changed files with 358 additions and 104 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -14,4 +14,4 @@ script:
   - python -m pytest tests/*.py
   - pytest --cov-report term --cov=lux tests/
 after_success:
-  - bash <(curl -s https://codecov.io/bash)
+  - bash <(curl -s https://codecov.io/bash)
diff --git a/doc/source/reference/config.rst b/doc/source/reference/config.rst
@@ -2,7 +2,28 @@
 Configuration Settings 
 ***********************
 
-In Lux, users can customize various global settings to configure the behavior of Lux through :py:class:`lux.config.Config`. This page documents some of the configurations that you can apply in Lux.
+In Lux, users can customize various global settings to configure the behavior of Lux through :py:class:`lux.config.Config`. These configurations are applied across all dataframes in the session. This page documents some of the configurations that you can apply in Lux.
+
+.. note::
+
+    Lux caches previously generated recommendations, so if you have already printed the dataframe, the recommendations will not be regenerated with the new config properties. For the config properties to take effect, you need to explicitly expire the recommendations as follows:
+
+        .. code-block:: python
+
+            df = pd.read_csv("..")
+            df # recommendations already generated here
+
+            df.expire_recs()
+            lux.config.SOME_SETTING = "..."
+            df # recommendation will be generated again here
+
+    Alternatively, you can set the config properties before you print out the dataframe for the first time:
+
+        .. code-block:: python
+
+            df = pd.read_csv("..")
+            lux.config.SOME_SETTING = "..."
+            df # recommendations generated for the first time with config
 
 
 Change the default display of Lux
@@ -108,3 +129,35 @@ The above results in the following changes:
 
 See `this page <https://lux-api.readthedocs.io/en/latest/source/guide/style.html>`__ for more details.
 
+Modify Sorting and Ranking in Recommendations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In Lux, we select a small subset of visualizations to display in each action tab to avoid displaying too many charts at once. 
+Certain recommendation categories rank and select the top K most interesting visualizations to display.
+You can modify the sorting order and selection cutoff via :code:`lux.config`.
+By default, the recommendations are sorted in :code:`"descending"` order based on their interestingness score. You can reverse the ordering by setting the sort order as follows:
+
+.. code-block:: python 
+
+    lux.config.sort = "ascending"
+
+To turn off the sorting of visualizations based on their scores completely and ensure that the visualizations show up in the same order across all dataframes, you can set the sorting as "none":
+
+.. code-block:: python 
+
+    lux.config.sort = "none"
+
+For recommendation actions that generate a lot of visualizations, we select the cutoff criteria as the top 15 visualizations. If you would like to see only the top 6 visualizations, you can set:
+
+.. code-block:: python 
+
+    lux.config.topk = 6
+
+If you would like to turn off the selection criteria completely and display everything, you can turn off the top K selection by:
+
+.. code-block:: python 
+
+    lux.config.topk = False
+
+Beware that this may generate large numbers of visualizations (e.g., for 10 quantitative variables, this will generate 45 scatterplots in the Correlation action!)
+
diff --git a/doc/source/reference/gen/lux._config.config.Config.rst b/doc/source/reference/gen/lux._config.config.Config.rst
@@ -14,6 +14,8 @@ lux.\_config.config.Config
    .. autosummary::
 
       ~Config.__init__
+      ~Config.register_action
+      ~Config.remove_action
       ~Config.set_SQL_connection
       ~Config.set_executor_type
 
@@ -30,5 +32,7 @@ lux.\_config.config.Config
       ~Config.sampling
       ~Config.sampling_cap
       ~Config.sampling_start
+      ~Config.sort
+      ~Config.topk
 
 
diff --git a/doc/source/reference/gen/lux.core.series.LuxSeries.rst b/doc/source/reference/gen/lux.core.series.LuxSeries.rst
@@ -53,7 +53,6 @@ lux.core.series.LuxSeries
       ~LuxSeries.cumsum
       ~LuxSeries.describe
       ~LuxSeries.diff
-      ~LuxSeries.display_pandas
       ~LuxSeries.div
       ~LuxSeries.divide
       ~LuxSeries.divmod

diff --git a/lux/_config/config.py b/lux/_config/config.py
@@ -32,6 +32,8 @@ def __init__(self):
         self._heatmap_flag = True
         self._topk = 15
         self._sort = "descending"
+        self._pandas_fallback = True
+        self._interestingness_fallback = True
 
     @property
     def topk(self):
@@ -80,6 +82,47 @@ def sort(self, flag: Union[str]):
                 stacklevel=2,
             )
 
+    @property
+    def pandas_fallback(self):
+        return self._pandas_fallback
+
+    @pandas_fallback.setter
+    def pandas_fallback(self, fallback: bool) -> None:
+        """
+        Parameters
+        ----------
+        fallback : bool
+            If an error occurs, whether to fall back to the default Pandas display (True) or raise the exception (False).
+        """
+        if type(fallback) == bool:
+            self._pandas_fallback = fallback
+        else:
+            warnings.warn(
+                "The flag for Pandas fallback must be a boolean.",
+                stacklevel=2,
+            )
+
+    @property
+    def interestingness_fallback(self):
+        return self._interestingness_fallback
+
+    @interestingness_fallback.setter
+    def interestingness_fallback(self, fallback: bool) -> None:
+        """
+        Parameters
+        ----------
+        fallback : bool
+            If an error occurs while calculating interestingness, whether or not
+            to raise an exception or fallback to default Pandas.
+        """
+        if type(fallback) == bool:
+            self._interestingness_fallback = fallback
+        else:
+            warnings.warn(
+                "The flag for interestingness fallback must be a boolean.",
+                stacklevel=2,
+            )
+
     @property
     def sampling_cap(self):
         """

diff --git a/lux/action/default.py b/lux/action/default.py
@@ -7,7 +7,6 @@ def register_default_actions():
     from lux.action.filter import add_filter
     from lux.action.generalize import generalize
 
-    print("Register default actions")
     # display conditions for default actions
     no_vis = lambda ldf: (ldf.current_vis is None) or (
         ldf.current_vis is not None and len(ldf.current_vis) == 0

diff --git a/lux/core/frame.py b/lux/core/frame.py
@@ -38,7 +38,7 @@ class LuxDataFrame(pd.DataFrame):
     _metadata = [
         "_intent",
         "_inferred_intent",
-        "data_type",
+        "_data_type",
         "unique_values",
         "cardinality",
         "_rec_info",
@@ -54,6 +54,7 @@ class LuxDataFrame(pd.DataFrame):
         "_message",
         "_pandas_only",
         "pre_aggregated",
+        "_type_override",
     ]
 
     def __init__(self, *args, **kw):
@@ -82,11 +83,12 @@ def __init__(self, *args, **kw):
         self._message = Message()
         self._pandas_only = False
         # Metadata
-        self.data_type = None
+        self._data_type = None
         self.unique_values = None
         self.cardinality = None
         self._min_max = None
         self.pre_aggregated = None
+        self._type_override = {}
         warnings.formatwarning = lux.warning_format
 
     @property
@@ -107,6 +109,12 @@ def f(*args, **kwargs):
     def history(self):
         return self._history
 
+    @property
+    def data_type(self):
+        if not self._data_type:
+            self.maintain_metadata()
+        return self._data_type
+
     def maintain_metadata(self):
         if lux.config.SQLconnection != "" and lux.config.executor.name != "SQL":
             from lux.executor.SQLExecutor import SQLExecutor
@@ -138,7 +146,7 @@ def expire_metadata(self):
         Expire all saved metadata to trigger a recomputation the next time the data is required.
         """
         self._metadata_fresh = False
-        self.data_type = None
+        self._data_type = None
         self.unique_values = None
         self.cardinality = None
         self._min_max = None
@@ -254,6 +262,40 @@ def set_intent_as_vis(self, vis: Vis):
         self._intent = vis._inferred_intent
         self._parse_validate_compile_intent()
 
+    def set_data_type(self, types: dict):
+        """
+        Set the data type for a particular attribute in the dataframe
+        overriding the automatically-detected type inferred by Lux
+
+        Parameters
+        ----------
+        types: dict
+            Dictionary that maps attribute/column name to a specified Lux Type.
+            Possible options: "nominal", "quantitative", "id", and "temporal".
+
+        Example
+        ----------
+        df = pd.read_csv("https://raw.githubusercontent.com/lux-org/lux-datasets/master/data/absenteeism.csv")
+        df.set_data_type({"ID":"id",
+                          "Reason for absence":"nominal"})
+        """
+        if self._type_override == None:
+            self._type_override = types
+        else:
+            self._type_override = {**self._type_override, **types}
+
+        if not self.data_type:
+            self.maintain_metadata()
+
+        for attr in types:
+            if types[attr] not in ["nominal", "quantitative", "id", "temporal"]:
+                raise ValueError(
+                    f'Invalid data type option specified for {attr}. Please use one of the following supported types: ["nominal", "quantitative", "id", "temporal"]'
+                )
+            self.data_type[attr] = types[attr]
+
+        self.expire_recs()
+
     def to_pandas(self):
         import lux.core
 
@@ -564,14 +606,17 @@ def on_button_clicked(b):
         except (KeyboardInterrupt, SystemExit):
             raise
         except Exception:
-            warnings.warn(
-                "\nUnexpected error in rendering Lux widget and recommendations. "
-                "Falling back to Pandas display.\n"
-                "Please report the following issue on Github: https://github.com/lux-org/lux/issues \n",
-                stacklevel=2,
-            )
-            warnings.warn(traceback.format_exc())
-            display(self.display_pandas())
+            if lux.config.pandas_fallback:
+                warnings.warn(
+                    "\nUnexpected error in rendering Lux widget and recommendations. "
+                    "Falling back to Pandas display.\n"
+                    "Please report the following issue on Github: https://github.com/lux-org/lux/issues \n",
+                    stacklevel=2,
+                )
+                warnings.warn(traceback.format_exc())
+                display(self.display_pandas())
+            else:
+                raise
 
     def display_pandas(self):
         return self.to_pandas()

diff --git a/lux/core/series.py b/lux/core/series.py
@@ -84,8 +84,13 @@ def __repr__(self):
         ldf = LuxDataFrame(self)
 
         try:
+            # Ignore recommendations when the Series is a result of:
+            # 1) Values of the series are of dtype objects (df.dtypes)
             is_dtype_series = all(isinstance(val, np.dtype) for val in self.values)
-            if ldf._pandas_only or is_dtype_series:
+            # 2) Mixed type, often a result of a "row" acting as a series (df.iterrows, df.iloc[0])
+            # Tolerant for NaNs + 1 type
+            mixed_dtype = len(set([type(val) for val in self.values])) > 2
+            if ldf._pandas_only or is_dtype_series or mixed_dtype:
                 print(series_repr)
                 ldf._pandas_only = False
             else: