From b2ffe148de7971a8ca9587c4a935c652b67c26c8 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 26 Oct 2020 17:51:57 -0700 Subject: [PATCH 01/22] add black to travis --- .travis.yml | 1 + requirements.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index c087fe4d..c999278c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,6 +8,7 @@ install: #- npm i lux-widget # command to run tests script: + - black --check . - python -m pytest tests/*.py - pytest --cov-report term --cov=lux tests/ after_success: diff --git a/requirements.txt b/requirements.txt index 722203d1..25e680e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,7 @@ scikit-learn>=0.22 Sphinx>=3.0.2 sphinx-rtd-theme>=0.4.3 lux-widget==0.1.0 +black>=20.8b1 # Install only to use SQLExecutor # psycopg2>=2.8.5 # psycopg2-binary>=2.8.5 From 6715ebe52306367b4c3f7b39419994cc854f382b Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 26 Oct 2020 18:29:57 -0700 Subject: [PATCH 02/22] reformat all code and adjust test --- .idea/lux.iml | 12 + .idea/misc.xml | 4 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + .idea/workspace.xml | 237 ++++ doc/conf.py | 50 +- lux/__init__.py | 2 +- lux/_config/config.py | 269 ++-- lux/_version.py | 2 +- lux/action/__init__.py | 3 +- lux/action/column_group.py | 48 +- lux/action/correlation.py | 116 +- lux/action/custom.py | 34 +- lux/action/enhance.py | 96 +- lux/action/filter.py | 175 +-- lux/action/generalize.py | 138 ++- lux/action/row_group.py | 57 +- lux/action/similarity.py | 90 +- lux/action/univariate.py | 123 +- lux/core/__init__.py | 11 +- lux/core/frame.py | 1576 +++++++++++++----------- lux/core/series.py | 38 +- lux/executor/Executor.py | 15 +- lux/executor/PandasExecutor.py | 383 +++--- lux/executor/SQLExecutor.py | 189 ++- lux/executor/__init__.py | 3 +- lux/history/__init__.py | 3 +- lux/history/event.py | 31 +- lux/history/history.py | 53 +- lux/interestingness/__init__.py | 3 +- lux/interestingness/interestingness.py | 573 +++++---- lux/processor/Compiler.py | 867 +++++++------ lux/processor/Parser.py | 185 +-- lux/processor/Validator.py | 128 +- lux/processor/__init__.py | 3 +- lux/utils/__init__.py | 3 +- lux/utils/date_utils.py | 220 ++-- lux/utils/message.py | 26 +- lux/utils/utils.py | 116 +- lux/vis/Clause.py | 240 ++-- lux/vis/Vis.py | 578 +++++---- lux/vis/VisList.py | 609 ++++----- lux/vis/__init__.py | 4 +- lux/vislib/__init__.py | 3 +- lux/vislib/altair/AltairChart.py | 161 ++- lux/vislib/altair/AltairRenderer.py | 185 +-- lux/vislib/altair/BarChart.py | 184 +-- lux/vislib/altair/Heatmap.py | 101 +- lux/vislib/altair/Histogram.py | 121 +- lux/vislib/altair/LineChart.py | 95 +- lux/vislib/altair/ScatterChart.py | 96 +- lux/vislib/altair/__init__.py | 3 +- setup.py | 50 +- tests/__init__.py | 3 +- tests/context.py | 7 +- tests/test_action.py | 243 ++-- tests/test_compiler.py | 458 ++++--- tests/test_config.py | 235 ++-- tests/test_dates.py | 127 +- tests/test_display.py | 17 +- tests/test_error_warning.py | 38 +- tests/test_executor.py | 202 ++- tests/test_interestingness.py | 232 ++-- tests/test_maintainence.py | 62 +- tests/test_nan.py | 16 +- tests/test_pandas.py | 17 +- tests/test_pandas_coverage.py | 298 +++-- tests/test_parser.py | 105 +- tests/test_performance.py | 65 +- tests/test_type.py | 168 +-- tests/test_vis.py | 205 ++- 71 files changed, 6380 insertions(+), 4444 deletions(-) create mode 100644 .idea/lux.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml diff --git a/.idea/lux.iml b/.idea/lux.iml new file mode 100644 index 00000000..7c9d48f0 --- /dev/null +++ b/.idea/lux.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..65531ca9 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..e15faf5f --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 00000000..1424ff2b --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,237 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1602172290063 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file From d3b0320f8a0048c1c7a345010ceaa8c7fdb7f51a Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Wed, 28 Oct 2020 19:06:56 -0700 Subject: [PATCH 04/22] fix contributing doc --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8f068c4f..638212f6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -47,7 +47,7 @@ python -m pytest tests/*.py # Submitting a Pull Request -You can commit your code and push to your forked repo. Once all of your local changes have been tested and is working, you are ready to submit a PR. For Lux, we use the "Squash and Merge" strategy to merge in PR, which means that even if you make a lot of small commits in your PR, they will all get squashed into a single commit associated with the PR. Please make sure that comments and unnecessary file changes are not committed as part of the PR by looking at the "File Changes" diff view on the pull request page. +Before submitting a PR, please make sure you have formatted your code using the command `black .`. You can commit your code and push to your forked repo. Once all of your local changes have been tested and is working, you are ready to submit a PR. For Lux, we use the "Squash and Merge" strategy to merge in PR, which means that even if you make a lot of small commits in your PR, they will all get squashed into a single commit associated with the PR. Please make sure that comments and unnecessary file changes are not committed as part of the PR by looking at the "File Changes" diff view on the pull request page. Once the pull request is submitted, the maintainer will get notified and review your pull request. They may ask for additional changes or comment on the PR. You can always make updates to your pull request after submitting it. From 60b8eff9af0018b4cc6ba0e2b70df6e743bc7030 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Thu, 29 Oct 2020 00:17:58 -0700 Subject: [PATCH 05/22] small change in contributing --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index accec953..ac05767b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -50,7 +50,7 @@ In order to keep our codebase clean and readible, we are using PEP8 guidelines. # Submitting a Pull Request - You can commit your code and push to your forked repo. Once all of your local changes have been tested and is working, you are ready to submit a PR. For Lux, we use the "Squash and Merge" strategy to merge in PR, which means that even if you make a lot of small commits in your PR, they will all get squashed into a single commit associated with the PR. Please make sure that comments and unnecessary file changes are not committed as part of the PR by looking at the "File Changes" diff view on the pull request page. + You can commit your code and push to your forked repo. Once all of your local changes have been tested and formatted, you are ready to submit a PR. For Lux, we use the "Squash and Merge" strategy to merge in PR, which means that even if you make a lot of small commits in your PR, they will all get squashed into a single commit associated with the PR. Please make sure that comments and unnecessary file changes are not committed as part of the PR by looking at the "File Changes" diff view on the pull request page. Once the pull request is submitted, the maintainer will get notified and review your pull request. They may ask for additional changes or comment on the PR. You can always make updates to your pull request after submitting it. From 824dd185ce8e16824c5b35a9b4a3fa748f8910ab Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Thu, 29 Oct 2020 00:32:38 -0700 Subject: [PATCH 06/22] update --- lux/core/frame.py | 1572 ++++++++++++++++++++++++-------------------- lux/vis/VisList.py | 515 ++++++++------- 2 files changed, 1145 insertions(+), 942 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 798d7c1a..c31226d4 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -26,711 +26,867 @@ class LuxDataFrame(pd.DataFrame): - ''' - A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. - ''' - # MUST register here for new properties!! - _metadata = ['_intent','data_type_lookup','data_type', - 'data_model_lookup','data_model','unique_values','cardinality','_rec_info', '_pandas_only', - '_min_max','plot_config', '_current_vis','_widget', '_recommendation','_prev','_history', '_saved_export'] - - def __init__(self,*args, **kw): - from lux.executor.PandasExecutor import PandasExecutor - self._history = History() - self._intent = [] - self._recommendation = {} - self._saved_export = None - self._current_vis = [] - self._prev = None - super(LuxDataFrame, self).__init__(*args, **kw) - - self.executor_type = "Pandas" - self.executor = PandasExecutor() - self.SQLconnection = "" - self.table_name = "" - - self._sampled = None - self._default_pandas_display = True - self._toggle_pandas_display = True - self._plot_config = None - self._message = Message() - self._pandas_only=False - # Metadata - self.data_type_lookup = None - self.data_type = None - self.data_model_lookup = None - self.data_model = None - self.unique_values = None - self.cardinality = None - self._min_max = None - self.pre_aggregated = None - - @property - def _constructor(self): - return LuxDataFrame - # @property - # def _constructor_sliced(self): - # def f(*args, **kwargs): - # # adapted from https://github.com/pandas-dev/pandas/issues/13208#issuecomment-326556232 - # return LuxSeries(*args, **kwargs).__finalize__(self, method='inherit') - # return f - @property - def history(self): - return self._history - def maintain_metadata(self): - if (not hasattr(self,"_metadata_fresh") or not self._metadata_fresh ): # Check that metadata has not yet been computed - if (len(self)>0): #only compute metadata information if the dataframe is non-empty - self.executor.compute_stats(self) - self.executor.compute_dataset_metadata(self) - self._infer_structure() - self._metadata_fresh = True - def expire_recs(self): - self._recs_fresh = False - self.recommendation = {} - self.current_vis = None - self._widget = None - self._rec_info = None - self._sampled = None - def expire_metadata(self): - # Set metadata as null - self._metadata_fresh = False - self.data_type_lookup = None - self.data_type = None - self.data_model_lookup = None - self.data_model = None - self.unique_values = None - self.cardinality = None - self._min_max = None - self.pre_aggregated = None - - ##################### - ## Override Pandas ## - ##################### - def __getattr__(self, name): - ret_value = super(LuxDataFrame, self).__getattr__(name) - self.expire_metadata() - self.expire_recs() - return ret_value - def _set_axis(self, axis, labels): - super(LuxDataFrame, self)._set_axis(axis, labels) - self.expire_metadata() - self.expire_recs() - def _update_inplace(self,*args,**kwargs): - super(LuxDataFrame, self)._update_inplace(*args,**kwargs) - self.expire_metadata() - self.expire_recs() - def _set_item(self, key, value): - super(LuxDataFrame, self)._set_item(key, value) - self.expire_metadata() - self.expire_recs() - def _infer_structure(self): - # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data - is_multi_index_flag = self.index.nlevels !=1 - not_int_index_flag = self.index.dtype !='int64' - small_df_flag = len(self)<100 - self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag - if ("Number of Records" in self.columns): - self.pre_aggregated = True - very_small_df_flag = len(self)<=10 - if (very_small_df_flag): - self.pre_aggregated = True - def set_executor_type(self, exe): - if (exe =="SQL"): - import pkgutil - if (pkgutil.find_loader("psycopg2") is None): - raise ImportError("psycopg2 is not installed. Run `pip install psycopg2' to install psycopg2 to enable the Postgres connection.") - else: - import psycopg2 - from lux.executor.SQLExecutor import SQLExecutor - self.executor = SQLExecutor - else: - from lux.executor.PandasExecutor import PandasExecutor - self.executor = PandasExecutor() - self.executor_type = exe - @property - def plot_config(self): - return self._plot_config - @plot_config.setter - def plot_config(self,config_func:Callable): - """ - Modify plot aesthetic settings to all visualizations in the dataframe display - Currently only supported for Altair visualizations - Parameters - ---------- - config_func : Callable - A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output - - Example - ---------- - Changing the color of marks and adding a title for all charts displayed for this dataframe - >>> df = pd.read_csv("lux/data/car.csv") - >>> def changeColorAddTitle(chart): - chart = chart.configure_mark(color="red") # change mark color to red - chart.title = "Custom Title" # add title to chart - return chart - >>> df.plot_config = changeColorAddTitle - >>> df - Change the opacity of all scatterplots displayed for this dataframe - >>> df = pd.read_csv("lux/data/olympic.csv") - >>> def changeOpacityScatterOnly(chart): - if chart.mark=='circle': - chart = chart.configure_mark(opacity=0.1) # lower opacity - return chart - >>> df.plot_config = changeOpacityScatterOnly - >>> df - """ - self._plot_config = config_func - self._recs_fresh=False - def clear_plot_config(self): - self._plot_config = None - self._recs_fresh=False - - @property - def intent(self): - return self._intent - @intent.setter - def intent(self, intent_input:Union[List[Union[str, Clause]],Vis]): - is_list_input = isinstance(intent_input,list) - is_vis_input = isinstance(intent_input,Vis) - if not (is_list_input or is_vis_input): - raise TypeError("Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object." - "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" - ) - if is_list_input: - self.set_intent(intent_input) - elif is_vis_input: - self.set_intent_as_vis(intent_input) - def clear_intent(self): - self.intent = [] - def set_intent(self, intent:List[Union[str, Clause]]): - """ - Main function to set the intent of the dataframe. - The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. - - Parameters - ---------- - intent : List[str,Clause] - intent list, can be a mix of string shorthand or a lux.Clause object - - Notes - ----- - :doc:`../guide/clause` - """ - self.expire_recs() - self._intent = intent - self._parse_validate_compile_intent() - def _parse_validate_compile_intent(self): - from lux.processor.Parser import Parser - from lux.processor.Validator import Validator - self._intent = Parser.parse(self._intent) - Validator.validate_intent(self._intent,self) - self.maintain_metadata() - from lux.processor.Compiler import Compiler - self.current_vis = Compiler.compile_intent(self, self._intent) - - def copy_intent(self): - #creates a true copy of the dataframe's intent - output = [] - for clause in self._intent: - temp_clause = clause.copy_clause() - output.append(temp_clause) - return(output) - - def set_intent_as_vis(self,vis:Vis): - """ - Set intent of the dataframe as the Vis - - Parameters - ---------- - vis : Vis - """ - self.expire_recs() - self._intent = vis._inferred_intent - self._parse_validate_compile_intent() - - def to_pandas(self): - import lux.core - return lux.core.originalDF(self,copy=False) - - @property - def recommendation(self): - return self._recommendation - @recommendation.setter - def recommendation(self,recommendation:Dict): - self._recommendation = recommendation - @property - def current_vis(self): - return self._current_vis - @current_vis.setter - def current_vis(self,current_vis:Dict): - self._current_vis = current_vis - def __repr__(self): - # TODO: _repr_ gets called from _repr_html, need to get rid of this call - return "" - - ####################################################### - ########## SQL Metadata, type, model schema ########### - ####################################################### - - def set_SQL_connection(self, connection, t_name): - self.SQLconnection = connection - self.table_name = t_name - self.compute_SQL_dataset_metadata() - self.set_executor_type("SQL") - - def compute_SQL_dataset_metadata(self): - self.get_SQL_attributes() - for attr in list(self.columns): - self[attr] = None - self.data_type_lookup = {} - self.data_type = {} - #####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this - ##### in the initialization and do it just once - self.compute_SQL_data_type() - self.compute_SQL_stats() - self.data_model_lookup = {} - self.data_model = {} - self.compute_data_model() - - def compute_SQL_stats(self): - # precompute statistics - self.unique_values = {} - self._min_max = {} - - self.get_SQL_unique_values() - #self.get_SQL_cardinality() - for attribute in self.columns: - if self.data_type_lookup[attribute] == 'quantitative': - self._min_max[attribute] = (self[attribute].min(), self[attribute].max()) - - def get_SQL_attributes(self): - if "." in self.table_name: - table_name = self.table_name[self.table_name.index(".")+1:] - else: - table_name = self.table_name - attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format(table_name) - attributes = list(pd.read_sql(attr_query, self.SQLconnection)['column_name']) - for attr in attributes: - self[attr] = None - - def get_SQL_cardinality(self): - cardinality = {} - for attr in list(self.columns): - card_query = pd.read_sql("SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), self.SQLconnection) - cardinality[attr] = list(card_query["count"])[0] - self.cardinality = cardinality - - def get_SQL_unique_values(self): - unique_vals = {} - for attr in list(self.columns): - unique_query = pd.read_sql("SELECT Distinct({}) FROM {}".format(attr, self.table_name), self.SQLconnection) - unique_vals[attr] = list(unique_query[attr]) - self.unique_values = unique_vals - - def compute_SQL_data_type(self): - data_type_lookup = {} - sql_dtypes = {} - self.get_SQL_cardinality() - if "." in self.table_name: - table_name = self.table_name[self.table_name.index(".")+1:] - else: - table_name = self.table_name - #get the data types of the attributes in the SQL table - for attr in list(self.columns): - datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format(table_name, attr) - datatype = list(pd.read_sql(datatype_query, self.SQLconnection)['data_type'])[0] - sql_dtypes[attr] = datatype - - data_type = {"quantitative":[], "nominal":[], "temporal":[]} - for attr in list(self.columns): - if str(attr).lower() in ["month", "year"]: - data_type_lookup[attr] = "temporal" - data_type["temporal"].append(attr) - elif sql_dtypes[attr] in ["character", "character varying", "boolean", "uuid", "text"]: - data_type_lookup[attr] = "nominal" - data_type["nominal"].append(attr) - elif sql_dtypes[attr] in ["integer", "real", "smallint", "smallserial", "serial"]: - if self.cardinality[attr] < 13: - data_type_lookup[attr] = "nominal" - data_type["nominal"].append(attr) - else: - data_type_lookup[attr] = "quantitative" - data_type["quantitative"].append(attr) - elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]: - data_type_lookup[attr] = "temporal" - data_type["temporal"].append(attr) - self.data_type_lookup = data_type_lookup - self.data_type = data_type - def _append_rec(self,rec_infolist,recommendations:Dict): - if (recommendations["collection"] is not None and len(recommendations["collection"])>0): - rec_infolist.append(recommendations) - def maintain_recs(self): - # `rec_df` is the dataframe to generate the recommendations on - # check to see if globally defined actions have been registered/removed - if (lux.update_actions["flag"] == True): - self._recs_fresh = False - show_prev = False # flag indicating whether rec_df is showing previous df or current self - if self._prev is not None: - rec_df = self._prev - rec_df._message = Message() - rec_df.maintain_metadata() # the prev dataframe may not have been printed before - last_event = self.history._events[-1].name - rec_df._message.add(f"Lux is visualizing the previous version of the dataframe before you applied {last_event}.") - show_prev = True - else: - rec_df = self - rec_df._message = Message() - # Add warning message if there exist ID fields - id_fields_str = "" - if (len(rec_df.data_type["id"])>0): - for id_field in rec_df.data_type["id"]: id_fields_str += f"{id_field}, " - id_fields_str = id_fields_str[:-2] - rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") - rec_df._prev = None # reset _prev - - if (not hasattr(rec_df,"_recs_fresh") or not rec_df._recs_fresh ): # Check that recs has not yet been computed - rec_infolist = [] - from lux.action.custom import custom - from lux.action.custom import custom_actions - from lux.action.correlation import correlation - from lux.action.univariate import univariate - from lux.action.enhance import enhance - from lux.action.filter import filter - from lux.action.generalize import generalize - from lux.action.row_group import row_group - from lux.action.column_group import column_group - if (rec_df.pre_aggregated): - if (rec_df.columns.name is not None): - rec_df._append_rec(rec_infolist, row_group(rec_df)) - if (rec_df.index.name is not None): - rec_df._append_rec(rec_infolist, column_group(rec_df)) - else: - if self.recommendation == {}: - # display conditions for default actions - no_vis = lambda ldf: (ldf.current_vis is None) or (ldf.current_vis is not None and len(ldf.current_vis) == 0) - one_current_vis = lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) == 1 - multiple_current_vis = lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) > 1 - - # globally register default actions - lux.register_action("correlation", correlation, no_vis) - lux.register_action("distribution", univariate, no_vis, "quantitative") - lux.register_action("occurrence", univariate, no_vis, "nominal") - lux.register_action("temporal", univariate, no_vis, "temporal") - - lux.register_action("enhance", enhance, one_current_vis) - lux.register_action("filter", filter, one_current_vis) - lux.register_action("generalize", generalize, one_current_vis) - - lux.register_action("custom", custom, multiple_current_vis) - - # generate vis from globally registered actions and append to dataframe - custom_action_collection = custom_actions(rec_df) - for rec in custom_action_collection: - rec_df._append_rec(rec_infolist, rec) - lux.update_actions["flag"] = False - - # Store _rec_info into a more user-friendly dictionary form - rec_df.recommendation = {} - for rec_info in rec_infolist: - action_type = rec_info["action"] - vlist = rec_info["collection"] - if (rec_df._plot_config): - for vis in rec_df.current_vis: vis._plot_config = rec_df.plot_config - for vis in vlist: vis._plot_config = rec_df.plot_config - if (len(vlist)>0): - rec_df.recommendation[action_type] = vlist - rec_df._rec_info = rec_infolist - self._widget = rec_df.render_widget() - elif (show_prev): # re-render widget for the current dataframe if previous rec is not recomputed - self._widget = rec_df.render_widget() - self._recs_fresh = True - - - ####################################################### - ############## LuxWidget Result Display ############### - ####################################################### - @property - def widget(self): - if(self._widget): - return self._widget - @property - def exported(self) -> Union[Dict[str,VisList], VisList]: - """ - Get selected visualizations as exported Vis List - - Notes - ----- - Convert the _selectedVisIdxs dictionary into a programmable VisList - Example _selectedVisIdxs : - {'Correlation': [0, 2], 'Occurrence': [1]} - indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the `Occurrence` tab is selected. - - Returns - ------- - Union[Dict[str,VisList], VisList] - When there are no exported vis, return empty list -> [] - When all the exported vis is from the same tab, return a VisList of selected visualizations. -> VisList(v1, v2...) - When the exported vis is from the different tabs, return a dictionary with the action name as key and selected visualizations in the VisList. -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..} - """ - if not hasattr(self,"_widget"): - warnings.warn( - "\nNo widget attached to the dataframe." - "Please assign dataframe to an output variable.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - , stacklevel=2) - return [] - exported_vis_lst = self._widget._selectedVisIdxs - exported_vis = [] - if (exported_vis_lst=={}): - if self._saved_export: - return self._saved_export - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - ,stacklevel=2) - return [] - if len(exported_vis_lst) == 1 and "currentVis" in exported_vis_lst: - return self.current_vis - elif len(exported_vis_lst) > 1: - exported_vis = {} - if ("currentVis" in exported_vis_lst): - exported_vis["Current Vis"] = self.current_vis - for export_action in exported_vis_lst: - if (export_action != "currentVis"): - exported_vis[export_action] = VisList(list(map(self.recommendation[export_action].__getitem__, exported_vis_lst[export_action]))) - return exported_vis - elif len(exported_vis_lst) == 1 and ("currentVis" not in exported_vis_lst): - export_action = list(exported_vis_lst.keys())[0] - exported_vis = VisList(list(map(self.recommendation[export_action].__getitem__, exported_vis_lst[export_action]))) - self._saved_export = exported_vis - return exported_vis - else: - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - ,stacklevel=2) - return [] - - def remove_deleted_recs(self, change): - for action in self._widget.deletedIndices: - deletedSoFar = 0 - for index in self._widget.deletedIndices[action]: - self.recommendation[action].remove_index(index - deletedSoFar) - deletedSoFar += 1 - - def set_intent_on_click(self, change): - from IPython.display import display, clear_output - from lux.processor.Compiler import Compiler - - intent_action = list(self._widget.selectedIntentIndex.keys())[0] - vis = self.recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] - self.set_intent_as_vis(vis) - - self.maintain_metadata() - self.current_vis = Compiler.compile_intent(self, self._intent) - self.maintain_recs() - - with self.output: - clear_output() - display(self._widget) - - self._widget.observe(self.remove_deleted_recs, names='deletedIndices') - self._widget.observe(self.set_intent_on_click, names='selectedIntentIndex') - - def _repr_html_(self): - from IPython.display import display - from IPython.display import clear_output - import ipywidgets as widgets - - try: - if (self._pandas_only): - display(self.display_pandas()) - self._pandas_only=False - else: - if(self.index.nlevels>=2 or self.columns.nlevels >= 2): - warnings.warn( - "\nLux does not currently support dataframes " - "with hierarchical indexes.\n" - "Please convert the dataframe into a flat " - "table via `pandas.DataFrame.reset_index`.\n", - stacklevel=2, - ) - display(self.display_pandas()) - return - - if (len(self)<=0): - warnings.warn("\nLux can not operate on an empty dataframe.\nPlease check your input again.\n",stacklevel=2) - display(self.display_pandas()) - return - if (len(self.columns)<=1): - warnings.warn("\nLux defaults to Pandas when there is only a single column.",stacklevel=2) - display(self.display_pandas()) - return - self.maintain_metadata() - - if (self._intent!=[] and (not hasattr(self,"_compiled") or not self._compiled)): - from lux.processor.Compiler import Compiler - self.current_vis = Compiler.compile_intent(self, self._intent) - - if (lux.config.default_display == "lux"): - self._toggle_pandas_display = False - else: - self._toggle_pandas_display = True - - # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) - self.maintain_recs() - - #Observers(callback_function, listen_to_this_variable) - self._widget.observe(self.remove_deleted_recs, names='deletedIndices') - self._widget.observe(self.set_intent_on_click, names='selectedIntentIndex') - - if len(self.recommendation) > 0: - # box = widgets.Box(layout=widgets.Layout(display='inline')) - button = widgets.Button(description="Toggle Pandas/Lux",layout=widgets.Layout(width='140px',top='5px')) - self.output = widgets.Output() - # box.children = [button,output] - # output.children = [button] - # display(box) - display(button, self.output) - def on_button_clicked(b): - with self.output: - if (b): - self._toggle_pandas_display = not self._toggle_pandas_display - clear_output() - if (self._toggle_pandas_display): - display(self.display_pandas()) - else: - # b.layout.display = "none" - display(self._widget) - # b.layout.display = "inline-block" - button.on_click(on_button_clicked) - on_button_clicked(None) - else: - warnings.warn("\nLux defaults to Pandas when there are no valid actions defined.",stacklevel=2) - display(self.display_pandas()) - - except(KeyboardInterrupt,SystemExit): - raise - except: - warnings.warn( - "\nUnexpected error in rendering Lux widget and recommendations. " - "Falling back to Pandas display.\n\n" - "Please report this issue on Github: https://github.com/lux-org/lux/issues " - ,stacklevel=2) - display(self.display_pandas()) - def display_pandas(self): - return self.to_pandas() - def render_widget(self, renderer:str ="altair", input_current_vis=""): - """ - Generate a LuxWidget based on the LuxDataFrame - - Structure of widgetJSON: - { - 'current_vis': {}, - 'recommendation': [ - { - 'action': 'Correlation', - 'description': "some description", - 'vspec': [ - {Vega-Lite spec for vis 1}, - {Vega-Lite spec for vis 2}, - ... - ] - }, - ... repeat for other actions - ] - } - Parameters - ---------- - renderer : str, optional - Choice of visualization rendering library, by default "altair" - input_current_vis : lux.LuxDataFrame, optional - User-specified current vis to override default Current Vis, by default - """ - check_import_lux_widget() - import luxwidget - widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) - return luxwidget.LuxWidget( - currentVis=widgetJSON["current_vis"], - recommendations=widgetJSON["recommendation"], - intent=LuxDataFrame.intent_to_string(self._intent), - message = self._message.to_html() - ) - @staticmethod - def intent_to_JSON(intent): - from lux.utils import utils - - filter_specs = utils.get_filter_specs(intent) - attrs_specs = utils.get_attrs_specs(intent) - - intent = {} - intent['attributes'] = [clause.attribute for clause in attrs_specs] - intent['filters'] = [clause.attribute for clause in filter_specs] - return intent - @staticmethod - def intent_to_string(intent): - if (intent): - return ", ".join([clause.to_string() for clause in intent]) - else: - return "" - - def to_JSON(self, rec_infolist, input_current_vis=""): - widget_spec = {} - if (self.current_vis): - self.executor.execute(self.current_vis, self) - widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON(self.current_vis, input_current_vis) - else: - widget_spec["current_vis"] = {} - widget_spec["recommendation"] = [] - - # Recommended Collection - recCollection = LuxDataFrame.rec_to_JSON(rec_infolist) - widget_spec["recommendation"].extend(recCollection) - return widget_spec - - @staticmethod - def current_vis_to_JSON(vlist, input_current_vis=""): - current_vis_spec = {} - numVC = len(vlist) #number of visualizations in the vis list - if (numVC==1): - current_vis_spec = vlist[0].render_VSpec() - elif (numVC>1): - pass - return current_vis_spec - - @staticmethod - def rec_to_JSON(recs): - rec_lst = [] - import copy - rec_copy = copy.deepcopy(recs) - for idx,rec in enumerate(rec_copy): - if (len(rec["collection"])>0): - rec["vspec"] = [] - for vis in rec["collection"]: - chart = vis.render_VSpec() - rec["vspec"].append(chart) - rec_lst.append(rec) - # delete DataObjectCollection since not JSON serializable - del rec_lst[idx]["collection"] - return rec_lst - - # Overridden Pandas Functions - def head(self, n: int = 5): - self._prev = self - self._history.append_event("head", n=5) - return super(LuxDataFrame, self).head(n) - - def tail(self, n: int = 5): - self._prev = self - self._history.append_event("tail", n=5) - return super(LuxDataFrame, self).tail(n) - - def info(self, *args, **kwargs): - self._pandas_only=True - self._history.append_event("info",*args, **kwargs) - return super(LuxDataFrame, self).info(*args, **kwargs) - - def describe(self, *args, **kwargs): - self._pandas_only=True - self._history.append_event("describe",*args, **kwargs) - return super(LuxDataFrame, self).describe(*args, **kwargs) + """ + A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. + """ + + # MUST register here for new properties!! + _metadata = [ + "_intent", + "data_type_lookup", + "data_type", + "data_model_lookup", + "data_model", + "unique_values", + "cardinality", + "_rec_info", + "_pandas_only", + "_min_max", + "plot_config", + "_current_vis", + "_widget", + "_recommendation", + "_prev", + "_history", + "_saved_export", + ] + + def __init__(self, *args, **kw): + from lux.executor.PandasExecutor import PandasExecutor + + self._history = History() + self._intent = [] + self._recommendation = {} + self._saved_export = None + self._current_vis = [] + self._prev = None + super(LuxDataFrame, self).__init__(*args, **kw) + + self.executor_type = "Pandas" + self.executor = PandasExecutor() + self.SQLconnection = "" + self.table_name = "" + + self._sampled = None + self._default_pandas_display = True + self._toggle_pandas_display = True + self._plot_config = None + self._message = Message() + self._pandas_only = False + # Metadata + self.data_type_lookup = None + self.data_type = None + self.data_model_lookup = None + self.data_model = None + self.unique_values = None + self.cardinality = None + self._min_max = None + self.pre_aggregated = None + + @property + def _constructor(self): + return LuxDataFrame + + # @property + # def _constructor_sliced(self): + # def f(*args, **kwargs): + # # adapted from https://github.com/pandas-dev/pandas/issues/13208#issuecomment-326556232 + # return LuxSeries(*args, **kwargs).__finalize__(self, method='inherit') + # return f + @property + def history(self): + return self._history + + def maintain_metadata(self): + if ( + not hasattr(self, "_metadata_fresh") or not self._metadata_fresh + ): # Check that metadata has not yet been computed + if ( + len(self) > 0 + ): # only compute metadata information if the dataframe is non-empty + self.executor.compute_stats(self) + self.executor.compute_dataset_metadata(self) + self._infer_structure() + self._metadata_fresh = True + + def expire_recs(self): + self._recs_fresh = False + self.recommendation = {} + self.current_vis = None + self._widget = None + self._rec_info = None + self._sampled = None + + def expire_metadata(self): + # Set metadata as null + self._metadata_fresh = False + self.data_type_lookup = None + self.data_type = None + self.data_model_lookup = None + self.data_model = None + self.unique_values = None + self.cardinality = None + self._min_max = None + self.pre_aggregated = None + + ##################### + ## Override Pandas ## + ##################### + def __getattr__(self, name): + ret_value = super(LuxDataFrame, self).__getattr__(name) + self.expire_metadata() + self.expire_recs() + return ret_value + + def _set_axis(self, axis, labels): + super(LuxDataFrame, self)._set_axis(axis, labels) + self.expire_metadata() + self.expire_recs() + + def _update_inplace(self, *args, **kwargs): + super(LuxDataFrame, self)._update_inplace(*args, **kwargs) + self.expire_metadata() + self.expire_recs() + + def _set_item(self, key, value): + super(LuxDataFrame, self)._set_item(key, value) + self.expire_metadata() + self.expire_recs() + + def _infer_structure(self): + # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data + is_multi_index_flag = self.index.nlevels != 1 + not_int_index_flag = self.index.dtype != "int64" + small_df_flag = len(self) < 100 + self.pre_aggregated = ( + is_multi_index_flag or not_int_index_flag + ) and small_df_flag + if "Number of Records" in self.columns: + self.pre_aggregated = True + very_small_df_flag = len(self) <= 10 + if very_small_df_flag: + self.pre_aggregated = True + + def set_executor_type(self, exe): + if exe == "SQL": + import pkgutil + + if pkgutil.find_loader("psycopg2") is None: + raise ImportError( + "psycopg2 is not installed. Run `pip install psycopg2' to install psycopg2 to enable the Postgres connection." + ) + else: + import psycopg2 + from lux.executor.SQLExecutor import SQLExecutor + + self.executor = SQLExecutor + else: + from lux.executor.PandasExecutor import PandasExecutor + + self.executor = PandasExecutor() + self.executor_type = exe + + @property + def plot_config(self): + return self._plot_config + + @plot_config.setter + def plot_config(self, config_func: Callable): + """ + Modify plot aesthetic settings to all visualizations in the dataframe display + Currently only supported for Altair visualizations + Parameters + ---------- + config_func : Callable + A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output + + Example + ---------- + Changing the color of marks and adding a title for all charts displayed for this dataframe + >>> df = pd.read_csv("lux/data/car.csv") + >>> def changeColorAddTitle(chart): + chart = chart.configure_mark(color="red") # change mark color to red + chart.title = "Custom Title" # add title to chart + return chart + >>> df.plot_config = changeColorAddTitle + >>> df + Change the opacity of all scatterplots displayed for this dataframe + >>> df = pd.read_csv("lux/data/olympic.csv") + >>> def changeOpacityScatterOnly(chart): + if chart.mark=='circle': + chart = chart.configure_mark(opacity=0.1) # lower opacity + return chart + >>> df.plot_config = changeOpacityScatterOnly + >>> df + """ + self._plot_config = config_func + self._recs_fresh = False + + def clear_plot_config(self): + self._plot_config = None + self._recs_fresh = False + + @property + def intent(self): + return self._intent + + @intent.setter + def intent(self, intent_input: Union[List[Union[str, Clause]], Vis]): + is_list_input = isinstance(intent_input, list) + is_vis_input = isinstance(intent_input, Vis) + if not (is_list_input or is_vis_input): + raise TypeError( + "Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object." + "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" + ) + if is_list_input: + self.set_intent(intent_input) + elif is_vis_input: + self.set_intent_as_vis(intent_input) + + def clear_intent(self): + self.intent = [] + + def set_intent(self, intent: List[Union[str, Clause]]): + """ + Main function to set the intent of the dataframe. + The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. + + Parameters + ---------- + intent : List[str,Clause] + intent list, can be a mix of string shorthand or a lux.Clause object + + Notes + ----- + :doc:`../guide/clause` + """ + self.expire_recs() + self._intent = intent + self._parse_validate_compile_intent() + + def _parse_validate_compile_intent(self): + from lux.processor.Parser import Parser + from lux.processor.Validator import Validator + + self._intent = Parser.parse(self._intent) + Validator.validate_intent(self._intent, self) + self.maintain_metadata() + from lux.processor.Compiler import Compiler + + self.current_vis = Compiler.compile_intent(self, self._intent) + + def copy_intent(self): + # creates a true copy of the dataframe's intent + output = [] + for clause in self._intent: + temp_clause = clause.copy_clause() + output.append(temp_clause) + return output + + def set_intent_as_vis(self, vis: Vis): + """ + Set intent of the dataframe as the Vis + + Parameters + ---------- + vis : Vis + """ + self.expire_recs() + self._intent = vis._inferred_intent + self._parse_validate_compile_intent() + + def to_pandas(self): + import lux.core + + return lux.core.originalDF(self, copy=False) + + @property + def recommendation(self): + return self._recommendation + + @recommendation.setter + def recommendation(self, recommendation: Dict): + self._recommendation = recommendation + + @property + def current_vis(self): + return self._current_vis + + @current_vis.setter + def current_vis(self, current_vis: Dict): + self._current_vis = current_vis + + def __repr__(self): + # TODO: _repr_ gets called from _repr_html, need to get rid of this call + return "" + + ####################################################### + ########## SQL Metadata, type, model schema ########### + ####################################################### + + def set_SQL_connection(self, connection, t_name): + self.SQLconnection = connection + self.table_name = t_name + self.compute_SQL_dataset_metadata() + self.set_executor_type("SQL") + + def compute_SQL_dataset_metadata(self): + self.get_SQL_attributes() + for attr in list(self.columns): + self[attr] = None + self.data_type_lookup = {} + self.data_type = {} + #####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this + ##### in the initialization and do it just once + self.compute_SQL_data_type() + self.compute_SQL_stats() + self.data_model_lookup = {} + self.data_model = {} + self.compute_data_model() + + def compute_SQL_stats(self): + # precompute statistics + self.unique_values = {} + self._min_max = {} + + self.get_SQL_unique_values() + # self.get_SQL_cardinality() + for attribute in self.columns: + if self.data_type_lookup[attribute] == "quantitative": + self._min_max[attribute] = ( + self[attribute].min(), + self[attribute].max(), + ) + + def get_SQL_attributes(self): + if "." in self.table_name: + table_name = self.table_name[self.table_name.index(".") + 1 :] + else: + table_name = self.table_name + attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format( + table_name + ) + attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"]) + for attr in attributes: + self[attr] = None + + def get_SQL_cardinality(self): + cardinality = {} + for attr in list(self.columns): + card_query = pd.read_sql( + "SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), + self.SQLconnection, + ) + cardinality[attr] = list(card_query["count"])[0] + self.cardinality = cardinality + + def get_SQL_unique_values(self): + unique_vals = {} + for attr in list(self.columns): + unique_query = pd.read_sql( + "SELECT Distinct({}) FROM {}".format(attr, self.table_name), + self.SQLconnection, + ) + unique_vals[attr] = list(unique_query[attr]) + self.unique_values = unique_vals + + def compute_SQL_data_type(self): + data_type_lookup = {} + sql_dtypes = {} + self.get_SQL_cardinality() + if "." in self.table_name: + table_name = self.table_name[self.table_name.index(".") + 1 :] + else: + table_name = self.table_name + # get the data types of the attributes in the SQL table + for attr in list(self.columns): + datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format( + table_name, attr + ) + datatype = list( + pd.read_sql(datatype_query, self.SQLconnection)["data_type"] + )[0] + sql_dtypes[attr] = datatype + + data_type = {"quantitative": [], "nominal": [], "temporal": []} + for attr in list(self.columns): + if str(attr).lower() in ["month", "year"]: + data_type_lookup[attr] = "temporal" + data_type["temporal"].append(attr) + elif sql_dtypes[attr] in [ + "character", + "character varying", + "boolean", + "uuid", + "text", + ]: + data_type_lookup[attr] = "nominal" + data_type["nominal"].append(attr) + elif sql_dtypes[attr] in [ + "integer", + "real", + "smallint", + "smallserial", + "serial", + ]: + if self.cardinality[attr] < 13: + data_type_lookup[attr] = "nominal" + data_type["nominal"].append(attr) + else: + data_type_lookup[attr] = "quantitative" + data_type["quantitative"].append(attr) + elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]: + data_type_lookup[attr] = "temporal" + data_type["temporal"].append(attr) + self.data_type_lookup = data_type_lookup + self.data_type = data_type + + def _append_rec(self, rec_infolist, recommendations: Dict): + if ( + recommendations["collection"] is not None + and len(recommendations["collection"]) > 0 + ): + rec_infolist.append(recommendations) + + def maintain_recs(self): + # `rec_df` is the dataframe to generate the recommendations on + # check to see if globally defined actions have been registered/removed + if lux.update_actions["flag"] == True: + self._recs_fresh = False + show_prev = False # flag indicating whether rec_df is showing previous df or current self + if self._prev is not None: + rec_df = self._prev + rec_df._message = Message() + rec_df.maintain_metadata() # the prev dataframe may not have been printed before + last_event = self.history._events[-1].name + rec_df._message.add( + f"Lux is visualizing the previous version of the dataframe before you applied {last_event}." + ) + show_prev = True + else: + rec_df = self + rec_df._message = Message() + # Add warning message if there exist ID fields + id_fields_str = "" + if len(rec_df.data_type["id"]) > 0: + for id_field in rec_df.data_type["id"]: + id_fields_str += f"{id_field}, " + id_fields_str = id_fields_str[:-2] + rec_df._message.add( + f"{id_fields_str} is not visualized since it resembles an ID field." + ) + rec_df._prev = None # reset _prev + + if ( + not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh + ): # Check that recs has not yet been computed + rec_infolist = [] + from lux.action.custom import custom + from lux.action.custom import custom_actions + from lux.action.correlation import correlation + from lux.action.univariate import univariate + from lux.action.enhance import enhance + from lux.action.filter import filter + from lux.action.generalize import generalize + from lux.action.row_group import row_group + from lux.action.column_group import column_group + + if rec_df.pre_aggregated: + if rec_df.columns.name is not None: + rec_df._append_rec(rec_infolist, row_group(rec_df)) + if rec_df.index.name is not None: + rec_df._append_rec(rec_infolist, column_group(rec_df)) + else: + if self.recommendation == {}: + # display conditions for default actions + no_vis = lambda ldf: (ldf.current_vis is None) or ( + ldf.current_vis is not None and len(ldf.current_vis) == 0 + ) + one_current_vis = ( + lambda ldf: ldf.current_vis is not None + and len(ldf.current_vis) == 1 + ) + multiple_current_vis = ( + lambda ldf: ldf.current_vis is not None + and len(ldf.current_vis) > 1 + ) + + # globally register default actions + lux.register_action("correlation", correlation, no_vis) + lux.register_action( + "distribution", univariate, no_vis, "quantitative" + ) + lux.register_action("occurrence", univariate, no_vis, "nominal") + lux.register_action("temporal", univariate, no_vis, "temporal") + + lux.register_action("enhance", enhance, one_current_vis) + lux.register_action("filter", filter, one_current_vis) + lux.register_action("generalize", generalize, one_current_vis) + + lux.register_action("custom", custom, multiple_current_vis) + + # generate vis from globally registered actions and append to dataframe + custom_action_collection = custom_actions(rec_df) + for rec in custom_action_collection: + rec_df._append_rec(rec_infolist, rec) + lux.update_actions["flag"] = False + + # Store _rec_info into a more user-friendly dictionary form + rec_df.recommendation = {} + for rec_info in rec_infolist: + action_type = rec_info["action"] + vlist = rec_info["collection"] + if rec_df._plot_config: + for vis in rec_df.current_vis: + vis._plot_config = rec_df.plot_config + for vis in vlist: + vis._plot_config = rec_df.plot_config + if len(vlist) > 0: + rec_df.recommendation[action_type] = vlist + rec_df._rec_info = rec_infolist + self._widget = rec_df.render_widget() + elif ( + show_prev + ): # re-render widget for the current dataframe if previous rec is not recomputed + self._widget = rec_df.render_widget() + self._recs_fresh = True + + ####################################################### + ############## LuxWidget Result Display ############### + ####################################################### + @property + def widget(self): + if self._widget: + return self._widget + + @property + def exported(self) -> Union[Dict[str, VisList], VisList]: + """ + Get selected visualizations as exported Vis List + + Notes + ----- + Convert the _selectedVisIdxs dictionary into a programmable VisList + Example _selectedVisIdxs : + {'Correlation': [0, 2], 'Occurrence': [1]} + indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the `Occurrence` tab is selected. + + Returns + ------- + Union[Dict[str,VisList], VisList] + When there are no exported vis, return empty list -> [] + When all the exported vis is from the same tab, return a VisList of selected visualizations. -> VisList(v1, v2...) + When the exported vis is from the different tabs, return a dictionary with the action name as key and selected visualizations in the VisList. -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..} + """ + if not hasattr(self, "_widget"): + warnings.warn( + "\nNo widget attached to the dataframe." + "Please assign dataframe to an output variable.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + exported_vis_lst = self._widget._selectedVisIdxs + exported_vis = [] + if exported_vis_lst == {}: + if self._saved_export: + return self._saved_export + warnings.warn( + "\nNo visualization selected to export.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + if len(exported_vis_lst) == 1 and "currentVis" in exported_vis_lst: + return self.current_vis + elif len(exported_vis_lst) > 1: + exported_vis = {} + if "currentVis" in exported_vis_lst: + exported_vis["Current Vis"] = self.current_vis + for export_action in exported_vis_lst: + if export_action != "currentVis": + exported_vis[export_action] = VisList( + list( + map( + self.recommendation[export_action].__getitem__, + exported_vis_lst[export_action], + ) + ) + ) + return exported_vis + elif len(exported_vis_lst) == 1 and ("currentVis" not in exported_vis_lst): + export_action = list(exported_vis_lst.keys())[0] + exported_vis = VisList( + list( + map( + self.recommendation[export_action].__getitem__, + exported_vis_lst[export_action], + ) + ) + ) + self._saved_export = exported_vis + return exported_vis + else: + warnings.warn( + "\nNo visualization selected to export.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + + def remove_deleted_recs(self, change): + for action in self._widget.deletedIndices: + deletedSoFar = 0 + for index in self._widget.deletedIndices[action]: + self.recommendation[action].remove_index(index - deletedSoFar) + deletedSoFar += 1 + + def set_intent_on_click(self, change): + from IPython.display import display, clear_output + from lux.processor.Compiler import Compiler + + intent_action = list(self._widget.selectedIntentIndex.keys())[0] + vis = self.recommendation[intent_action][ + self._widget.selectedIntentIndex[intent_action][0] + ] + self.set_intent_as_vis(vis) + + self.maintain_metadata() + self.current_vis = Compiler.compile_intent(self, self._intent) + self.maintain_recs() + + with self.output: + clear_output() + display(self._widget) + + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") + + def _repr_html_(self): + from IPython.display import display + from IPython.display import clear_output + import ipywidgets as widgets + + try: + if self._pandas_only: + display(self.display_pandas()) + self._pandas_only = False + else: + if self.index.nlevels >= 2 or self.columns.nlevels >= 2: + warnings.warn( + "\nLux does not currently support dataframes " + "with hierarchical indexes.\n" + "Please convert the dataframe into a flat " + "table via `pandas.DataFrame.reset_index`.\n", + stacklevel=2, + ) + display(self.display_pandas()) + return + + if len(self) <= 0: + warnings.warn( + "\nLux can not operate on an empty dataframe.\nPlease check your input again.\n", + stacklevel=2, + ) + display(self.display_pandas()) + return + if len(self.columns) <= 1: + warnings.warn( + "\nLux defaults to Pandas when there is only a single column.", + stacklevel=2, + ) + display(self.display_pandas()) + return + self.maintain_metadata() + + if self._intent != [] and ( + not hasattr(self, "_compiled") or not self._compiled + ): + from lux.processor.Compiler import Compiler + + self.current_vis = Compiler.compile_intent(self, self._intent) + + if lux.config.default_display == "lux": + self._toggle_pandas_display = False + else: + self._toggle_pandas_display = True + + # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) + self.maintain_recs() + + # Observers(callback_function, listen_to_this_variable) + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe( + self.set_intent_on_click, names="selectedIntentIndex" + ) + + if len(self.recommendation) > 0: + # box = widgets.Box(layout=widgets.Layout(display='inline')) + button = widgets.Button( + description="Toggle Pandas/Lux", + layout=widgets.Layout(width="140px", top="5px"), + ) + self.output = widgets.Output() + # box.children = [button,output] + # output.children = [button] + # display(box) + display(button, self.output) + + def on_button_clicked(b): + with self.output: + if b: + self._toggle_pandas_display = ( + not self._toggle_pandas_display + ) + clear_output() + if self._toggle_pandas_display: + display(self.display_pandas()) + else: + # b.layout.display = "none" + display(self._widget) + # b.layout.display = "inline-block" + + button.on_click(on_button_clicked) + on_button_clicked(None) + else: + warnings.warn( + "\nLux defaults to Pandas when there are no valid actions defined.", + stacklevel=2, + ) + display(self.display_pandas()) + + except (KeyboardInterrupt, SystemExit): + raise + except: + warnings.warn( + "\nUnexpected error in rendering Lux widget and recommendations. " + "Falling back to Pandas display.\n\n" + "Please report this issue on Github: https://github.com/lux-org/lux/issues ", + stacklevel=2, + ) + display(self.display_pandas()) + + def display_pandas(self): + return self.to_pandas() + + def render_widget(self, renderer: str = "altair", input_current_vis=""): + """ + Generate a LuxWidget based on the LuxDataFrame + + Structure of widgetJSON: + { + 'current_vis': {}, + 'recommendation': [ + { + 'action': 'Correlation', + 'description': "some description", + 'vspec': [ + {Vega-Lite spec for vis 1}, + {Vega-Lite spec for vis 2}, + ... + ] + }, + ... repeat for other actions + ] + } + Parameters + ---------- + renderer : str, optional + Choice of visualization rendering library, by default "altair" + input_current_vis : lux.LuxDataFrame, optional + User-specified current vis to override default Current Vis, by default + """ + check_import_lux_widget() + import luxwidget + + widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) + return luxwidget.LuxWidget( + currentVis=widgetJSON["current_vis"], + recommendations=widgetJSON["recommendation"], + intent=LuxDataFrame.intent_to_string(self._intent), + message=self._message.to_html(), + ) + + @staticmethod + def intent_to_JSON(intent): + from lux.utils import utils + + filter_specs = utils.get_filter_specs(intent) + attrs_specs = utils.get_attrs_specs(intent) + + intent = {} + intent["attributes"] = [clause.attribute for clause in attrs_specs] + intent["filters"] = [clause.attribute for clause in filter_specs] + return intent + + @staticmethod + def intent_to_string(intent): + if intent: + return ", ".join([clause.to_string() for clause in intent]) + else: + return "" + + def to_JSON(self, rec_infolist, input_current_vis=""): + widget_spec = {} + if self.current_vis: + self.executor.execute(self.current_vis, self) + widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON( + self.current_vis, input_current_vis + ) + else: + widget_spec["current_vis"] = {} + widget_spec["recommendation"] = [] + + # Recommended Collection + recCollection = LuxDataFrame.rec_to_JSON(rec_infolist) + widget_spec["recommendation"].extend(recCollection) + return widget_spec + + @staticmethod + def current_vis_to_JSON(vlist, input_current_vis=""): + current_vis_spec = {} + numVC = len(vlist) # number of visualizations in the vis list + if numVC == 1: + current_vis_spec = vlist[0].render_VSpec() + elif numVC > 1: + pass + return current_vis_spec + + @staticmethod + def rec_to_JSON(recs): + rec_lst = [] + import copy + + rec_copy = copy.deepcopy(recs) + for idx, rec in enumerate(rec_copy): + if len(rec["collection"]) > 0: + rec["vspec"] = [] + for vis in rec["collection"]: + chart = vis.render_VSpec() + rec["vspec"].append(chart) + rec_lst.append(rec) + # delete DataObjectCollection since not JSON serializable + del rec_lst[idx]["collection"] + return rec_lst + + # Overridden Pandas Functions + def head(self, n: int = 5): + self._prev = self + self._history.append_event("head", n=5) + return super(LuxDataFrame, self).head(n) + + def tail(self, n: int = 5): + self._prev = self + self._history.append_event("tail", n=5) + return super(LuxDataFrame, self).tail(n) + + def info(self, *args, **kwargs): + self._pandas_only = True + self._history.append_event("info", *args, **kwargs) + return super(LuxDataFrame, self).info(*args, **kwargs) + + def describe(self, *args, **kwargs): + self._pandas_only = True + self._history.append_event("describe", *args, **kwargs) + return super(LuxDataFrame, self).describe(*args, **kwargs) diff --git a/lux/vis/VisList.py b/lux/vis/VisList.py index bde1d0c3..86ccf1a1 100644 --- a/lux/vis/VisList.py +++ b/lux/vis/VisList.py @@ -52,7 +52,6 @@ def intent(self, intent: List[Clause]) -> None: def set_intent(self, intent: List[Clause]) -> None: """ Sets the intent of the VisList and refresh the source based on the new clause - Parameters ---------- intent : List[Clause] @@ -65,238 +64,286 @@ def set_intent(self, intent: List[Clause]) -> None: def exported(self) -> VisList: """ Get selected visualizations as exported Vis List - Notes ----- - Convert the _selectedVisIdxs dictionary into a programmable VisList - Example _selectedVisIdxs : - {'Vis List': [0, 2]} - - Returns - ------- - VisList - return a VisList of selected visualizations. -> VisList(v1, v2...) - """ - if not hasattr(self,"widget"): - warnings.warn( - "\nNo widget attached to the VisList." - "Please assign VisList to an output variable.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - , stacklevel=2) - return [] - exported_vis_lst =self._widget._selectedVisIdxs - if (exported_vis_lst=={}): - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - ,stacklevel=2) - return [] - else: - exported_vis = VisList(list(map(self.__getitem__, exported_vis_lst["Vis List"]))) - return exported_vis - def remove_duplicates(self) -> None: - """ - Removes duplicate visualizations in Vis List - """ - self._collection = list(set(self._collection)) - - def remove_index(self, index): - self._collection.pop(index) - - def _is_vis_input(self): - if (type(self._input_lst[0])==Vis): - return True - elif (type(self._input_lst[0])==Clause): - return False - def __getitem__(self, key): - return self._collection[key] - def __setitem__(self, key, value): - self._collection[key] = value - def __len__(self): - return len(self._collection) - def __repr__(self): - if len(self._collection) == 0: - return str(self._input_lst) - x_channel = "" - y_channel = "" - largest_mark = 0 - largest_filter = 0 - for vis in self._collection: #finds longest x attribute among all visualizations - filter_intents = None - for clause in vis._inferred_intent: - if clause.value != "": - filter_intents = clause - - if (clause.aggregation != "" and clause.aggregation is not None): - attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")" - elif clause.bin_size > 0: - attribute = "BIN(" + clause.attribute + ")" - else: - attribute = clause.attribute - - if clause.channel == "x" and len(x_channel) < len(attribute): - x_channel = attribute - if clause.channel == "y" and len(y_channel) < len(attribute): - y_channel = attribute - if len(vis.mark) > largest_mark: - largest_mark = len(vis.mark) - if filter_intents and len(str(filter_intents.value)) + len(filter_intents.attribute) > largest_filter: - largest_filter = len(str(filter_intents.value)) + len(filter_intents.attribute) - vis_repr = [] - largest_x_length = len(x_channel) - largest_y_length = len(y_channel) - for vis in self._collection: #pads the shorter visualizations with spaces before the y attribute - filter_intents = None - x_channel = "" - y_channel = "" - additional_channels = [] - for clause in vis._inferred_intent: - if clause.value != "": - filter_intents = clause - - if (clause.aggregation != "" and clause.aggregation is not None and vis.mark!='scatter'): - attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")" - elif clause.bin_size > 0: - attribute = "BIN(" + clause.attribute + ")" - else: - attribute = clause.attribute - - if clause.channel == "x": - x_channel = attribute.ljust(largest_x_length) - elif clause.channel == "y": - y_channel = attribute - elif clause.channel != "": - additional_channels.append([clause.channel, attribute]) - if filter_intents: - y_channel = y_channel.ljust(largest_y_length) - elif largest_filter != 0: - y_channel = y_channel.ljust(largest_y_length + largest_filter + 9) - else: - y_channel = y_channel.ljust(largest_y_length + largest_filter) - if x_channel != "": - x_channel = "x: " + x_channel + ", " - if y_channel != "": - y_channel = "y: " + y_channel - aligned_mark = vis.mark.ljust(largest_mark) - str_additional_channels = "" - for channel in additional_channels: - str_additional_channels += ", " + channel[0] + ": " + channel[1] - if filter_intents: - aligned_filter = " -- [" + filter_intents.attribute + filter_intents.filter_op + str(filter_intents.value) + "]" - aligned_filter = aligned_filter.ljust(largest_filter + 8) - vis_repr.append(f" ") - else: - vis_repr.append(f" ") - return '['+',\n'.join(vis_repr)[1:]+']' - def map(self,function): - # generalized way of applying a function to each element - return map(function, self._collection) - - def get(self,field_name): - # Get the value of the field for all objects in the collection - def get_field(d_obj): - field_val = getattr(d_obj,field_name) - # Might want to write catch error if key not in field - return field_val - return self.map(get_field) - - def set(self,field_name,field_val): - return NotImplemented - def set_plot_config(self,config_func:Callable): - """ - Modify plot aesthetic settings to the Vis List - Currently only supported for Altair visualizations - - Parameters - ---------- - config_func : typing.Callable - A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output - """ - for vis in self._collection: - vis.plot_config = config_func - def clear_plot_config(self): - for vis in self._collection: - vis.plot_config = None - def sort(self, remove_invalid=True, descending = True): - # remove the items that have invalid (-1) score - if (remove_invalid): self._collection = list(filter(lambda x: x.score!=-1,self._collection)) - # sort in-place by “score” by default if available, otherwise user-specified field to sort by - self._collection.sort(key=lambda x: x.score, reverse=descending) - - def topK(self,k): - #sort and truncate list to first K items - self.sort(remove_invalid=True) - return VisList(self._collection[:k]) - def bottomK(self,k): - #sort and truncate list to first K items - self.sort(descending=False,remove_invalid=True) - return VisList(self._collection[:k]) - def normalize_score(self, invert_order = False): - max_score = max(list(self.get("score"))) - for dobj in self._collection: - dobj.score = dobj.score/max_score - if (invert_order): dobj.score = 1 - dobj.score - def _repr_html_(self): - self._widget = None - from IPython.display import display - from lux.core.frame import LuxDataFrame - recommendation = {"action": "Vis List", - "description": "Shows a vis list defined by the intent"} - recommendation["collection"] = self._collection - - check_import_lux_widget() - import luxwidget - recJSON = LuxDataFrame.rec_to_JSON([recommendation]) - self._widget = luxwidget.LuxWidget( - currentVis={}, - recommendations=recJSON, - intent="", - message = "" - ) - display(self._widget) - - def refresh_source(self, ldf) : - """ - Loading the source into the visualizations in the VisList, then populating each visualization - based on the new source data, effectively "materializing" the visualization collection. - - Parameters - ---------- - ldf : LuxDataframe - Input Dataframe to be attached to the VisList - - Returns - ------- - VisList - Complete VisList with fully-specified fields - - See Also - -------- - lux.vis.Vis.refresh_source - - Note - ---- - Function derives a new _inferred_intent by instantiating the intent specification on the new data - """ - if (ldf is not None): - from lux.processor.Parser import Parser - from lux.processor.Validator import Validator - from lux.processor.Compiler import Compiler - self._source = ldf - self._source.maintain_metadata() - if len(self._input_lst)>0: - if (self._is_vis_input()): - compiled_collection = [] - for vis in self._collection: - vis._inferred_intent = Parser.parse(vis._intent) - Validator.validate_intent(vis._inferred_intent,ldf) - vislist = Compiler.compile_vis(ldf,vis) - if (len(vislist)>0): - vis = vislist[0] - compiled_collection.append(vis) - self._collection = compiled_collection - else: - self._inferred_intent = Parser.parse(self._intent) - Validator.validate_intent(self._inferred_intent,ldf) - self._collection = Compiler.compile_intent(ldf,self._inferred_intent) - ldf.executor.execute(self._collection,ldf) + Convert the _selectedVisIdxs dictionary into a programmable VisList + Example _selectedVisIdxs : + {'Vis List': [0, 2]} + + Returns + ------- + VisList + return a VisList of selected visualizations. -> VisList(v1, v2...) + """ + if not hasattr(self, "widget"): + warnings.warn( + "\nNo widget attached to the VisList." + "Please assign VisList to an output variable.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + exported_vis_lst = self._widget._selectedVisIdxs + if exported_vis_lst == {}: + warnings.warn( + "\nNo visualization selected to export.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + else: + exported_vis = VisList( + list(map(self.__getitem__, exported_vis_lst["Vis List"])) + ) + return exported_vis + + def remove_duplicates(self) -> None: + """ + Removes duplicate visualizations in Vis List + """ + self._collection = list(set(self._collection)) + + def remove_index(self, index): + self._collection.pop(index) + + def _is_vis_input(self): + if type(self._input_lst[0]) == Vis: + return True + elif type(self._input_lst[0]) == Clause: + return False + + def __getitem__(self, key): + return self._collection[key] + + def __setitem__(self, key, value): + self._collection[key] = value + + def __len__(self): + return len(self._collection) + + def __repr__(self): + if len(self._collection) == 0: + return str(self._input_lst) + x_channel = "" + y_channel = "" + largest_mark = 0 + largest_filter = 0 + for ( + vis + ) in self._collection: # finds longest x attribute among all visualizations + filter_intents = None + for clause in vis._inferred_intent: + if clause.value != "": + filter_intents = clause + + if clause.aggregation != "" and clause.aggregation is not None: + attribute = ( + clause._aggregation_name.upper() + "(" + clause.attribute + ")" + ) + elif clause.bin_size > 0: + attribute = "BIN(" + clause.attribute + ")" + else: + attribute = clause.attribute + + if clause.channel == "x" and len(x_channel) < len(attribute): + x_channel = attribute + if clause.channel == "y" and len(y_channel) < len(attribute): + y_channel = attribute + if len(vis.mark) > largest_mark: + largest_mark = len(vis.mark) + if ( + filter_intents + and len(str(filter_intents.value)) + len(filter_intents.attribute) + > largest_filter + ): + largest_filter = len(str(filter_intents.value)) + len( + filter_intents.attribute + ) + vis_repr = [] + largest_x_length = len(x_channel) + largest_y_length = len(y_channel) + for ( + vis + ) in ( + self._collection + ): # pads the shorter visualizations with spaces before the y attribute + filter_intents = None + x_channel = "" + y_channel = "" + additional_channels = [] + for clause in vis._inferred_intent: + if clause.value != "": + filter_intents = clause + + if ( + clause.aggregation != "" + and clause.aggregation is not None + and vis.mark != "scatter" + ): + attribute = ( + clause._aggregation_name.upper() + "(" + clause.attribute + ")" + ) + elif clause.bin_size > 0: + attribute = "BIN(" + clause.attribute + ")" + else: + attribute = clause.attribute + + if clause.channel == "x": + x_channel = attribute.ljust(largest_x_length) + elif clause.channel == "y": + y_channel = attribute + elif clause.channel != "": + additional_channels.append([clause.channel, attribute]) + if filter_intents: + y_channel = y_channel.ljust(largest_y_length) + elif largest_filter != 0: + y_channel = y_channel.ljust(largest_y_length + largest_filter + 9) + else: + y_channel = y_channel.ljust(largest_y_length + largest_filter) + if x_channel != "": + x_channel = "x: " + x_channel + ", " + if y_channel != "": + y_channel = "y: " + y_channel + aligned_mark = vis.mark.ljust(largest_mark) + str_additional_channels = "" + for channel in additional_channels: + str_additional_channels += ", " + channel[0] + ": " + channel[1] + if filter_intents: + aligned_filter = ( + " -- [" + + filter_intents.attribute + + filter_intents.filter_op + + str(filter_intents.value) + + "]" + ) + aligned_filter = aligned_filter.ljust(largest_filter + 8) + vis_repr.append( + f" " + ) + else: + vis_repr.append( + f" " + ) + return "[" + ",\n".join(vis_repr)[1:] + "]" + + def map(self, function): + # generalized way of applying a function to each element + return map(function, self._collection) + + def get(self, field_name): + # Get the value of the field for all objects in the collection + def get_field(d_obj): + field_val = getattr(d_obj, field_name) + # Might want to write catch error if key not in field + return field_val + + return self.map(get_field) + + def set(self, field_name, field_val): + return NotImplemented + + def set_plot_config(self, config_func: Callable): + """ + Modify plot aesthetic settings to the Vis List + Currently only supported for Altair visualizations + Parameters + ---------- + config_func : typing.Callable + A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output + """ + for vis in self._collection: + vis.plot_config = config_func + + def clear_plot_config(self): + for vis in self._collection: + vis.plot_config = None + + def sort(self, remove_invalid=True, descending=True): + # remove the items that have invalid (-1) score + if remove_invalid: + self._collection = list(filter(lambda x: x.score != -1, self._collection)) + # sort in-place by “score” by default if available, otherwise user-specified field to sort by + self._collection.sort(key=lambda x: x.score, reverse=descending) + + def topK(self, k): + # sort and truncate list to first K items + self.sort(remove_invalid=True) + return VisList(self._collection[:k]) + + def bottomK(self, k): + # sort and truncate list to first K items + self.sort(descending=False, remove_invalid=True) + return VisList(self._collection[:k]) + + def normalize_score(self, invert_order=False): + max_score = max(list(self.get("score"))) + for dobj in self._collection: + dobj.score = dobj.score / max_score + if invert_order: + dobj.score = 1 - dobj.score + + def _repr_html_(self): + self._widget = None + from IPython.display import display + from lux.core.frame import LuxDataFrame + + recommendation = { + "action": "Vis List", + "description": "Shows a vis list defined by the intent", + } + recommendation["collection"] = self._collection + + check_import_lux_widget() + import luxwidget + + recJSON = LuxDataFrame.rec_to_JSON([recommendation]) + self._widget = luxwidget.LuxWidget( + currentVis={}, recommendations=recJSON, intent="", message="" + ) + display(self._widget) + + def refresh_source(self, ldf): + """ + Loading the source into the visualizations in the VisList, then populating each visualization + based on the new source data, effectively "materializing" the visualization collection. + Parameters + ---------- + ldf : LuxDataframe + Input Dataframe to be attached to the VisList + Returns + ------- + VisList + Complete VisList with fully-specified fields + + See Also + -------- + lux.vis.Vis.refresh_source + Note + ---- + Function derives a new _inferred_intent by instantiating the intent specification on the new data + """ + if ldf is not None: + from lux.processor.Parser import Parser + from lux.processor.Validator import Validator + from lux.processor.Compiler import Compiler + + self._source = ldf + self._source.maintain_metadata() + if len(self._input_lst) > 0: + if self._is_vis_input(): + compiled_collection = [] + for vis in self._collection: + vis._inferred_intent = Parser.parse(vis._intent) + Validator.validate_intent(vis._inferred_intent, ldf) + vislist = Compiler.compile_vis(ldf, vis) + if len(vislist) > 0: + vis = vislist[0] + compiled_collection.append(vis) + self._collection = compiled_collection + else: + self._inferred_intent = Parser.parse(self._intent) + Validator.validate_intent(self._inferred_intent, ldf) + self._collection = Compiler.compile_intent( + ldf, self._inferred_intent + ) + ldf.executor.execute(self._collection, ldf) From 4aa4bd40a2800684b7795f49a9beca81469c4018 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sun, 1 Nov 2020 18:03:12 -0800 Subject: [PATCH 07/22] reformat, update command to fix version --- .travis.yml | 2 +- lux/core/frame.py | 1574 +++++++++++++++------------ lux/vislib/altair/AltairRenderer.py | 24 +- tests/test_config.py | 53 +- 4 files changed, 903 insertions(+), 750 deletions(-) diff --git a/.travis.yml b/.travis.yml index d03743d2..98bde1cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ install: - pip install git+https://github.com/lux-org/lux-widget # command to run tests script: - - black --check . + - black --target-version py37 --check . - python -m pytest tests/*.py - pytest --cov-report term --cov=lux tests/ after_success: diff --git a/lux/core/frame.py b/lux/core/frame.py index 60ba4304..8deb29e7 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -26,712 +26,868 @@ class LuxDataFrame(pd.DataFrame): - ''' - A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. - ''' - # MUST register here for new properties!! - _metadata = ['_intent','data_type_lookup','data_type', - 'data_model_lookup','data_model','unique_values','cardinality','_rec_info', '_pandas_only', - '_min_max','plot_config', '_current_vis','_widget', '_recommendation','_prev','_history', '_saved_export'] - - def __init__(self,*args, **kw): - from lux.executor.PandasExecutor import PandasExecutor - self._history = History() - self._intent = [] - self._recommendation = {} - self._saved_export = None - self._current_vis = [] - self._prev = None - super(LuxDataFrame, self).__init__(*args, **kw) - - self.executor_type = "Pandas" - self.executor = PandasExecutor() - self.SQLconnection = "" - self.table_name = "" - - self._sampled = None - self._default_pandas_display = True - self._toggle_pandas_display = True - self._plot_config = None - self._message = Message() - self._pandas_only=False - # Metadata - self.data_type_lookup = None - self.data_type = None - self.data_model_lookup = None - self.data_model = None - self.unique_values = None - self.cardinality = None - self._min_max = None - self.pre_aggregated = None - - @property - def _constructor(self): - return LuxDataFrame - # @property - # def _constructor_sliced(self): - # def f(*args, **kwargs): - # # adapted from https://github.com/pandas-dev/pandas/issues/13208#issuecomment-326556232 - # return LuxSeries(*args, **kwargs).__finalize__(self, method='inherit') - # return f - @property - def history(self): - return self._history - def maintain_metadata(self): - if (not hasattr(self,"_metadata_fresh") or not self._metadata_fresh ): # Check that metadata has not yet been computed - if (len(self)>0): #only compute metadata information if the dataframe is non-empty - self.executor.compute_stats(self) - self.executor.compute_dataset_metadata(self) - self._infer_structure() - self._metadata_fresh = True - def expire_recs(self): - self._recs_fresh = False - self.recommendation = {} - self.current_vis = None - self._widget = None - self._rec_info = None - self._sampled = None - def expire_metadata(self): - # Set metadata as null - self._metadata_fresh = False - self.data_type_lookup = None - self.data_type = None - self.data_model_lookup = None - self.data_model = None - self.unique_values = None - self.cardinality = None - self._min_max = None - self.pre_aggregated = None - - ##################### - ## Override Pandas ## - ##################### - def __getattr__(self, name): - ret_value = super(LuxDataFrame, self).__getattr__(name) - self.expire_metadata() - self.expire_recs() - return ret_value - def _set_axis(self, axis, labels): - super(LuxDataFrame, self)._set_axis(axis, labels) - self.expire_metadata() - self.expire_recs() - def _update_inplace(self,*args,**kwargs): - super(LuxDataFrame, self)._update_inplace(*args,**kwargs) - self.expire_metadata() - self.expire_recs() - def _set_item(self, key, value): - super(LuxDataFrame, self)._set_item(key, value) - self.expire_metadata() - self.expire_recs() - def _infer_structure(self): - # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data - is_multi_index_flag = self.index.nlevels !=1 - not_int_index_flag = self.index.dtype !='int64' - small_df_flag = len(self)<100 - self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag - if ("Number of Records" in self.columns): - self.pre_aggregated = True - very_small_df_flag = len(self)<=10 - if (very_small_df_flag): - self.pre_aggregated = True - def set_executor_type(self, exe): - if (exe =="SQL"): - import pkgutil - if (pkgutil.find_loader("psycopg2") is None): - raise ImportError("psycopg2 is not installed. Run `pip install psycopg2' to install psycopg2 to enable the Postgres connection.") - else: - import psycopg2 - from lux.executor.SQLExecutor import SQLExecutor - self.executor = SQLExecutor - else: - from lux.executor.PandasExecutor import PandasExecutor - self.executor = PandasExecutor() - self.executor_type = exe - @property - def plot_config(self): - return self._plot_config - @plot_config.setter - def plot_config(self,config_func:Callable): - """ - Modify plot aesthetic settings to all visualizations in the dataframe display - Currently only supported for Altair visualizations - Parameters - ---------- - config_func : Callable - A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output - - Example - ---------- - Changing the color of marks and adding a title for all charts displayed for this dataframe - >>> df = pd.read_csv("lux/data/car.csv") - >>> def changeColorAddTitle(chart): - chart = chart.configure_mark(color="red") # change mark color to red - chart.title = "Custom Title" # add title to chart - return chart - >>> df.plot_config = changeColorAddTitle - >>> df - Change the opacity of all scatterplots displayed for this dataframe - >>> df = pd.read_csv("lux/data/olympic.csv") - >>> def changeOpacityScatterOnly(chart): - if chart.mark=='circle': - chart = chart.configure_mark(opacity=0.1) # lower opacity - return chart - >>> df.plot_config = changeOpacityScatterOnly - >>> df - """ - self._plot_config = config_func - self._recs_fresh=False - def clear_plot_config(self): - self._plot_config = None - self._recs_fresh=False - - @property - def intent(self): - return self._intent - @intent.setter - def intent(self, intent_input:Union[List[Union[str, Clause]],Vis]): - is_list_input = isinstance(intent_input,list) - is_vis_input = isinstance(intent_input,Vis) - if not (is_list_input or is_vis_input): - raise TypeError("Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object." - "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" - ) - if is_list_input: - self.set_intent(intent_input) - elif is_vis_input: - self.set_intent_as_vis(intent_input) - def clear_intent(self): - self.intent = [] - def set_intent(self, intent:List[Union[str, Clause]]): - """ - Main function to set the intent of the dataframe. - The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. - - Parameters - ---------- - intent : List[str,Clause] - intent list, can be a mix of string shorthand or a lux.Clause object - - Notes - ----- - :doc:`../guide/clause` - """ - self.expire_recs() - self._intent = intent - self._parse_validate_compile_intent() - def _parse_validate_compile_intent(self): - from lux.processor.Parser import Parser - from lux.processor.Validator import Validator - self._intent = Parser.parse(self._intent) - Validator.validate_intent(self._intent,self) - self.maintain_metadata() - from lux.processor.Compiler import Compiler - self.current_vis = Compiler.compile_intent(self, self._intent) - - def copy_intent(self): - #creates a true copy of the dataframe's intent - output = [] - for clause in self._intent: - temp_clause = clause.copy_clause() - output.append(temp_clause) - return(output) - - def set_intent_as_vis(self,vis:Vis): - """ - Set intent of the dataframe as the Vis - - Parameters - ---------- - vis : Vis - """ - self.expire_recs() - self._intent = vis._inferred_intent - self._parse_validate_compile_intent() - - def to_pandas(self): - import lux.core - return lux.core.originalDF(self,copy=False) - - @property - def recommendation(self): - return self._recommendation - @recommendation.setter - def recommendation(self,recommendation:Dict): - self._recommendation = recommendation - @property - def current_vis(self): - return self._current_vis - @current_vis.setter - def current_vis(self,current_vis:Dict): - self._current_vis = current_vis - def __repr__(self): - # TODO: _repr_ gets called from _repr_html, need to get rid of this call - return "" - - ####################################################### - ########## SQL Metadata, type, model schema ########### - ####################################################### - - def set_SQL_connection(self, connection, t_name): - self.SQLconnection = connection - self.table_name = t_name - self.compute_SQL_dataset_metadata() - self.set_executor_type("SQL") - - def compute_SQL_dataset_metadata(self): - self.get_SQL_attributes() - for attr in list(self.columns): - self[attr] = None - self.data_type_lookup = {} - self.data_type = {} - #####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this - ##### in the initialization and do it just once - self.compute_SQL_data_type() - self.compute_SQL_stats() - self.data_model_lookup = {} - self.data_model = {} - self.compute_data_model() - - def compute_SQL_stats(self): - # precompute statistics - self.unique_values = {} - self._min_max = {} - - self.get_SQL_unique_values() - #self.get_SQL_cardinality() - for attribute in self.columns: - if self.data_type_lookup[attribute] == 'quantitative': - self._min_max[attribute] = (self[attribute].min(), self[attribute].max()) - - def get_SQL_attributes(self): - if "." in self.table_name: - table_name = self.table_name[self.table_name.index(".")+1:] - else: - table_name = self.table_name - attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format(table_name) - attributes = list(pd.read_sql(attr_query, self.SQLconnection)['column_name']) - for attr in attributes: - self[attr] = None - - def get_SQL_cardinality(self): - cardinality = {} - for attr in list(self.columns): - card_query = pd.read_sql("SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), self.SQLconnection) - cardinality[attr] = list(card_query["count"])[0] - self.cardinality = cardinality - - def get_SQL_unique_values(self): - unique_vals = {} - for attr in list(self.columns): - unique_query = pd.read_sql("SELECT Distinct({}) FROM {}".format(attr, self.table_name), self.SQLconnection) - unique_vals[attr] = list(unique_query[attr]) - self.unique_values = unique_vals - - def compute_SQL_data_type(self): - data_type_lookup = {} - sql_dtypes = {} - self.get_SQL_cardinality() - if "." in self.table_name: - table_name = self.table_name[self.table_name.index(".")+1:] - else: - table_name = self.table_name - #get the data types of the attributes in the SQL table - for attr in list(self.columns): - datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format(table_name, attr) - datatype = list(pd.read_sql(datatype_query, self.SQLconnection)['data_type'])[0] - sql_dtypes[attr] = datatype - - data_type = {"quantitative":[], "nominal":[], "temporal":[]} - for attr in list(self.columns): - if str(attr).lower() in ["month", "year"]: - data_type_lookup[attr] = "temporal" - data_type["temporal"].append(attr) - elif sql_dtypes[attr] in ["character", "character varying", "boolean", "uuid", "text"]: - data_type_lookup[attr] = "nominal" - data_type["nominal"].append(attr) - elif sql_dtypes[attr] in ["integer", "real", "smallint", "smallserial", "serial"]: - if self.cardinality[attr] < 13: - data_type_lookup[attr] = "nominal" - data_type["nominal"].append(attr) - else: - data_type_lookup[attr] = "quantitative" - data_type["quantitative"].append(attr) - elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]: - data_type_lookup[attr] = "temporal" - data_type["temporal"].append(attr) - self.data_type_lookup = data_type_lookup - self.data_type = data_type - def _append_rec(self,rec_infolist,recommendations:Dict): - if (recommendations["collection"] is not None and len(recommendations["collection"])>0): - rec_infolist.append(recommendations) - def maintain_recs(self): - # `rec_df` is the dataframe to generate the recommendations on - # check to see if globally defined actions have been registered/removed - if (lux.update_actions["flag"] == True): - self._recs_fresh = False - show_prev = False # flag indicating whether rec_df is showing previous df or current self - if self._prev is not None: - rec_df = self._prev - rec_df._message = Message() - rec_df.maintain_metadata() # the prev dataframe may not have been printed before - last_event = self.history._events[-1].name - rec_df._message.add(f"Lux is visualizing the previous version of the dataframe before you applied {last_event}.") - show_prev = True - else: - rec_df = self - rec_df._message = Message() - # Add warning message if there exist ID fields - id_fields_str = "" - if (len(rec_df.data_type["id"])>0): - for id_field in rec_df.data_type["id"]: id_fields_str += f"{id_field}, " - id_fields_str = id_fields_str[:-2] - rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") - rec_df._prev = None # reset _prev - - if (not hasattr(rec_df,"_recs_fresh") or not rec_df._recs_fresh ): # Check that recs has not yet been computed - rec_infolist = [] - from lux.action.custom import custom - from lux.action.custom import custom_actions - from lux.action.correlation import correlation - from lux.action.univariate import univariate - from lux.action.enhance import enhance - from lux.action.filter import filter - from lux.action.generalize import generalize - from lux.action.row_group import row_group - from lux.action.column_group import column_group - if (rec_df.pre_aggregated): - if (rec_df.columns.name is not None): - rec_df._append_rec(rec_infolist, row_group(rec_df)) - if (rec_df.index.name is not None): - rec_df._append_rec(rec_infolist, column_group(rec_df)) - else: - if self.recommendation == {}: - # display conditions for default actions - no_vis = lambda ldf: (ldf.current_vis is None) or (ldf.current_vis is not None and len(ldf.current_vis) == 0) - one_current_vis = lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) == 1 - multiple_current_vis = lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) > 1 - - # globally register default actions - lux.register_action("correlation", correlation, no_vis) - lux.register_action("distribution", univariate, no_vis, "quantitative") - lux.register_action("occurrence", univariate, no_vis, "nominal") - lux.register_action("temporal", univariate, no_vis, "temporal") - - lux.register_action("enhance", enhance, one_current_vis) - lux.register_action("filter", filter, one_current_vis) - lux.register_action("generalize", generalize, one_current_vis) - - lux.register_action("custom", custom, multiple_current_vis) - - # generate vis from globally registered actions and append to dataframe - custom_action_collection = custom_actions(rec_df) - for rec in custom_action_collection: - rec_df._append_rec(rec_infolist, rec) - lux.update_actions["flag"] = False - - # Store _rec_info into a more user-friendly dictionary form - rec_df.recommendation = {} - for rec_info in rec_infolist: - action_type = rec_info["action"] - vlist = rec_info["collection"] - if (rec_df._plot_config): - if (rec_df.current_vis): - for vis in rec_df.current_vis: vis._plot_config = rec_df.plot_config - for vis in vlist: vis._plot_config = rec_df.plot_config - if (len(vlist)>0): - rec_df.recommendation[action_type] = vlist - rec_df._rec_info = rec_infolist - self._widget = rec_df.render_widget() - elif (show_prev): # re-render widget for the current dataframe if previous rec is not recomputed - self._widget = rec_df.render_widget() - self._recs_fresh = True - - - ####################################################### - ############## LuxWidget Result Display ############### - ####################################################### - @property - def widget(self): - if(self._widget): - return self._widget - @property - def exported(self) -> Union[Dict[str,VisList], VisList]: - """ - Get selected visualizations as exported Vis List - - Notes - ----- - Convert the _selectedVisIdxs dictionary into a programmable VisList - Example _selectedVisIdxs : - {'Correlation': [0, 2], 'Occurrence': [1]} - indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the `Occurrence` tab is selected. - - Returns - ------- - Union[Dict[str,VisList], VisList] - When there are no exported vis, return empty list -> [] - When all the exported vis is from the same tab, return a VisList of selected visualizations. -> VisList(v1, v2...) - When the exported vis is from the different tabs, return a dictionary with the action name as key and selected visualizations in the VisList. -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..} - """ - if not hasattr(self,"_widget"): - warnings.warn( - "\nNo widget attached to the dataframe." - "Please assign dataframe to an output variable.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - , stacklevel=2) - return [] - exported_vis_lst = self._widget._selectedVisIdxs - exported_vis = [] - if (exported_vis_lst=={}): - if self._saved_export: - return self._saved_export - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - ,stacklevel=2) - return [] - if len(exported_vis_lst) == 1 and "currentVis" in exported_vis_lst: - return self.current_vis - elif len(exported_vis_lst) > 1: - exported_vis = {} - if ("currentVis" in exported_vis_lst): - exported_vis["Current Vis"] = self.current_vis - for export_action in exported_vis_lst: - if (export_action != "currentVis"): - exported_vis[export_action] = VisList(list(map(self.recommendation[export_action].__getitem__, exported_vis_lst[export_action]))) - return exported_vis - elif len(exported_vis_lst) == 1 and ("currentVis" not in exported_vis_lst): - export_action = list(exported_vis_lst.keys())[0] - exported_vis = VisList(list(map(self.recommendation[export_action].__getitem__, exported_vis_lst[export_action]))) - self._saved_export = exported_vis - return exported_vis - else: - warnings.warn( - "\nNo visualization selected to export.\n" - "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips" - ,stacklevel=2) - return [] - - def remove_deleted_recs(self, change): - for action in self._widget.deletedIndices: - deletedSoFar = 0 - for index in self._widget.deletedIndices[action]: - self.recommendation[action].remove_index(index - deletedSoFar) - deletedSoFar += 1 - - def set_intent_on_click(self, change): - from IPython.display import display, clear_output - from lux.processor.Compiler import Compiler - - intent_action = list(self._widget.selectedIntentIndex.keys())[0] - vis = self.recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] - self.set_intent_as_vis(vis) - - self.maintain_metadata() - self.current_vis = Compiler.compile_intent(self, self._intent) - self.maintain_recs() - - with self.output: - clear_output() - display(self._widget) - - self._widget.observe(self.remove_deleted_recs, names='deletedIndices') - self._widget.observe(self.set_intent_on_click, names='selectedIntentIndex') - - def _repr_html_(self): - from IPython.display import display - from IPython.display import clear_output - import ipywidgets as widgets - - try: - if (self._pandas_only): - display(self.display_pandas()) - self._pandas_only=False - else: - if(self.index.nlevels>=2 or self.columns.nlevels >= 2): - warnings.warn( - "\nLux does not currently support dataframes " - "with hierarchical indexes.\n" - "Please convert the dataframe into a flat " - "table via `pandas.DataFrame.reset_index`.\n", - stacklevel=2, - ) - display(self.display_pandas()) - return - - if (len(self)<=0): - warnings.warn("\nLux can not operate on an empty dataframe.\nPlease check your input again.\n",stacklevel=2) - display(self.display_pandas()) - return - if (len(self.columns)<=1): - warnings.warn("\nLux defaults to Pandas when there is only a single column.",stacklevel=2) - display(self.display_pandas()) - return - self.maintain_metadata() - - if (self._intent!=[] and (not hasattr(self,"_compiled") or not self._compiled)): - from lux.processor.Compiler import Compiler - self.current_vis = Compiler.compile_intent(self, self._intent) - - if (lux.config.default_display == "lux"): - self._toggle_pandas_display = False - else: - self._toggle_pandas_display = True - - # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) - self.maintain_recs() - - #Observers(callback_function, listen_to_this_variable) - self._widget.observe(self.remove_deleted_recs, names='deletedIndices') - self._widget.observe(self.set_intent_on_click, names='selectedIntentIndex') - - if len(self.recommendation) > 0: - # box = widgets.Box(layout=widgets.Layout(display='inline')) - button = widgets.Button(description="Toggle Pandas/Lux",layout=widgets.Layout(width='140px',top='5px')) - self.output = widgets.Output() - # box.children = [button,output] - # output.children = [button] - # display(box) - display(button, self.output) - def on_button_clicked(b): - with self.output: - if (b): - self._toggle_pandas_display = not self._toggle_pandas_display - clear_output() - if (self._toggle_pandas_display): - display(self.display_pandas()) - else: - # b.layout.display = "none" - display(self._widget) - # b.layout.display = "inline-block" - button.on_click(on_button_clicked) - on_button_clicked(None) - else: - warnings.warn("\nLux defaults to Pandas when there are no valid actions defined.",stacklevel=2) - display(self.display_pandas()) - - except(KeyboardInterrupt,SystemExit): - raise - except: - warnings.warn( - "\nUnexpected error in rendering Lux widget and recommendations. " - "Falling back to Pandas display.\n\n" - "Please report this issue on Github: https://github.com/lux-org/lux/issues " - ,stacklevel=2) - display(self.display_pandas()) - def display_pandas(self): - return self.to_pandas() - def render_widget(self, renderer:str ="altair", input_current_vis=""): - """ - Generate a LuxWidget based on the LuxDataFrame - - Structure of widgetJSON: - { - 'current_vis': {}, - 'recommendation': [ - { - 'action': 'Correlation', - 'description': "some description", - 'vspec': [ - {Vega-Lite spec for vis 1}, - {Vega-Lite spec for vis 2}, - ... - ] - }, - ... repeat for other actions - ] - } - Parameters - ---------- - renderer : str, optional - Choice of visualization rendering library, by default "altair" - input_current_vis : lux.LuxDataFrame, optional - User-specified current vis to override default Current Vis, by default - """ - check_import_lux_widget() - import luxwidget - widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) - return luxwidget.LuxWidget( - currentVis=widgetJSON["current_vis"], - recommendations=widgetJSON["recommendation"], - intent=LuxDataFrame.intent_to_string(self._intent), - message = self._message.to_html() - ) - @staticmethod - def intent_to_JSON(intent): - from lux.utils import utils - - filter_specs = utils.get_filter_specs(intent) - attrs_specs = utils.get_attrs_specs(intent) - - intent = {} - intent['attributes'] = [clause.attribute for clause in attrs_specs] - intent['filters'] = [clause.attribute for clause in filter_specs] - return intent - @staticmethod - def intent_to_string(intent): - if (intent): - return ", ".join([clause.to_string() for clause in intent]) - else: - return "" - - def to_JSON(self, rec_infolist, input_current_vis=""): - widget_spec = {} - if (self.current_vis): - self.executor.execute(self.current_vis, self) - widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON(self.current_vis, input_current_vis) - else: - widget_spec["current_vis"] = {} - widget_spec["recommendation"] = [] - - # Recommended Collection - recCollection = LuxDataFrame.rec_to_JSON(rec_infolist) - widget_spec["recommendation"].extend(recCollection) - return widget_spec - - @staticmethod - def current_vis_to_JSON(vlist, input_current_vis=""): - current_vis_spec = {} - numVC = len(vlist) #number of visualizations in the vis list - if (numVC==1): - current_vis_spec = vlist[0].render_VSpec() - elif (numVC>1): - pass - return current_vis_spec - - @staticmethod - def rec_to_JSON(recs): - rec_lst = [] - import copy - rec_copy = copy.deepcopy(recs) - for idx,rec in enumerate(rec_copy): - if (len(rec["collection"])>0): - rec["vspec"] = [] - for vis in rec["collection"]: - chart = vis.render_VSpec() - rec["vspec"].append(chart) - rec_lst.append(rec) - # delete DataObjectCollection since not JSON serializable - del rec_lst[idx]["collection"] - return rec_lst - - # Overridden Pandas Functions - def head(self, n: int = 5): - self._prev = self - self._history.append_event("head", n=5) - return super(LuxDataFrame, self).head(n) - - def tail(self, n: int = 5): - self._prev = self - self._history.append_event("tail", n=5) - return super(LuxDataFrame, self).tail(n) - - def info(self, *args, **kwargs): - self._pandas_only=True - self._history.append_event("info",*args, **kwargs) - return super(LuxDataFrame, self).info(*args, **kwargs) - - def describe(self, *args, **kwargs): - self._pandas_only=True - self._history.append_event("describe",*args, **kwargs) - return super(LuxDataFrame, self).describe(*args, **kwargs) + """ + A subclass of pd.DataFrame that supports all dataframe operations while housing other variables and functions for generating visual recommendations. + """ + + # MUST register here for new properties!! + _metadata = [ + "_intent", + "data_type_lookup", + "data_type", + "data_model_lookup", + "data_model", + "unique_values", + "cardinality", + "_rec_info", + "_pandas_only", + "_min_max", + "plot_config", + "_current_vis", + "_widget", + "_recommendation", + "_prev", + "_history", + "_saved_export", + ] + + def __init__(self, *args, **kw): + from lux.executor.PandasExecutor import PandasExecutor + + self._history = History() + self._intent = [] + self._recommendation = {} + self._saved_export = None + self._current_vis = [] + self._prev = None + super(LuxDataFrame, self).__init__(*args, **kw) + + self.executor_type = "Pandas" + self.executor = PandasExecutor() + self.SQLconnection = "" + self.table_name = "" + + self._sampled = None + self._default_pandas_display = True + self._toggle_pandas_display = True + self._plot_config = None + self._message = Message() + self._pandas_only = False + # Metadata + self.data_type_lookup = None + self.data_type = None + self.data_model_lookup = None + self.data_model = None + self.unique_values = None + self.cardinality = None + self._min_max = None + self.pre_aggregated = None + + @property + def _constructor(self): + return LuxDataFrame + + # @property + # def _constructor_sliced(self): + # def f(*args, **kwargs): + # # adapted from https://github.com/pandas-dev/pandas/issues/13208#issuecomment-326556232 + # return LuxSeries(*args, **kwargs).__finalize__(self, method='inherit') + # return f + @property + def history(self): + return self._history + + def maintain_metadata(self): + if ( + not hasattr(self, "_metadata_fresh") or not self._metadata_fresh + ): # Check that metadata has not yet been computed + if ( + len(self) > 0 + ): # only compute metadata information if the dataframe is non-empty + self.executor.compute_stats(self) + self.executor.compute_dataset_metadata(self) + self._infer_structure() + self._metadata_fresh = True + + def expire_recs(self): + self._recs_fresh = False + self.recommendation = {} + self.current_vis = None + self._widget = None + self._rec_info = None + self._sampled = None + + def expire_metadata(self): + # Set metadata as null + self._metadata_fresh = False + self.data_type_lookup = None + self.data_type = None + self.data_model_lookup = None + self.data_model = None + self.unique_values = None + self.cardinality = None + self._min_max = None + self.pre_aggregated = None + + ##################### + ## Override Pandas ## + ##################### + def __getattr__(self, name): + ret_value = super(LuxDataFrame, self).__getattr__(name) + self.expire_metadata() + self.expire_recs() + return ret_value + + def _set_axis(self, axis, labels): + super(LuxDataFrame, self)._set_axis(axis, labels) + self.expire_metadata() + self.expire_recs() + + def _update_inplace(self, *args, **kwargs): + super(LuxDataFrame, self)._update_inplace(*args, **kwargs) + self.expire_metadata() + self.expire_recs() + + def _set_item(self, key, value): + super(LuxDataFrame, self)._set_item(key, value) + self.expire_metadata() + self.expire_recs() + + def _infer_structure(self): + # If the dataframe is very small and the index column is not a range index, then it is likely that this is an aggregated data + is_multi_index_flag = self.index.nlevels != 1 + not_int_index_flag = self.index.dtype != "int64" + small_df_flag = len(self) < 100 + self.pre_aggregated = ( + is_multi_index_flag or not_int_index_flag + ) and small_df_flag + if "Number of Records" in self.columns: + self.pre_aggregated = True + very_small_df_flag = len(self) <= 10 + if very_small_df_flag: + self.pre_aggregated = True + + def set_executor_type(self, exe): + if exe == "SQL": + import pkgutil + + if pkgutil.find_loader("psycopg2") is None: + raise ImportError( + "psycopg2 is not installed. Run `pip install psycopg2' to install psycopg2 to enable the Postgres connection." + ) + else: + import psycopg2 + from lux.executor.SQLExecutor import SQLExecutor + + self.executor = SQLExecutor + else: + from lux.executor.PandasExecutor import PandasExecutor + + self.executor = PandasExecutor() + self.executor_type = exe + + @property + def plot_config(self): + return self._plot_config + + @plot_config.setter + def plot_config(self, config_func: Callable): + """ + Modify plot aesthetic settings to all visualizations in the dataframe display + Currently only supported for Altair visualizations + Parameters + ---------- + config_func : Callable + A function that takes in an AltairChart (https://altair-viz.github.io/user_guide/generated/toplevel/altair.Chart.html) as input and returns an AltairChart as output + + Example + ---------- + Changing the color of marks and adding a title for all charts displayed for this dataframe + >>> df = pd.read_csv("lux/data/car.csv") + >>> def changeColorAddTitle(chart): + chart = chart.configure_mark(color="red") # change mark color to red + chart.title = "Custom Title" # add title to chart + return chart + >>> df.plot_config = changeColorAddTitle + >>> df + Change the opacity of all scatterplots displayed for this dataframe + >>> df = pd.read_csv("lux/data/olympic.csv") + >>> def changeOpacityScatterOnly(chart): + if chart.mark=='circle': + chart = chart.configure_mark(opacity=0.1) # lower opacity + return chart + >>> df.plot_config = changeOpacityScatterOnly + >>> df + """ + self._plot_config = config_func + self._recs_fresh = False + + def clear_plot_config(self): + self._plot_config = None + self._recs_fresh = False + + @property + def intent(self): + return self._intent + + @intent.setter + def intent(self, intent_input: Union[List[Union[str, Clause]], Vis]): + is_list_input = isinstance(intent_input, list) + is_vis_input = isinstance(intent_input, Vis) + if not (is_list_input or is_vis_input): + raise TypeError( + "Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object." + "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html" + ) + if is_list_input: + self.set_intent(intent_input) + elif is_vis_input: + self.set_intent_as_vis(intent_input) + + def clear_intent(self): + self.intent = [] + + def set_intent(self, intent: List[Union[str, Clause]]): + """ + Main function to set the intent of the dataframe. + The intent input goes through the parser, so that the string inputs are parsed into a lux.Clause object. + + Parameters + ---------- + intent : List[str,Clause] + intent list, can be a mix of string shorthand or a lux.Clause object + + Notes + ----- + :doc:`../guide/clause` + """ + self.expire_recs() + self._intent = intent + self._parse_validate_compile_intent() + + def _parse_validate_compile_intent(self): + from lux.processor.Parser import Parser + from lux.processor.Validator import Validator + + self._intent = Parser.parse(self._intent) + Validator.validate_intent(self._intent, self) + self.maintain_metadata() + from lux.processor.Compiler import Compiler + + self.current_vis = Compiler.compile_intent(self, self._intent) + + def copy_intent(self): + # creates a true copy of the dataframe's intent + output = [] + for clause in self._intent: + temp_clause = clause.copy_clause() + output.append(temp_clause) + return output + + def set_intent_as_vis(self, vis: Vis): + """ + Set intent of the dataframe as the Vis + + Parameters + ---------- + vis : Vis + """ + self.expire_recs() + self._intent = vis._inferred_intent + self._parse_validate_compile_intent() + + def to_pandas(self): + import lux.core + + return lux.core.originalDF(self, copy=False) + + @property + def recommendation(self): + return self._recommendation + + @recommendation.setter + def recommendation(self, recommendation: Dict): + self._recommendation = recommendation + + @property + def current_vis(self): + return self._current_vis + + @current_vis.setter + def current_vis(self, current_vis: Dict): + self._current_vis = current_vis + + def __repr__(self): + # TODO: _repr_ gets called from _repr_html, need to get rid of this call + return "" + + ####################################################### + ########## SQL Metadata, type, model schema ########### + ####################################################### + + def set_SQL_connection(self, connection, t_name): + self.SQLconnection = connection + self.table_name = t_name + self.compute_SQL_dataset_metadata() + self.set_executor_type("SQL") + + def compute_SQL_dataset_metadata(self): + self.get_SQL_attributes() + for attr in list(self.columns): + self[attr] = None + self.data_type_lookup = {} + self.data_type = {} + #####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this + ##### in the initialization and do it just once + self.compute_SQL_data_type() + self.compute_SQL_stats() + self.data_model_lookup = {} + self.data_model = {} + self.compute_data_model() + + def compute_SQL_stats(self): + # precompute statistics + self.unique_values = {} + self._min_max = {} + + self.get_SQL_unique_values() + # self.get_SQL_cardinality() + for attribute in self.columns: + if self.data_type_lookup[attribute] == "quantitative": + self._min_max[attribute] = ( + self[attribute].min(), + self[attribute].max(), + ) + + def get_SQL_attributes(self): + if "." in self.table_name: + table_name = self.table_name[self.table_name.index(".") + 1 :] + else: + table_name = self.table_name + attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format( + table_name + ) + attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"]) + for attr in attributes: + self[attr] = None + + def get_SQL_cardinality(self): + cardinality = {} + for attr in list(self.columns): + card_query = pd.read_sql( + "SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), + self.SQLconnection, + ) + cardinality[attr] = list(card_query["count"])[0] + self.cardinality = cardinality + + def get_SQL_unique_values(self): + unique_vals = {} + for attr in list(self.columns): + unique_query = pd.read_sql( + "SELECT Distinct({}) FROM {}".format(attr, self.table_name), + self.SQLconnection, + ) + unique_vals[attr] = list(unique_query[attr]) + self.unique_values = unique_vals + + def compute_SQL_data_type(self): + data_type_lookup = {} + sql_dtypes = {} + self.get_SQL_cardinality() + if "." in self.table_name: + table_name = self.table_name[self.table_name.index(".") + 1 :] + else: + table_name = self.table_name + # get the data types of the attributes in the SQL table + for attr in list(self.columns): + datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format( + table_name, attr + ) + datatype = list( + pd.read_sql(datatype_query, self.SQLconnection)["data_type"] + )[0] + sql_dtypes[attr] = datatype + + data_type = {"quantitative": [], "nominal": [], "temporal": []} + for attr in list(self.columns): + if str(attr).lower() in ["month", "year"]: + data_type_lookup[attr] = "temporal" + data_type["temporal"].append(attr) + elif sql_dtypes[attr] in [ + "character", + "character varying", + "boolean", + "uuid", + "text", + ]: + data_type_lookup[attr] = "nominal" + data_type["nominal"].append(attr) + elif sql_dtypes[attr] in [ + "integer", + "real", + "smallint", + "smallserial", + "serial", + ]: + if self.cardinality[attr] < 13: + data_type_lookup[attr] = "nominal" + data_type["nominal"].append(attr) + else: + data_type_lookup[attr] = "quantitative" + data_type["quantitative"].append(attr) + elif "time" in sql_dtypes[attr] or "date" in sql_dtypes[attr]: + data_type_lookup[attr] = "temporal" + data_type["temporal"].append(attr) + self.data_type_lookup = data_type_lookup + self.data_type = data_type + + def _append_rec(self, rec_infolist, recommendations: Dict): + if ( + recommendations["collection"] is not None + and len(recommendations["collection"]) > 0 + ): + rec_infolist.append(recommendations) + + def maintain_recs(self): + # `rec_df` is the dataframe to generate the recommendations on + # check to see if globally defined actions have been registered/removed + if lux.update_actions["flag"] == True: + self._recs_fresh = False + show_prev = False # flag indicating whether rec_df is showing previous df or current self + if self._prev is not None: + rec_df = self._prev + rec_df._message = Message() + rec_df.maintain_metadata() # the prev dataframe may not have been printed before + last_event = self.history._events[-1].name + rec_df._message.add( + f"Lux is visualizing the previous version of the dataframe before you applied {last_event}." + ) + show_prev = True + else: + rec_df = self + rec_df._message = Message() + # Add warning message if there exist ID fields + id_fields_str = "" + if len(rec_df.data_type["id"]) > 0: + for id_field in rec_df.data_type["id"]: + id_fields_str += f"{id_field}, " + id_fields_str = id_fields_str[:-2] + rec_df._message.add( + f"{id_fields_str} is not visualized since it resembles an ID field." + ) + rec_df._prev = None # reset _prev + + if ( + not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh + ): # Check that recs has not yet been computed + rec_infolist = [] + from lux.action.custom import custom + from lux.action.custom import custom_actions + from lux.action.correlation import correlation + from lux.action.univariate import univariate + from lux.action.enhance import enhance + from lux.action.filter import filter + from lux.action.generalize import generalize + from lux.action.row_group import row_group + from lux.action.column_group import column_group + + if rec_df.pre_aggregated: + if rec_df.columns.name is not None: + rec_df._append_rec(rec_infolist, row_group(rec_df)) + if rec_df.index.name is not None: + rec_df._append_rec(rec_infolist, column_group(rec_df)) + else: + if self.recommendation == {}: + # display conditions for default actions + no_vis = lambda ldf: (ldf.current_vis is None) or ( + ldf.current_vis is not None and len(ldf.current_vis) == 0 + ) + one_current_vis = ( + lambda ldf: ldf.current_vis is not None + and len(ldf.current_vis) == 1 + ) + multiple_current_vis = ( + lambda ldf: ldf.current_vis is not None + and len(ldf.current_vis) > 1 + ) + + # globally register default actions + lux.register_action("correlation", correlation, no_vis) + lux.register_action( + "distribution", univariate, no_vis, "quantitative" + ) + lux.register_action("occurrence", univariate, no_vis, "nominal") + lux.register_action("temporal", univariate, no_vis, "temporal") + + lux.register_action("enhance", enhance, one_current_vis) + lux.register_action("filter", filter, one_current_vis) + lux.register_action("generalize", generalize, one_current_vis) + + lux.register_action("custom", custom, multiple_current_vis) + + # generate vis from globally registered actions and append to dataframe + custom_action_collection = custom_actions(rec_df) + for rec in custom_action_collection: + rec_df._append_rec(rec_infolist, rec) + lux.update_actions["flag"] = False + + # Store _rec_info into a more user-friendly dictionary form + rec_df.recommendation = {} + for rec_info in rec_infolist: + action_type = rec_info["action"] + vlist = rec_info["collection"] + if rec_df._plot_config: + if rec_df.current_vis: + for vis in rec_df.current_vis: + vis._plot_config = rec_df.plot_config + for vis in vlist: + vis._plot_config = rec_df.plot_config + if len(vlist) > 0: + rec_df.recommendation[action_type] = vlist + rec_df._rec_info = rec_infolist + self._widget = rec_df.render_widget() + elif ( + show_prev + ): # re-render widget for the current dataframe if previous rec is not recomputed + self._widget = rec_df.render_widget() + self._recs_fresh = True + + ####################################################### + ############## LuxWidget Result Display ############### + ####################################################### + @property + def widget(self): + if self._widget: + return self._widget + + @property + def exported(self) -> Union[Dict[str, VisList], VisList]: + """ + Get selected visualizations as exported Vis List + + Notes + ----- + Convert the _selectedVisIdxs dictionary into a programmable VisList + Example _selectedVisIdxs : + {'Correlation': [0, 2], 'Occurrence': [1]} + indicating the 0th and 2nd vis from the `Correlation` tab is selected, and the 1st vis from the `Occurrence` tab is selected. + + Returns + ------- + Union[Dict[str,VisList], VisList] + When there are no exported vis, return empty list -> [] + When all the exported vis is from the same tab, return a VisList of selected visualizations. -> VisList(v1, v2...) + When the exported vis is from the different tabs, return a dictionary with the action name as key and selected visualizations in the VisList. -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..} + """ + if not hasattr(self, "_widget"): + warnings.warn( + "\nNo widget attached to the dataframe." + "Please assign dataframe to an output variable.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + exported_vis_lst = self._widget._selectedVisIdxs + exported_vis = [] + if exported_vis_lst == {}: + if self._saved_export: + return self._saved_export + warnings.warn( + "\nNo visualization selected to export.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + if len(exported_vis_lst) == 1 and "currentVis" in exported_vis_lst: + return self.current_vis + elif len(exported_vis_lst) > 1: + exported_vis = {} + if "currentVis" in exported_vis_lst: + exported_vis["Current Vis"] = self.current_vis + for export_action in exported_vis_lst: + if export_action != "currentVis": + exported_vis[export_action] = VisList( + list( + map( + self.recommendation[export_action].__getitem__, + exported_vis_lst[export_action], + ) + ) + ) + return exported_vis + elif len(exported_vis_lst) == 1 and ("currentVis" not in exported_vis_lst): + export_action = list(exported_vis_lst.keys())[0] + exported_vis = VisList( + list( + map( + self.recommendation[export_action].__getitem__, + exported_vis_lst[export_action], + ) + ) + ) + self._saved_export = exported_vis + return exported_vis + else: + warnings.warn( + "\nNo visualization selected to export.\n" + "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips", + stacklevel=2, + ) + return [] + + def remove_deleted_recs(self, change): + for action in self._widget.deletedIndices: + deletedSoFar = 0 + for index in self._widget.deletedIndices[action]: + self.recommendation[action].remove_index(index - deletedSoFar) + deletedSoFar += 1 + + def set_intent_on_click(self, change): + from IPython.display import display, clear_output + from lux.processor.Compiler import Compiler + + intent_action = list(self._widget.selectedIntentIndex.keys())[0] + vis = self.recommendation[intent_action][ + self._widget.selectedIntentIndex[intent_action][0] + ] + self.set_intent_as_vis(vis) + + self.maintain_metadata() + self.current_vis = Compiler.compile_intent(self, self._intent) + self.maintain_recs() + + with self.output: + clear_output() + display(self._widget) + + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") + + def _repr_html_(self): + from IPython.display import display + from IPython.display import clear_output + import ipywidgets as widgets + + try: + if self._pandas_only: + display(self.display_pandas()) + self._pandas_only = False + else: + if self.index.nlevels >= 2 or self.columns.nlevels >= 2: + warnings.warn( + "\nLux does not currently support dataframes " + "with hierarchical indexes.\n" + "Please convert the dataframe into a flat " + "table via `pandas.DataFrame.reset_index`.\n", + stacklevel=2, + ) + display(self.display_pandas()) + return + + if len(self) <= 0: + warnings.warn( + "\nLux can not operate on an empty dataframe.\nPlease check your input again.\n", + stacklevel=2, + ) + display(self.display_pandas()) + return + if len(self.columns) <= 1: + warnings.warn( + "\nLux defaults to Pandas when there is only a single column.", + stacklevel=2, + ) + display(self.display_pandas()) + return + self.maintain_metadata() + + if self._intent != [] and ( + not hasattr(self, "_compiled") or not self._compiled + ): + from lux.processor.Compiler import Compiler + + self.current_vis = Compiler.compile_intent(self, self._intent) + + if lux.config.default_display == "lux": + self._toggle_pandas_display = False + else: + self._toggle_pandas_display = True + + # df_to_display.maintain_recs() # compute the recommendations (TODO: This can be rendered in another thread in the background to populate self._widget) + self.maintain_recs() + + # Observers(callback_function, listen_to_this_variable) + self._widget.observe(self.remove_deleted_recs, names="deletedIndices") + self._widget.observe( + self.set_intent_on_click, names="selectedIntentIndex" + ) + + if len(self.recommendation) > 0: + # box = widgets.Box(layout=widgets.Layout(display='inline')) + button = widgets.Button( + description="Toggle Pandas/Lux", + layout=widgets.Layout(width="140px", top="5px"), + ) + self.output = widgets.Output() + # box.children = [button,output] + # output.children = [button] + # display(box) + display(button, self.output) + + def on_button_clicked(b): + with self.output: + if b: + self._toggle_pandas_display = ( + not self._toggle_pandas_display + ) + clear_output() + if self._toggle_pandas_display: + display(self.display_pandas()) + else: + # b.layout.display = "none" + display(self._widget) + # b.layout.display = "inline-block" + + button.on_click(on_button_clicked) + on_button_clicked(None) + else: + warnings.warn( + "\nLux defaults to Pandas when there are no valid actions defined.", + stacklevel=2, + ) + display(self.display_pandas()) + + except (KeyboardInterrupt, SystemExit): + raise + except: + warnings.warn( + "\nUnexpected error in rendering Lux widget and recommendations. " + "Falling back to Pandas display.\n\n" + "Please report this issue on Github: https://github.com/lux-org/lux/issues ", + stacklevel=2, + ) + display(self.display_pandas()) + + def display_pandas(self): + return self.to_pandas() + + def render_widget(self, renderer: str = "altair", input_current_vis=""): + """ + Generate a LuxWidget based on the LuxDataFrame + + Structure of widgetJSON: + { + 'current_vis': {}, + 'recommendation': [ + { + 'action': 'Correlation', + 'description': "some description", + 'vspec': [ + {Vega-Lite spec for vis 1}, + {Vega-Lite spec for vis 2}, + ... + ] + }, + ... repeat for other actions + ] + } + Parameters + ---------- + renderer : str, optional + Choice of visualization rendering library, by default "altair" + input_current_vis : lux.LuxDataFrame, optional + User-specified current vis to override default Current Vis, by default + """ + check_import_lux_widget() + import luxwidget + + widgetJSON = self.to_JSON(self._rec_info, input_current_vis=input_current_vis) + return luxwidget.LuxWidget( + currentVis=widgetJSON["current_vis"], + recommendations=widgetJSON["recommendation"], + intent=LuxDataFrame.intent_to_string(self._intent), + message=self._message.to_html(), + ) + + @staticmethod + def intent_to_JSON(intent): + from lux.utils import utils + + filter_specs = utils.get_filter_specs(intent) + attrs_specs = utils.get_attrs_specs(intent) + + intent = {} + intent["attributes"] = [clause.attribute for clause in attrs_specs] + intent["filters"] = [clause.attribute for clause in filter_specs] + return intent + + @staticmethod + def intent_to_string(intent): + if intent: + return ", ".join([clause.to_string() for clause in intent]) + else: + return "" + + def to_JSON(self, rec_infolist, input_current_vis=""): + widget_spec = {} + if self.current_vis: + self.executor.execute(self.current_vis, self) + widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON( + self.current_vis, input_current_vis + ) + else: + widget_spec["current_vis"] = {} + widget_spec["recommendation"] = [] + + # Recommended Collection + recCollection = LuxDataFrame.rec_to_JSON(rec_infolist) + widget_spec["recommendation"].extend(recCollection) + return widget_spec + + @staticmethod + def current_vis_to_JSON(vlist, input_current_vis=""): + current_vis_spec = {} + numVC = len(vlist) # number of visualizations in the vis list + if numVC == 1: + current_vis_spec = vlist[0].render_VSpec() + elif numVC > 1: + pass + return current_vis_spec + + @staticmethod + def rec_to_JSON(recs): + rec_lst = [] + import copy + + rec_copy = copy.deepcopy(recs) + for idx, rec in enumerate(rec_copy): + if len(rec["collection"]) > 0: + rec["vspec"] = [] + for vis in rec["collection"]: + chart = vis.render_VSpec() + rec["vspec"].append(chart) + rec_lst.append(rec) + # delete DataObjectCollection since not JSON serializable + del rec_lst[idx]["collection"] + return rec_lst + + # Overridden Pandas Functions + def head(self, n: int = 5): + self._prev = self + self._history.append_event("head", n=5) + return super(LuxDataFrame, self).head(n) + + def tail(self, n: int = 5): + self._prev = self + self._history.append_event("tail", n=5) + return super(LuxDataFrame, self).tail(n) + + def info(self, *args, **kwargs): + self._pandas_only = True + self._history.append_event("info", *args, **kwargs) + return super(LuxDataFrame, self).info(*args, **kwargs) + + def describe(self, *args, **kwargs): + self._pandas_only = True + self._history.append_event("describe", *args, **kwargs) + return super(LuxDataFrame, self).describe(*args, **kwargs) diff --git a/lux/vislib/altair/AltairRenderer.py b/lux/vislib/altair/AltairRenderer.py index 3068d286..2692f72e 100644 --- a/lux/vislib/altair/AltairRenderer.py +++ b/lux/vislib/altair/AltairRenderer.py @@ -80,9 +80,9 @@ def create_vis(self, vis, standalone=True): chart = None if chart: + if vis.plot_config: + chart.chart = vis.plot_config(chart.chart) if self.output_type == "VegaLite": - if vis.plot_config: - chart.chart = vis.plot_config(chart.chart) chart_dict = chart.chart.to_dict() # this is a bit of a work around because altair must take a pandas dataframe and we can only generate a luxDataFrame # chart["data"] = { "values": vis.data.to_dict(orient='records') } @@ -92,20 +92,12 @@ def create_vis(self, vis, standalone=True): elif self.output_type == "Altair": import inspect - if (chart): - if (vis.plot_config): chart.chart = vis.plot_config(chart.chart) - if (self.output_type=="VegaLite"): - chart_dict = chart.chart.to_dict() - # this is a bit of a work around because altair must take a pandas dataframe and we can only generate a luxDataFrame - # chart["data"] = { "values": vis.data.to_dict(orient='records') } - # chart_dict["width"] = 160 - # chart_dict["height"] = 150 - return chart_dict - elif (self.output_type=="Altair"): - import inspect - if (vis.plot_config): chart.code +='\n'.join(inspect.getsource(vis.plot_config).split('\n ')[1:-1]) - chart.code +="\nchart" - chart.code = chart.code.replace('\n\t\t','\n') + if vis.plot_config: + chart.code += "\n".join( + inspect.getsource(vis.plot_config).split("\n ")[1:-1] + ) + chart.code += "\nchart" + chart.code = chart.code.replace("\n\t\t", "\n") var = vis._source if var is not None: diff --git a/tests/test_config.py b/tests/test_config.py index 09fb1522..adfd2655 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -143,39 +143,44 @@ def test_remove_invalid_action(): def test_remove_default_actions(): - df = pd.read_csv("lux/data/car.csv") - df._repr_html_() + df = pd.read_csv("lux/data/car.csv") + df._repr_html_() - lux.remove_action("Distribution") - df._repr_html_() - assert("Distribution" not in df.recommendation) + lux.remove_action("Distribution") + df._repr_html_() + assert "Distribution" not in df.recommendation - lux.remove_action("Occurrence") - df._repr_html_() - assert("Occurrence" not in df.recommendation) + lux.remove_action("Occurrence") + df._repr_html_() + assert "Occurrence" not in df.recommendation - lux.remove_action("Temporal") - df._repr_html_() - assert("Temporal" not in df.recommendation) + lux.remove_action("Temporal") + df._repr_html_() + assert "Temporal" not in df.recommendation - lux.remove_action("Correlation") - df._repr_html_() - assert("Correlation" not in df.recommendation) + lux.remove_action("Correlation") + df._repr_html_() + assert "Correlation" not in df.recommendation + + assert ( + len(df.recommendation) == 0, + "Default actions should not be rendered after it has been removed.", + ) - assert(len(df.recommendation) == 0, - "Default actions should not be rendered after it has been removed.") + df = register_new_action() + df.set_intent(["Acceleration", "Horsepower"]) + df._repr_html_() + assert ( + "bars" in df.recommendation, + "Bars should be rendered after it has been registered with correct intent.", + ) + assert len(df.recommendation["bars"]) > 0 - df = register_new_action() - df.set_intent(["Acceleration", "Horsepower"]) - df._repr_html_() - assert("bars" in df.recommendation, - "Bars should be rendered after it has been registered with correct intent.") - assert(len(df.recommendation["bars"]) > 0) -# TODO: This test does not pass in pytest but is working in Jupyter notebook. +# TODO: This test does not pass in pytest but is working in Jupyter notebook. # def test_plot_setting(): # df = pd.read_csv("lux/data/car.csv") -# df["Year"] = pd.to_datetime(df["Year"], format='%Y') +# df["Year"] = pd.to_datetime(df["Year"], format='%Y') # def change_color_add_title(chart): # chart = chart.configure_mark(color="green") # change mark color to green # chart.title = "Custom Title" # add title to chart From 4b51dd47a6e0104c3947e38576737659e80ad94a Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sun, 1 Nov 2020 18:13:18 -0800 Subject: [PATCH 08/22] remove dev dependencies --- requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3370b7c0..b23c6009 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,8 +2,6 @@ scipy>=1.3.3 altair>=4.0.0 pandas>=1.1.0 scikit-learn>=0.22 -Sphinx>=3.0.2 -sphinx-rtd-theme>=0.4.3 # Install only to use SQLExecutor # psycopg2>=2.8.5 # psycopg2-binary>=2.8.5 From 700a0bc5f3378b80cfd6197d959f4eb34c388c3a Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Tue, 10 Nov 2020 23:09:38 -0800 Subject: [PATCH 09/22] first pass -- inline comments --- lux/action/column_group.py | 6 +- lux/action/correlation.py | 5 +- lux/action/enhance.py | 5 +- lux/action/filter.py | 4 +- lux/action/univariate.py | 10 ++-- lux/core/frame.py | 10 ++-- lux/executor/PandasExecutor.py | 12 ++-- lux/processor/Compiler.py | 63 +++++++++------------ lux/processor/Parser.py | 10 ++-- lux/processor/Validator.py | 5 +- lux/utils/date_utils.py | 18 +++--- lux/utils/utils.py | 5 +- lux/vis/Vis.py | 12 +--- lux/vis/VisList.py | 26 ++------- lux/vislib/altair/BarChart.py | 10 ++-- lux/vislib/altair/Heatmap.py | 5 +- lux/vislib/altair/ScatterChart.py | 5 +- tests/test_compiler.py | 35 +++++------- tests/test_dates.py | 9 +-- tests/test_error_warning.py | 3 - tests/test_executor.py | 18 +++--- tests/test_interestingness.py | 23 -------- tests/test_maintainence.py | 11 +--- tests/test_pandas_coverage.py | 91 +------------------------------ tests/test_vis.py | 10 ++-- 25 files changed, 111 insertions(+), 300 deletions(-) diff --git a/lux/action/column_group.py b/lux/action/column_group.py index 710cea95..049da68a 100644 --- a/lux/action/column_group.py +++ b/lux/action/column_group.py @@ -31,9 +31,9 @@ def column_group(ldf): ldf_flat = ldf if isinstance(ldf.columns, pd.DatetimeIndex): ldf_flat.columns = ldf_flat.columns.format() - ldf_flat = ( - ldf_flat.reset_index() - ) # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis + + # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis + ldf_flat = ldf_flat.reset_index() if ldf.index.nlevels == 1: if ldf.index.name: index_column_name = ldf.index.name diff --git a/lux/action/correlation.py b/lux/action/correlation.py index 5d51ba01..1a999e48 100644 --- a/lux/action/correlation.py +++ b/lux/action/correlation.py @@ -53,9 +53,8 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True): "description": "Show relationships between two

quantitative

attributes.", } ignore_rec_flag = False - if ( - len(ldf) < 5 - ): # Doesn't make sense to compute correlation if less than 4 data values + # Doesn't make sense to compute correlation if less than 4 data values + if len(ldf) < 5: ignore_rec_flag = True # Then use the data populated in the vis list to compute score for vis in vlist: diff --git a/lux/action/enhance.py b/lux/action/enhance.py index ffdc2423..fb889b11 100644 --- a/lux/action/enhance.py +++ b/lux/action/enhance.py @@ -53,9 +53,8 @@ def enhance(ldf): "action": "Enhance", "description": f"Further breaking down current {intended_attrs} intent by additional attribute.", } - elif ( - len(attr_specs) > 2 - ): # if there are too many column attributes, return don't generate Enhance recommendations + # if there are too many column attributes, return don't generate Enhance recommendations + elif len(attr_specs) > 2: recommendation = {"action": "Enhance"} recommendation["collection"] = [] return recommendation diff --git a/lux/action/filter.py b/lux/action/filter.py index f0972722..0f2c6037 100644 --- a/lux/action/filter.py +++ b/lux/action/filter.py @@ -86,8 +86,8 @@ def get_complementary_ops(fltr_op): new_spec.append(new_filter) temp_vis = Vis(new_spec, score=1) output.append(temp_vis) - - else: # if no existing filters, create filters using unique values from all categorical variables in the dataset + # if no existing filters, create filters using unique values from all categorical variables in the dataset + else: intended_attrs = ", ".join( [ clause.attribute diff --git a/lux/action/univariate.py b/lux/action/univariate.py index 4eb0157e..8f8cd1ac 100644 --- a/lux/action/univariate.py +++ b/lux/action/univariate.py @@ -58,9 +58,8 @@ def univariate(ldf, *args): "action": "Distribution", "description": "Show univariate histograms of

quantitative

attributes.", } - if ( - len(ldf) < 5 - ): # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated) + # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated) + if len(ldf) < 5: ignore_rec_flag = True elif data_type_constraint == "nominal": intent = [lux.Clause("?", data_type="nominal")] @@ -76,9 +75,8 @@ def univariate(ldf, *args): "action": "Temporal", "description": "Show trends over

time-related

attributes.", } - if ( - len(ldf) < 3 - ): # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated) + # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated) + if len(ldf) < 3: ignore_rec_flag = True if ignore_rec_flag: recommendation["collection"] = [] diff --git a/lux/core/frame.py b/lux/core/frame.py index 6e45621b..1dfa0d04 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -482,9 +482,8 @@ def maintain_recs(self): ) rec_df._prev = None # reset _prev - if ( - not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh - ): # Check that recs has not yet been computed + # Check that recs has not yet been computed + if not hasattr(rec_df, "_recs_fresh") or not rec_df._recs_fresh: rec_infolist = [] from lux.action.custom import custom from lux.action.custom import custom_actions @@ -550,9 +549,8 @@ def maintain_recs(self): rec_df.recommendation[action_type] = vlist rec_df._rec_info = rec_infolist self._widget = rec_df.render_widget() - elif ( - show_prev - ): # re-render widget for the current dataframe if previous rec is not recomputed + # re-render widget for the current dataframe if previous rec is not recomputed + elif show_prev: self._widget = rec_df.render_widget() self._recs_fresh = True diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index 64fa2e54..d168cdd9 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -80,9 +80,8 @@ def execute(vislist: VisList, ldf: LuxDataFrame): """ PandasExecutor.execute_sampling(ldf) for vis in vislist: - vis._vis_data = ( - ldf._sampled - ) # The vis data starts off being original or sampled dataframe + # The vis data starts off being original or sampled dataframe + vis._vis_data = ldf._sampled filter_executed = PandasExecutor.execute_filter(vis) # Select relevant data based on attribute information attributes = set([]) @@ -220,9 +219,10 @@ def execute_aggregate(vis: Vis, isFiltered=True): ) == N_unique_vals * len( color_attr_vals ), f"Aggregated data missing values compared to original range of values of `{groupby_attr.attribute, color_attr.attribute}`." - vis._vis_data = vis.data.iloc[ - :, :3 - ] # Keep only the three relevant columns not the *_right columns resulting from merge + + # Keep only the three relevant columns not the *_right columns resulting from merge + vis._vis_data = vis.data.iloc[:, :3] + else: df = pd.DataFrame({columns[0]: attr_unique_vals}) diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py index 0635f2de..cf04e741 100644 --- a/lux/processor/Compiler.py +++ b/lux/processor/Compiler.py @@ -37,16 +37,13 @@ def __repr__(self): @staticmethod def compile_vis(ldf: LuxDataFrame, vis: Vis) -> VisList: if vis: - vis_collection = Compiler.populate_data_type_model( - ldf, [vis] - ) # autofill data type/model information - vis_collection = Compiler.remove_all_invalid( - vis_collection - ) # remove invalid visualizations from collection + # autofill data type/model information + vis_collection = Compiler.populate_data_type_model(ldf, [vis]) + # remove invalid visualizations from collection + vis_collection = Compiler.remove_all_invalid(vis_collection) for vis in vis_collection: - Compiler.determine_encoding( - ldf, vis - ) # autofill viz related information + # autofill viz related information + Compiler.determine_encoding(ldf, vis) ldf._compiled = True return vis_collection @@ -72,17 +69,14 @@ def compile_intent(ldf: LuxDataFrame, _inferred_intent: List[Clause]) -> VisList """ if _inferred_intent: vis_collection = Compiler.enumerate_collection(_inferred_intent, ldf) - vis_collection = Compiler.populate_data_type_model( - ldf, vis_collection - ) # autofill data type/model information + # autofill data type/model information + vis_collection = Compiler.populate_data_type_model(ldf, vis_collection) + # remove invalid visualizations from collection if len(vis_collection) >= 1: - vis_collection = Compiler.remove_all_invalid( - vis_collection - ) # remove invalid visualizations from collection + vis_collection = Compiler.remove_all_invalid(vis_collection) for vis in vis_collection: - Compiler.determine_encoding( - ldf, vis - ) # autofill viz related information + # autofill viz related information + Compiler.determine_encoding(ldf, vis) ldf._compiled = True return vis_collection @@ -121,9 +115,8 @@ def combine(col_attrs, accum): for i in range(n): column_list = copy.deepcopy(accum + [col_attrs[0][i]]) if last: - if ( - len(filters) > 0 - ): # if we have filters, generate combinations for each row. + # if we have filters, generate combinations for each row. + if len(filters) > 0: for row in filters: _inferred_intent = copy.deepcopy(column_list + [row]) vis = Vis(_inferred_intent) @@ -164,9 +157,8 @@ def populate_data_type_model(ldf, vis_collection) -> VisList: if clause.description == "?": clause.description = "" # TODO: Note that "and not is_datetime_string(clause.attribute))" is a temporary hack and breaks the `test_row_column_group` example - if ( - clause.attribute != "" and clause.attribute != "Record" - ): # and not is_datetime_string(clause.attribute): + # and not is_datetime_string(clause.attribute): + if clause.attribute != "" and clause.attribute != "Record": if clause.data_type == "": clause.data_type = ldf.data_type_lookup[clause.attribute] if clause.data_type == "id": @@ -174,9 +166,8 @@ def populate_data_type_model(ldf, vis_collection) -> VisList: if clause.data_model == "": clause.data_model = ldf.data_model_lookup[clause.attribute] if clause.value != "": - if ( - vis.title == "" - ): # If user provided title for Vis, then don't override. + # If user provided title for Vis, then don't override. + if vis.title == "": if isinstance(clause.value, np.datetime64): chart_title = date_utils.date_formatter(clause.value, ldf) else: @@ -303,10 +294,9 @@ def line_or_bar(ldf, dimension: Clause, measure: Clause): dimension = d2 color_attr = d1 else: + # if same attribute then remove_column_from_spec will remove both dims, we only want to remove one if d1.attribute == d2.attribute: - vis._inferred_intent.pop( - 0 - ) # if same attribute then remove_column_from_spec will remove both dims, we only want to remove one + vis._inferred_intent.pop(0) else: vis.remove_column_from_spec(d2.attribute) dimension = d1 @@ -380,12 +370,10 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]): ValueError Ensures no more than one attribute is placed in the same channel. """ - result_dict = ( - {} - ) # result of enforcing specified channel will be stored in result_dict - specified_dict = ( - {} - ) # specified_dict={"x":[],"y":[list of Dobj with y specified as channel]} + # result of enforcing specified channel will be stored in result_dict + result_dict = {} + # specified_dict={"x":[],"y":[list of Dobj with y specified as channel]} + specified_dict = {} # create a dictionary of specified channels in the given dobj for val in auto_channel.keys(): specified_dict[val] = vis.get_attr_by_channel(val) @@ -395,9 +383,10 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]): if len(sAttr) == 1: # if specified in dobj # remove the specified channel from auto_channel (matching by value, since channel key may not be same) for i in list(auto_channel.keys()): + # need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name) if (auto_channel[i].attribute == sAttr[0].attribute) and ( auto_channel[i].channel == sVal - ): # need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name) + ): auto_channel.pop(i) break sAttr[0].channel = sVal diff --git a/lux/processor/Parser.py b/lux/processor/Parser.py index c1852021..2e205704 100644 --- a/lux/processor/Parser.py +++ b/lux/processor/Parser.py @@ -54,9 +54,8 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]: if isinstance(clause, list): valid_values = [] for v in clause: - if ( - type(v) is str - ): # and v in list(ldf.columns): #TODO: Move validation check to Validator + # and v in list(ldf.columns): #TODO: Move validation check to Validator + if type(v) is str: valid_values.append(v) temp_spec = Clause(attribute=valid_values) new_context.append(temp_spec) @@ -95,9 +94,8 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]: if clause.description: # TODO: Move validation check to Validator # if ((clause.description in list(ldf.columns)) or clause.description == "?"):# if clause.description in the list of attributes - if any( - ext in [">", "<", "=", "!="] for ext in clause.description - ): # clause.description contain ">","<". or "=" + # clause.description contain ">","<". or "=" + if any(ext in [">", "<", "=", "!="] for ext in clause.description): # then parse it and assign to clause.attribute, clause.filter_op, clause.values clause.filter_op = re.findall( r"/.*/|>|=|<|>=|<=|!=", clause.description diff --git a/lux/processor/Validator.py b/lux/processor/Validator.py index 688a5f05..f01e7d42 100644 --- a/lux/processor/Validator.py +++ b/lux/processor/Validator.py @@ -85,9 +85,8 @@ def validate_clause(clause): else: vals = [clause.value] for val in vals: - if ( - val not in series.values - ): # (not series.str.contains(val).any()): + # (not series.str.contains(val).any()): + if val not in series.values: warnings.warn( f"The input value '{val}' does not exist for the attribute '{clause.attribute}' for the DataFrame." ) diff --git a/lux/utils/date_utils.py b/lux/utils/date_utils.py index eb067ea6..817e1ea8 100644 --- a/lux/utils/date_utils.py +++ b/lux/utils/date_utils.py @@ -40,9 +40,9 @@ def date_formatter(time_stamp, ldf): """ datetime = pd.to_datetime(time_stamp) if ldf.data_type["temporal"]: - date_column = ldf[ - ldf.data_type["temporal"][0] - ] # assumes only one temporal column, may need to change this function to recieve multiple temporal columns in the future + # assumes only one temporal column, may need to change this function to recieve multiple temporal columns in the future + date_column = ldf[ldf.data_type["temporal"][0]] + granularity = compute_date_granularity(date_column) date_str = "" if granularity == "year": @@ -80,16 +80,12 @@ def compute_date_granularity(date_column: pd.core.series.Series): field: str A str specifying the granularity of dates for the inspected temporal column """ - date_fields = [ - "day", - "month", - "year", - ] # supporting a limited set of Vega-Lite TimeUnit (https://vega.github.io/vega-lite/docs/timeunit.html) + # supporting a limited set of Vega-Lite TimeUnit (https://vega.github.io/vega-lite/docs/timeunit.html) + date_fields = ["day", "month", "year"] date_index = pd.DatetimeIndex(date_column) for field in date_fields: - if ( - hasattr(date_index, field) and len(getattr(date_index, field).unique()) != 1 - ): # can be changed to sum(getattr(date_index, field)) != 0 + # can be changed to sum(getattr(date_index, field)) != 0 + if hasattr(date_index, field) and len(getattr(date_index, field).unique()) != 1: return field return "year" # if none, then return year by default diff --git a/lux/utils/utils.py b/lux/utils/utils.py index 148509db..0c246597 100644 --- a/lux/utils/utils.py +++ b/lux/utils/utils.py @@ -69,9 +69,8 @@ def check_if_id_like(df, attribute): import re # Strong signals - high_cardinality = ( - df.cardinality[attribute] > 500 - ) # so that aggregated reset_index fields don't get misclassified + # so that aggregated reset_index fields don't get misclassified + high_cardinality = df.cardinality[attribute] > 500 attribute_contain_id = re.search(r"id", str(attribute)) is not None almost_all_vals_unique = df.cardinality[attribute] >= 0.98 * len(df) is_string = pd.api.types.is_string_dtype(df[attribute]) diff --git a/lux/vis/Vis.py b/lux/vis/Vis.py index ca73346c..a7883068 100644 --- a/lux/vis/Vis.py +++ b/lux/vis/Vis.py @@ -322,15 +322,12 @@ def refresh_source(self, ldf): # -> Vis: from lux.processor.Parser import Parser from lux.processor.Validator import Validator from lux.processor.Compiler import Compiler - from lux.executor.PandasExecutor import ( - PandasExecutor, - ) # TODO: temporary (generalize to executor) + from lux.executor.PandasExecutor import PandasExecutor + + # TODO: temporary (generalize to executor) -<<<<<<< HEAD -======= self.check_not_vislist_intent() ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 ldf.maintain_metadata() self._source = ldf self._inferred_intent = Parser.parse(self._intent) @@ -345,8 +342,6 @@ def refresh_source(self, ldf): # -> Vis: self._inferred_intent = vis._inferred_intent self._vis_data = vis.data self._min_max = vis._min_max -<<<<<<< HEAD -======= def check_not_vislist_intent(self): if len(self._intent) > 2 or "?" in self._intent: @@ -359,4 +354,3 @@ def check_not_vislist_intent(self): "The intent that you specified corresponds to more than one visualization. Please replace the Vis constructor with VisList to generate a list of visualizations. " + "For more information, see: https://lux-api.readthedocs.io/en/latest/source/guide/vis.html#working-with-collections-of-visualization-with-vislist" ) ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 diff --git a/lux/vis/VisList.py b/lux/vis/VisList.py index 0f15252a..5fcfbd4d 100644 --- a/lux/vis/VisList.py +++ b/lux/vis/VisList.py @@ -63,19 +63,6 @@ def set_intent(self, intent: List[Clause]) -> None: @property def exported(self) -> VisList: """ -<<<<<<< HEAD - Get selected visualizations as exported Vis List - Notes - ----- - Convert the _selectedVisIdxs dictionary into a programmable VisList - Example _selectedVisIdxs : - {'Vis List': [0, 2]} - - Returns - ------- - VisList - return a VisList of selected visualizations. -> VisList(v1, v2...) -======= Get selected visualizations as exported Vis List Notes @@ -88,7 +75,6 @@ def exported(self) -> VisList: ------- VisList return a VisList of selected visualizations. -> VisList(v1, v2...) ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 """ if not hasattr(self, "widget"): warnings.warn( @@ -143,9 +129,8 @@ def __repr__(self): y_channel = "" largest_mark = 0 largest_filter = 0 - for ( - vis - ) in self._collection: # finds longest x attribute among all visualizations + # finds longest x attribute among all visualizations + for vis in self._collection: filter_intents = None for clause in vis._inferred_intent: if clause.value != "": @@ -177,11 +162,8 @@ def __repr__(self): vis_repr = [] largest_x_length = len(x_channel) largest_y_length = len(y_channel) - for ( - vis - ) in ( - self._collection - ): # pads the shorter visualizations with spaces before the y attribute + # pads the shorter visualizations with spaces before the y attribute + for vis in self._collection: filter_intents = None x_channel = "" y_channel = "" diff --git a/lux/vislib/altair/BarChart.py b/lux/vislib/altair/BarChart.py index 561a23d4..5b7ecb57 100644 --- a/lux/vislib/altair/BarChart.py +++ b/lux/vislib/altair/BarChart.py @@ -120,14 +120,12 @@ def add_text(self): self.chart = self.chart + self.text self.code += self._topkcode - def encode_color( - self, - ): # override encode_color in AltairChart to enforce add_text occurs afterwards + # override encode_color in AltairChart to enforce add_text occurs afterwards + def encode_color(self): AltairChart.encode_color(self) self.add_text() - self.chart = self.chart.configure_mark( - tooltip=alt.TooltipContent("encoding") - ) # Setting tooltip as non-null + # Setting tooltip as non-null + self.chart = self.chart.configure_mark(tooltip=alt.TooltipContent("encoding")) self.code += ( f"""chart = chart.configure_mark(tooltip=alt.TooltipContent('encoding'))""" ) diff --git a/lux/vislib/altair/Heatmap.py b/lux/vislib/altair/Heatmap.py index 56ae7276..87c97b13 100644 --- a/lux/vislib/altair/Heatmap.py +++ b/lux/vislib/altair/Heatmap.py @@ -66,9 +66,8 @@ def initialize_chart(self): ) ) chart = chart.configure_scale(minOpacity=0.1, maxOpacity=1) - chart = chart.configure_mark( - tooltip=alt.TooltipContent("encoding") - ) # Setting tooltip as non-null + # Setting tooltip as non-null + chart = chart.configure_mark(tooltip=alt.TooltipContent("encoding")) chart = chart.interactive() # Enable Zooming and Panning #################################### diff --git a/lux/vislib/altair/ScatterChart.py b/lux/vislib/altair/ScatterChart.py index a6463041..583291d0 100644 --- a/lux/vislib/altair/ScatterChart.py +++ b/lux/vislib/altair/ScatterChart.py @@ -59,9 +59,8 @@ def initialize_chart(self): ), ) ) - chart = chart.configure_mark( - tooltip=alt.TooltipContent("encoding") - ) # Setting tooltip as non-null + # Setting tooltip as non-null + chart = chart.configure_mark(tooltip=alt.TooltipContent("encoding")) chart = chart.interactive() # Enable Zooming and Panning ##################################### diff --git a/tests/test_compiler.py b/tests/test_compiler.py index e2079ab6..760c742c 100644 --- a/tests/test_compiler.py +++ b/tests/test_compiler.py @@ -156,9 +156,8 @@ def test_sort_bar(): def test_specified_vis_collection(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") vlst = VisList( [ @@ -188,9 +187,8 @@ def test_specified_vis_collection(): def test_specified_channel_enforced_vis_collection(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") visList = VisList( [lux.Clause(attribute="?"), lux.Clause(attribute="MilesPerGal", channel="x")], df, @@ -202,9 +200,8 @@ def test_specified_channel_enforced_vis_collection(): def test_autoencoding_scatter(): # No channel specified df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis([lux.Clause(attribute="MilesPerGal"), lux.Clause(attribute="Weight")], df) check_attribute_on_channel(vis, "MilesPerGal", "x") check_attribute_on_channel(vis, "Weight", "y") @@ -244,9 +241,8 @@ def test_autoencoding_scatter(): def test_autoencoding_histogram(): # No channel specified df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis([lux.Clause(attribute="MilesPerGal", channel="y")], df) check_attribute_on_channel(vis, "MilesPerGal", "y") @@ -257,9 +253,8 @@ def test_autoencoding_histogram(): def test_autoencoding_line_chart(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis([lux.Clause(attribute="Year"), lux.Clause(attribute="Acceleration")], df) check_attribute_on_channel(vis, "Year", "x") check_attribute_on_channel(vis, "Acceleration", "y") @@ -298,9 +293,8 @@ def test_autoencoding_line_chart(): def test_autoencoding_color_line_chart(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") intent = [ lux.Clause(attribute="Year"), lux.Clause(attribute="Acceleration"), @@ -314,9 +308,8 @@ def test_autoencoding_color_line_chart(): def test_autoencoding_color_scatter_chart(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis( [ lux.Clause(attribute="Horsepower"), diff --git a/tests/test_dates.py b/tests/test_dates.py index 28cdcc3a..4b87f7a6 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -22,9 +22,8 @@ def test_dateformatter(): ldf = pd.read_csv("lux/data/car.csv") - ldf["Year"] = pd.to_datetime( - ldf["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + ldf["Year"] = pd.to_datetime(ldf["Year"], format="%Y") timestamp = np.datetime64("2019-08-26") ldf.maintain_metadata() assert date_utils.date_formatter(timestamp, ldf) == "2019" @@ -93,11 +92,7 @@ def test_period_to_altair(): exported_code = df.recommendation["Filter"][2].to_Altair() -<<<<<<< HEAD - assert "Year = 1971" in exported_code -======= assert "Year = 1972" in exported_code ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 def test_refresh_inplace(): diff --git a/tests/test_error_warning.py b/tests/test_error_warning.py index 238b7549..d5fe49ff 100644 --- a/tests/test_error_warning.py +++ b/tests/test_error_warning.py @@ -36,8 +36,6 @@ def test_bad_filter(): df[df["Region"] == "asdfgh"]._repr_html_() -<<<<<<< HEAD -======= def test_multi_vis(): df = pd.read_csv("lux/data/college.csv") with pytest.raises( @@ -61,7 +59,6 @@ def test_multi_vis(): )._repr_html_() ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 # Test Properties with Private Variables Readable but not Writable def test_vis_private_properties(): from lux.vis.Vis import Vis diff --git a/tests/test_executor.py b/tests/test_executor.py index 2dababb0..d1a18a6b 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -35,9 +35,8 @@ def test_lazy_execution(): def test_selection(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") intent = [ lux.Clause(attribute=["Horsepower", "Weight", "Acceleration"]), lux.Clause(attribute="Year"), @@ -102,9 +101,8 @@ def test_colored_line_chart(): from lux.vis.Vis import Clause df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") x_clause = Clause(attribute="Year", channel="x") y_clause = Clause(attribute="MilesPerGal", channel="y") @@ -116,19 +114,19 @@ def test_colored_line_chart(): color_cardinality = len(df.unique_values["Cylinders"]) group_by_cardinality = len(df.unique_values["Year"]) assert len(new_vis.data.columns) == 3 + # Not color_cardinality*group_by_cardinality since some combinations have 0 values assert ( len(new_vis.data) == 60 > group_by_cardinality < color_cardinality * group_by_cardinality - ) # Not color_cardinality*group_by_cardinality since some combinations have 0 values + ) def test_filter(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") intent = [ lux.Clause(attribute="Horsepower"), lux.Clause(attribute="Year"), diff --git a/tests/test_interestingness.py b/tests/test_interestingness.py index 48d04961..a42766ce 100644 --- a/tests/test_interestingness.py +++ b/tests/test_interestingness.py @@ -51,15 +51,9 @@ def test_interestingness_1_0_0(): if int(vis._inferred_intent[2].value) == 8: rank1 = f if int(vis._inferred_intent[2].value) == 6: -<<<<<<< HEAD - rank2 = f - if "1972" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): - rank3 = f -======= rank3 = f if "ford" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): rank2 = f ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3 @@ -118,11 +112,7 @@ def test_interestingness_0_1_0(): rank1 = f if str(df.recommendation["Filter"][f]._inferred_intent[2].value) == "Europe": rank2 = f -<<<<<<< HEAD - if "1971" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): -======= if "1970" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 rank3 = f assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3 @@ -161,14 +151,7 @@ def test_interestingness_1_1_0(): if len(vis.get_attr_by_attr_name("Cylinders")) > 0: if int(vis._inferred_intent[2].value) == 6: rank1 = f -<<<<<<< HEAD - if int(vis._inferred_intent[2].value) == 5: - rank3 = f - if len(vis.get_attr_by_attr_name("Origin")) > 0: - if str(vis._inferred_intent[2].value) == "Europe": -======= if int(vis._inferred_intent[2].value) == 8: ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 rank2 = f if len(vis.get_attr_by_attr_name("Origin")) > 0: if str(vis._inferred_intent[2].value) == "Europe": @@ -278,15 +261,9 @@ def test_interestingness_0_2_0(): for f in range(0, len(df.recommendation["Filter"])): if "1973" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): rank1 = f -<<<<<<< HEAD - if "1976" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): - rank2 = f - if str(df.recommendation["Filter"][f]._inferred_intent[2].value) == "Europe": -======= if "ford" in str(df.recommendation["Filter"][f]._inferred_intent[2].value): rank2 = f if str(df.recommendation["Filter"][f]._inferred_intent[2].value) == "USA": ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 rank3 = f assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3 diff --git a/tests/test_maintainence.py b/tests/test_maintainence.py index adb3697e..35f4ec71 100644 --- a/tests/test_maintainence.py +++ b/tests/test_maintainence.py @@ -62,7 +62,8 @@ def test_metadata_column_group_reset_df(): assert hasattr(df, "_metadata_fresh") result = df.groupby("Cylinders").mean() assert not hasattr(result, "_metadata_fresh") - result._repr_html_() # Note that this should trigger two compute metadata (one for df, and one for an intermediate df.reset_index used to feed inside created Vis) + # Note that this should trigger two compute metadata (one for df, and one for an intermediate df.reset_index used to feed inside created Vis) + result._repr_html_() assert ( result._metadata_fresh == True ), "Failed to maintain metadata after display df" @@ -77,20 +78,12 @@ def test_recs_inplace_operation(): df = pd.read_csv("lux/data/car.csv") df._repr_html_() assert df._recs_fresh == True, "Failed to maintain recommendation after display df" -<<<<<<< HEAD - assert len(df.recommendation["Occurrence"]) == 3 -======= assert len(df.recommendation["Occurrence"]) == 4 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df.drop(columns=["Name"], inplace=True) assert "Name" not in df.columns, "Failed to perform `drop` operation in-place" assert ( df._recs_fresh == False ), "Failed to maintain recommendation after in-place Pandas operation" df._repr_html_() -<<<<<<< HEAD - assert len(df.recommendation["Occurrence"]) == 2 -======= assert len(df.recommendation["Occurrence"]) == 3 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 assert df._recs_fresh == True, "Failed to maintain recommendation after display df" diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index 561d086a..ad5008de 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -38,10 +38,8 @@ def test_rename_inplace(): df.rename(columns={"Name": "Car Name"}, inplace=True) df._repr_html_() new_df._repr_html_() - new_df, df = ( - df, - new_df, - ) # new_df is the old dataframe (df) with the new column name changed inplace + # new_df is the old dataframe (df) with the new column name changed inplace + new_df, df = df, new_df assert df.data_type_lookup != new_df.data_type_lookup @@ -111,10 +109,7 @@ def test_rename3(): "col7", "col8", "col9", -<<<<<<< HEAD -======= "col10", ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 ] df._repr_html_() assert list(df.recommendation.keys()) == [ @@ -123,11 +118,7 @@ def test_rename3(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(df.cardinality) == 9 -======= assert len(df.cardinality) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 assert "col2" in list(df.cardinality.keys()) @@ -202,13 +193,8 @@ def test_query(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(new_df.cardinality) == 9 -======= assert len(new_df.cardinality) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_pop(): df = pd.read_csv("lux/data/car.csv") @@ -221,13 +207,8 @@ def test_pop(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(df.cardinality) == 8 -======= assert len(df.cardinality) == 9 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_transform(): df = pd.read_csv("lux/data/car.csv") @@ -235,13 +216,8 @@ def test_transform(): new_df = df.iloc[:, 1:].groupby("Origin").transform(sum) new_df._repr_html_() assert list(new_df.recommendation.keys()) == ["Correlation", "Occurrence"] -<<<<<<< HEAD - assert len(new_df.cardinality) == 6 -======= assert len(new_df.cardinality) == 7 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_get_group(): df = pd.read_csv("lux/data/car.csv") @@ -255,13 +231,8 @@ def test_get_group(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(new_df.cardinality) == 9 -======= assert len(new_df.cardinality) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_applymap(): df = pd.read_csv("lux/data/car.csv") @@ -275,22 +246,11 @@ def test_applymap(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(df.cardinality) == 9 -======= assert len(df.cardinality) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_strcat(): -<<<<<<< HEAD - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true" - ) -======= df = pd.read_csv("lux/data/car.csv") ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df["Year"] = pd.to_datetime(df["Year"], format="%Y") df["combined"] = df["Origin"].str.cat(df["Brand"], sep=", ") df._repr_html_() @@ -304,13 +264,7 @@ def test_strcat(): def test_named_agg(): -<<<<<<< HEAD - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true" - ) -======= df = pd.read_csv("lux/data/car.csv") ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.groupby("Brand").agg( avg_weight=("Weight", "mean"), @@ -333,13 +287,8 @@ def test_change_dtype(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(df.data_type_lookup) == 9 -======= assert len(df.data_type_lookup) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_get_dummies(): df = pd.read_csv("lux/data/car.csv") @@ -352,13 +301,8 @@ def test_get_dummies(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(new_df.data_type_lookup) == 310 -======= assert len(new_df.data_type_lookup) == 339 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_drop(): df = pd.read_csv("lux/data/car.csv") @@ -372,13 +316,8 @@ def test_drop(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(new_df2.cardinality) == 6 -======= assert len(new_df2.cardinality) == 7 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_merge(): df = pd.read_csv("lux/data/car.csv") @@ -392,13 +331,8 @@ def test_merge(): "Occurrence", "Temporal", ] # TODO once bug is fixed -<<<<<<< HEAD - assert len(new_df2.cardinality) == 10 -======= assert len(new_df2.cardinality) == 11 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 - def test_prefix(): df = pd.read_csv("lux/data/car.csv") @@ -411,22 +345,12 @@ def test_prefix(): "Occurrence", "Temporal", ] -<<<<<<< HEAD - assert len(new_df.cardinality) == 9 -======= assert len(new_df.cardinality) == 10 ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 assert new_df.cardinality["1_Name"] == 300 def test_loc(): -<<<<<<< HEAD - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true" - ) -======= df = pd.read_csv("lux/data/car.csv") ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.loc[:, "Displacement":"Origin"] new_df._repr_html_() @@ -457,13 +381,7 @@ def test_loc(): def test_iloc(): -<<<<<<< HEAD - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true" - ) -======= df = pd.read_csv("lux/data/car.csv") ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.iloc[:, 3:9] new_df._repr_html_() @@ -636,12 +554,7 @@ def test_value_counts(): def test_str_replace(): -<<<<<<< HEAD - url = "https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true" - df = pd.read_csv(url) -======= df = pd.read_csv("lux/data/car.csv") ->>>>>>> 9897d0e18c9ee0c775151e88cde40ba890732939 df._repr_html_() # compute metadata assert df.cardinality is not None series = df["Brand"].str.replace("chevrolet", "chevy") diff --git a/tests/test_vis.py b/tests/test_vis.py index 0f6f9eec..ff3b6f63 100644 --- a/tests/test_vis.py +++ b/tests/test_vis.py @@ -49,9 +49,8 @@ def test_vis_collection(): filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist) )[0] assert vis_with_year.get_attr_by_channel("x")[0].attribute == "Year" - assert ( - len(vlist) == len(df.columns) - 1 - 1 - ) # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute + # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute + assert len(vlist) == len(df.columns) - 1 - 1 vlist = VisList(["Height", "?"], df) assert len(vlist) == len(df.columns) - 1 # remove 1 for vis with for same attribute @@ -141,9 +140,8 @@ def test_vis_collection_via_list_of_vis(): "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" ) df = pd.read_csv(url) - df["Year"] = pd.to_datetime( - df["Year"], format="%Y" - ) # change pandas dtype for the column "Year" to datetype + # change pandas dtype for the column "Year" to datetype + df["Year"] = pd.to_datetime(df["Year"], format="%Y") from lux.vis.VisList import VisList from lux.vis.Vis import Vis From c8f2db54c3a984f5e2b60094b77047d53ffac2fa Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 13:26:36 -0800 Subject: [PATCH 10/22] _config/config.py --- lux/_config/config.py | 4 +- test.ipynb | 258 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 259 insertions(+), 3 deletions(-) create mode 100644 test.ipynb diff --git a/lux/_config/config.py b/lux/_config/config.py index 0c1e967f..809d89b1 100644 --- a/lux/_config/config.py +++ b/lux/_config/config.py @@ -115,9 +115,7 @@ def register_action( update_actions["flag"] = True -def remove_action( - name: str = "", -) -> None: +def remove_action(name: str = "") -> None: """ Removes the provided action globally in lux diff --git a/test.ipynb b/test.ipynb new file mode 100644 index 00000000..27184612 --- /dev/null +++ b/test.ipynb @@ -0,0 +1,258 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import lux" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", + "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", + "\n", + "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", + " return method()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame([])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(0, 0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", + "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", + "\n", + "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", + " return method()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(df.shape)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "df.intent = []" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", + "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", + "\n", + "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", + " return method()\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 4a361e151299f5c9587e89f8de477ae0e5793d23 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 13:27:20 -0800 Subject: [PATCH 11/22] delete test notebook --- test.ipynb | 258 ----------------------------------------------------- 1 file changed, 258 deletions(-) delete mode 100644 test.ipynb diff --git a/test.ipynb b/test.ipynb deleted file mode 100644 index 27184612..00000000 --- a/test.ipynb +++ /dev/null @@ -1,258 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import lux" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", - "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", - "\n", - "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", - " return method()\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.DataFrame([])\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(0, 0)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", - "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", - "\n", - "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", - " return method()\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "print(df.shape)\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "df.intent = []" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/ujjaini/anaconda3/lib/python3.7/site-packages/IPython/core/formatters.py:345: UserWarning: \n", - "Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.\n", - "\n", - "Please report this issue on Github: https://github.com/lux-org/lux/issues \n", - " return method()\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 3aceabcbe4f15263f828d34d419c5cc2c2adfbfc Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 13:39:28 -0800 Subject: [PATCH 12/22] action --- lux/action/column_group.py | 4 +--- lux/action/correlation.py | 3 +-- lux/action/custom.py | 8 ++------ lux/action/enhance.py | 8 ++------ lux/action/filter.py | 4 +--- lux/action/generalize.py | 8 ++------ lux/action/similarity.py | 10 ++-------- 7 files changed, 11 insertions(+), 34 deletions(-) diff --git a/lux/action/column_group.py b/lux/action/column_group.py index 049da68a..29d33d92 100644 --- a/lux/action/column_group.py +++ b/lux/action/column_group.py @@ -51,9 +51,7 @@ def column_group(ldf): data_model="dimension", aggregation=None, ), - lux.Clause( - str(attribute), data_type="quantitative", aggregation=None - ), + lux.Clause(str(attribute), data_type="quantitative", aggregation=None), ] ) collection.append(vis) diff --git a/lux/action/correlation.py b/lux/action/correlation.py index 1a999e48..53cc8540 100644 --- a/lux/action/correlation.py +++ b/lux/action/correlation.py @@ -85,8 +85,7 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True): def check_transpose_not_computed(vlist: VisList, a: str, b: str): transpose_exist = list( filter( - lambda x: (x._inferred_intent[0].attribute == b) - and (x._inferred_intent[1].attribute == a), + lambda x: (x._inferred_intent[0].attribute == b) and (x._inferred_intent[1].attribute == a), vlist, ) ) diff --git a/lux/action/custom.py b/lux/action/custom.py index c709d34b..72ece683 100644 --- a/lux/action/custom.py +++ b/lux/action/custom.py @@ -67,14 +67,10 @@ def custom_actions(ldf): recommendations = [] for action_name in lux.actions.__dir__(): display_condition = lux.actions.__getattr__(action_name).display_condition - if display_condition is None or ( - display_condition is not None and display_condition(ldf) - ): + if display_condition is None or (display_condition is not None and display_condition(ldf)): args = lux.actions.__getattr__(action_name).args if args: - recommendation = lux.actions.__getattr__(action_name).action( - ldf, args - ) + recommendation = lux.actions.__getattr__(action_name).action(ldf, args) else: recommendation = lux.actions.__getattr__(action_name).action(ldf) recommendations.append(recommendation) diff --git a/lux/action/enhance.py b/lux/action/enhance.py index fb889b11..a74bd452 100644 --- a/lux/action/enhance.py +++ b/lux/action/enhance.py @@ -35,14 +35,10 @@ def enhance(ldf): filters = utils.get_filter_specs(ldf._intent) # Collect variables that already exist in the intent - attr_specs = list( - filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent) - ) + attr_specs = list(filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent)) fltr_str = [fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters] attr_str = [clause.attribute for clause in attr_specs] - intended_attrs = ( - '

' + ", ".join(attr_str + fltr_str) + "

" - ) + intended_attrs = '

' + ", ".join(attr_str + fltr_str) + "

" if len(attr_specs) == 1: recommendation = { "action": "Enhance", diff --git a/lux/action/filter.py b/lux/action/filter.py index 0f2c6037..891ad909 100644 --- a/lux/action/filter.py +++ b/lux/action/filter.py @@ -108,9 +108,7 @@ def get_complementary_ops(fltr_op): unique_values = ldf.unique_values[cat] for i in range(0, len(unique_values)): new_spec = column_spec.copy() - new_filter = lux.Clause( - attribute=cat, filter_op="=", value=unique_values[i] - ) + new_filter = lux.Clause(attribute=cat, filter_op="=", value=unique_values[i]) new_spec.append(new_filter) temp_vis = Vis(new_spec) output.append(temp_vis) diff --git a/lux/action/generalize.py b/lux/action/generalize.py index c6096cc0..d95bcb26 100644 --- a/lux/action/generalize.py +++ b/lux/action/generalize.py @@ -38,16 +38,12 @@ def generalize(ldf): output = [] excluded_columns = [] - attributes = list( - filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent) - ) + attributes = list(filter(lambda x: x.value == "" and x.attribute != "Record", ldf._intent)) filters = utils.get_filter_specs(ldf._intent) fltr_str = [fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters] attr_str = [clause.attribute for clause in attributes] - intended_attrs = ( - '

' + ", ".join(attr_str + fltr_str) + "

" - ) + intended_attrs = '

' + ", ".join(attr_str + fltr_str) + "

" recommendation = { "action": "Generalize", diff --git a/lux/action/similarity.py b/lux/action/similarity.py index c9871cbc..174a4d43 100644 --- a/lux/action/similarity.py +++ b/lux/action/similarity.py @@ -80,12 +80,7 @@ def aggregate(vis): xAxis = vis.get_attr_by_channel("x")[0].attribute yAxis = vis.get_attr_by_channel("y")[0].attribute - vis.data = ( - vis.data[[xAxis, yAxis]] - .groupby(xAxis, as_index=False) - .agg({yAxis: "mean"}) - .copy() - ) + vis.data = vis.data[[xAxis, yAxis]].groupby(xAxis, as_index=False).agg({yAxis: "mean"}).copy() def interpolate(vis, length): @@ -133,8 +128,7 @@ def interpolate(vis, length): x_diff = xVals[count] - xVals[count - 1] yDiff = yVals[count] - yVals[count - 1] interpolated_y_vals[i] = ( - yVals[count - 1] - + (interpolated_x - xVals[count - 1]) / x_diff * yDiff + yVals[count - 1] + (interpolated_x - xVals[count - 1]) / x_diff * yDiff ) vis.data = pd.DataFrame( list(zip(interpolated_x_vals, interpolated_y_vals)), From 1e7e03b2a84b2fea1bca4de1ed616d354cc68139 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 13:49:45 -0800 Subject: [PATCH 13/22] line length 105 --- lux/core/frame.py | 53 ++++++--------------- lux/executor/PandasExecutor.py | 57 ++++++----------------- lux/executor/SQLExecutor.py | 64 ++++++++++---------------- lux/interestingness/interestingness.py | 28 +++-------- lux/processor/Compiler.py | 28 +++-------- lux/processor/Parser.py | 4 +- lux/processor/Validator.py | 13 ++---- lux/utils/date_utils.py | 8 +--- lux/utils/message.py | 4 +- lux/utils/utils.py | 4 +- lux/vis/Vis.py | 31 ++++--------- lux/vis/VisList.py | 33 ++++--------- lux/vislib/altair/AltairChart.py | 22 ++++----- lux/vislib/altair/AltairRenderer.py | 12 ++--- lux/vislib/altair/BarChart.py | 20 ++++---- lux/vislib/altair/Histogram.py | 5 +- lux/vislib/altair/LineChart.py | 20 ++++---- tests/test_action.py | 11 ++--- tests/test_compiler.py | 8 +--- tests/test_dates.py | 20 ++------ tests/test_error_warning.py | 4 +- tests/test_executor.py | 31 +++---------- tests/test_interestingness.py | 25 ++++------ tests/test_maintainence.py | 12 ++--- tests/test_pandas_coverage.py | 52 ++++++--------------- tests/test_type.py | 14 ++---- tests/test_vis.py | 39 ++++------------ 27 files changed, 179 insertions(+), 443 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 1dfa0d04..080c0294 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -102,12 +102,10 @@ def history(self): return self._history def maintain_metadata(self): - if ( - not hasattr(self, "_metadata_fresh") or not self._metadata_fresh - ): # Check that metadata has not yet been computed - if ( - len(self) > 0 - ): # only compute metadata information if the dataframe is non-empty + # Check that metadata has not yet been computed + if not hasattr(self, "_metadata_fresh") or not self._metadata_fresh: + # only compute metadata information if the dataframe is non-empty + if len(self) > 0: self.executor.compute_stats(self) self.executor.compute_dataset_metadata(self) self._infer_structure() @@ -162,9 +160,7 @@ def _infer_structure(self): is_multi_index_flag = self.index.nlevels != 1 not_int_index_flag = self.index.dtype != "int64" small_df_flag = len(self) < 100 - self.pre_aggregated = ( - is_multi_index_flag or not_int_index_flag - ) and small_df_flag + self.pre_aggregated = (is_multi_index_flag or not_int_index_flag) and small_df_flag if "Number of Records" in self.columns: self.pre_aggregated = True very_small_df_flag = len(self) <= 10 @@ -408,9 +404,7 @@ def compute_SQL_data_type(self): datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format( table_name, attr ) - datatype = list( - pd.read_sql(datatype_query, self.SQLconnection)["data_type"] - )[0] + datatype = list(pd.read_sql(datatype_query, self.SQLconnection)["data_type"])[0] sql_dtypes[attr] = datatype data_type = {"quantitative": [], "nominal": [], "temporal": []} @@ -447,10 +441,7 @@ def compute_SQL_data_type(self): self.data_type = data_type def _append_rec(self, rec_infolist, recommendations: Dict): - if ( - recommendations["collection"] is not None - and len(recommendations["collection"]) > 0 - ): + if recommendations["collection"] is not None and len(recommendations["collection"]) > 0: rec_infolist.append(recommendations) def maintain_recs(self): @@ -477,9 +468,7 @@ def maintain_recs(self): for id_field in rec_df.data_type["id"]: id_fields_str += f"{id_field}, " id_fields_str = id_fields_str[:-2] - rec_df._message.add( - f"{id_fields_str} is not visualized since it resembles an ID field." - ) + rec_df._message.add(f"{id_fields_str} is not visualized since it resembles an ID field.") rec_df._prev = None # reset _prev # Check that recs has not yet been computed @@ -506,19 +495,15 @@ def maintain_recs(self): ldf.current_vis is not None and len(ldf.current_vis) == 0 ) one_current_vis = ( - lambda ldf: ldf.current_vis is not None - and len(ldf.current_vis) == 1 + lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) == 1 ) multiple_current_vis = ( - lambda ldf: ldf.current_vis is not None - and len(ldf.current_vis) > 1 + lambda ldf: ldf.current_vis is not None and len(ldf.current_vis) > 1 ) # globally register default actions lux.register_action("correlation", correlation, no_vis) - lux.register_action( - "distribution", univariate, no_vis, "quantitative" - ) + lux.register_action("distribution", univariate, no_vis, "quantitative") lux.register_action("occurrence", univariate, no_vis, "nominal") lux.register_action("temporal", univariate, no_vis, "temporal") @@ -649,9 +634,7 @@ def set_intent_on_click(self, change): from lux.processor.Compiler import Compiler intent_action = list(self._widget.selectedIntentIndex.keys())[0] - vis = self.recommendation[intent_action][ - self._widget.selectedIntentIndex[intent_action][0] - ] + vis = self.recommendation[intent_action][self._widget.selectedIntentIndex[intent_action][0]] self.set_intent_as_vis(vis) self.maintain_metadata() @@ -702,9 +685,7 @@ def _repr_html_(self): return self.maintain_metadata() - if self._intent != [] and ( - not hasattr(self, "_compiled") or not self._compiled - ): + if self._intent != [] and (not hasattr(self, "_compiled") or not self._compiled): from lux.processor.Compiler import Compiler self.current_vis = Compiler.compile_intent(self, self._intent) @@ -719,9 +700,7 @@ def _repr_html_(self): # Observers(callback_function, listen_to_this_variable) self._widget.observe(self.remove_deleted_recs, names="deletedIndices") - self._widget.observe( - self.set_intent_on_click, names="selectedIntentIndex" - ) + self._widget.observe(self.set_intent_on_click, names="selectedIntentIndex") if len(self.recommendation) > 0: # box = widgets.Box(layout=widgets.Layout(display='inline')) @@ -738,9 +717,7 @@ def _repr_html_(self): def on_button_clicked(b): with self.output: if b: - self._toggle_pandas_display = ( - not self._toggle_pandas_display - ) + self._toggle_pandas_display = not self._toggle_pandas_display clear_output() if self._toggle_pandas_display: display(self.display_pandas()) diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index d168cdd9..97bed87c 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -162,21 +162,15 @@ def execute_aggregate(vis: Vis, isFiltered=True): .reset_index() ) vis._vis_data = vis.data.rename(columns={"index": "Record"}) - vis._vis_data = vis.data[ - [groupby_attr.attribute, color_attr.attribute, "Record"] - ] + vis._vis_data = vis.data[[groupby_attr.attribute, color_attr.attribute, "Record"]] else: - vis._vis_data = ( - vis.data.groupby(groupby_attr.attribute).count().reset_index() - ) + vis._vis_data = vis.data.groupby(groupby_attr.attribute).count().reset_index() vis._vis_data = vis.data.rename(columns={"index": "Record"}) vis._vis_data = vis.data[[groupby_attr.attribute, "Record"]] else: # if color is specified, need to group by groupby_attr and color_attr if has_color: - groupby_result = vis.data.groupby( - [groupby_attr.attribute, color_attr.attribute] - ) + groupby_result = vis.data.groupby([groupby_attr.attribute, color_attr.attribute]) else: groupby_result = vis.data.groupby(groupby_attr.attribute) groupby_result = groupby_result.agg(agg_func) @@ -199,9 +193,7 @@ def execute_aggregate(vis: Vis, isFiltered=True): df = pd.DataFrame( { columns[0]: attr_unique_vals * color_cardinality, - columns[1]: pd.Series(color_attr_vals).repeat( - N_unique_vals - ), + columns[1]: pd.Series(color_attr_vals).repeat(N_unique_vals), } ) vis._vis_data = vis.data.merge( @@ -211,12 +203,8 @@ def execute_aggregate(vis: Vis, isFiltered=True): suffixes=["", "_right"], ) for col in columns[2:]: - vis.data[col] = vis.data[col].fillna( - 0 - ) # Triggers __setitem__ - assert len( - list(vis.data[groupby_attr.attribute]) - ) == N_unique_vals * len( + vis.data[col] = vis.data[col].fillna(0) # Triggers __setitem__ + assert len(list(vis.data[groupby_attr.attribute])) == N_unique_vals * len( color_attr_vals ), f"Aggregated data missing values compared to original range of values of `{groupby_attr.attribute, color_attr.attribute}`." @@ -235,9 +223,7 @@ def execute_aggregate(vis: Vis, isFiltered=True): assert ( len(list(vis.data[groupby_attr.attribute])) == N_unique_vals ), f"Aggregated data missing values compared to original range of values of `{groupby_attr.attribute}`." - vis._vis_data = vis.data.sort_values( - by=groupby_attr.attribute, ascending=True - ) + vis._vis_data = vis.data.sort_values(by=groupby_attr.attribute, ascending=True) vis._vis_data = vis.data.reset_index() vis._vis_data = vis.data.drop(columns="index") @@ -292,9 +278,7 @@ def execute_filter(vis: Vis): return False @staticmethod - def apply_filter( - df: pd.DataFrame, attribute: str, op: str, val: object - ) -> pd.DataFrame: + def apply_filter(df: pd.DataFrame, attribute: str, op: str, val: object) -> pd.DataFrame: """ Helper function for applying filter to a dataframe @@ -335,12 +319,8 @@ def execute_2D_binning(vis: Vis): x_attr = vis.get_attr_by_channel("x")[0] y_attr = vis.get_attr_by_channel("y")[0] - vis._vis_data.loc[:, "xBin"] = pd.cut( - vis._vis_data[x_attr.attribute], bins=40 - ) - vis._vis_data.loc[:, "yBin"] = pd.cut( - vis._vis_data[y_attr.attribute], bins=40 - ) + vis._vis_data.loc[:, "xBin"] = pd.cut(vis._vis_data[x_attr.attribute], bins=40) + vis._vis_data.loc[:, "yBin"] = pd.cut(vis._vis_data[y_attr.attribute], bins=40) color_attr = vis.get_attr_by_channel("color") if len(color_attr) > 0: @@ -369,14 +349,10 @@ def execute_2D_binning(vis: Vis): result = result[result["count"] != 0] # convert type to facilitate weighted correlation interestingess calculation - result.loc[:, "xBinStart"] = ( - result["xBin"].apply(lambda x: x.left).astype("float") - ) + result.loc[:, "xBinStart"] = result["xBin"].apply(lambda x: x.left).astype("float") result.loc[:, "xBinEnd"] = result["xBin"].apply(lambda x: x.right) - result.loc[:, "yBinStart"] = ( - result["yBin"].apply(lambda x: x.left).astype("float") - ) + result.loc[:, "yBinStart"] = result["yBin"].apply(lambda x: x.left).astype("float") result.loc[:, "yBinEnd"] = result["yBin"].apply(lambda x: x.right) vis._vis_data = result.drop(columns=["xBin", "yBin"]) @@ -408,10 +384,7 @@ def compute_data_type(self, ldf: LuxDataFrame): if ldf.pre_aggregated: if ldf.cardinality[attr] == len(ldf): ldf.data_type_lookup[attr] = "nominal" - if ( - ldf.cardinality[attr] / len(ldf) < 0.4 - and ldf.cardinality[attr] < 20 - ): + if ldf.cardinality[attr] / len(ldf) < 0.4 and ldf.cardinality[attr] < 20: ldf.data_type_lookup[attr] = "nominal" else: ldf.data_type_lookup[attr] = "quantitative" @@ -463,9 +436,7 @@ def compute_data_type(self, ldf: LuxDataFrame): def compute_data_model(self, ldf: LuxDataFrame): ldf.data_model = { "measure": ldf.data_type["quantitative"], - "dimension": ldf.data_type["nominal"] - + ldf.data_type["temporal"] - + ldf.data_type["id"], + "dimension": ldf.data_type["nominal"] + ldf.data_type["temporal"] + ldf.data_type["id"], } ldf.data_model_lookup = self.reverseMapping(ldf.data_model) diff --git a/lux/executor/SQLExecutor.py b/lux/executor/SQLExecutor.py index 2cca392d..c3978975 100644 --- a/lux/executor/SQLExecutor.py +++ b/lux/executor/SQLExecutor.py @@ -60,9 +60,7 @@ def execute(vislist: VisList, ldf: LuxDataFrame): required_variables = ",".join(required_variables) row_count = list( pd.read_sql( - "SELECT COUNT(*) FROM {} {}".format( - ldf.table_name, where_clause - ), + "SELECT COUNT(*) FROM {} {}".format(ldf.table_name, where_clause), ldf.SQLconnection, )["count"] )[0] @@ -116,41 +114,35 @@ def execute_aggregate(vis: Vis, ldf: LuxDataFrame): else: where_clause, filterVars = SQLExecutor.execute_filter(vis) if agg_func == "mean": - mean_query = ( - "SELECT {}, AVG({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = "SELECT {}, AVG({}) as {} FROM {} {} GROUP BY {}".format( + groupby_attr.attribute, + measure_attr.attribute, + measure_attr.attribute, + ldf.table_name, + where_clause, + groupby_attr.attribute, ) vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) if agg_func == "sum": - mean_query = ( - "SELECT {}, SUM({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = "SELECT {}, SUM({}) as {} FROM {} {} GROUP BY {}".format( + groupby_attr.attribute, + measure_attr.attribute, + measure_attr.attribute, + ldf.table_name, + where_clause, + groupby_attr.attribute, ) vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) if agg_func == "max": - mean_query = ( - "SELECT {}, MAX({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = "SELECT {}, MAX({}) as {} FROM {} {} GROUP BY {}".format( + groupby_attr.attribute, + measure_attr.attribute, + measure_attr.attribute, + ldf.table_name, + where_clause, + groupby_attr.attribute, ) vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) @@ -162,9 +154,7 @@ def execute_aggregate(vis: Vis, ldf: LuxDataFrame): # For filtered aggregation that have missing groupby-attribute values, set these aggregated value as 0, since no datapoints for vals in all_attr_vals: if vals not in result_vals: - vis.data.loc[len(vis.data)] = [vals] + [0] * ( - len(vis.data.columns) - 1 - ) + vis.data.loc[len(vis.data)] = [vals] + [0] * (len(vis.data.columns) - 1) @staticmethod def execute_binning(vis: Vis, ldf: LuxDataFrame): @@ -200,9 +190,7 @@ def execute_binning(vis: Vis, ldf: LuxDataFrame): # binEdges of size N+1, so need to compute binCenter as the bin location upper_edges = [float(i) for i in upper_edges.split(",")] if attr_type == int: - bin_centers = np.array( - [math.ceil((attr_min + attr_min + bin_width) / 2)] - ) + bin_centers = np.array([math.ceil((attr_min + attr_min + bin_width) / 2)]) else: bin_centers = np.array([(attr_min + attr_min + bin_width) / 2]) bin_centers = np.append( @@ -215,9 +203,7 @@ def execute_binning(vis: Vis, ldf: LuxDataFrame): math.ceil((upper_edges[len(upper_edges) - 1] + attr_max) / 2), ) else: - bin_centers = np.append( - bin_centers, (upper_edges[len(upper_edges) - 1] + attr_max) / 2 - ) + bin_centers = np.append(bin_centers, (upper_edges[len(upper_edges) - 1] + attr_max) / 2) if len(bin_centers) > len(bin_count_data): bucket_lables = bin_count_data["width_bucket"].unique() diff --git a/lux/interestingness/interestingness.py b/lux/interestingness/interestingness.py index 9d175583..f70a658b 100644 --- a/lux/interestingness/interestingness.py +++ b/lux/interestingness/interestingness.py @@ -75,9 +75,7 @@ def interestingness(vis: Vis, ldf: LuxDataFrame) -> int: if n_filter == 0: return unevenness(vis, ldf, measure_lst, dimension_lst) elif n_filter == 1: - return deviation_from_overall( - vis, ldf, filter_specs, measure_lst[0].attribute - ) + return deviation_from_overall(vis, ldf, filter_specs, measure_lst[0].attribute) # Histogram elif n_dim == 0 and n_msr == 1: if v_size < 2: @@ -94,9 +92,7 @@ def interestingness(vis: Vis, ldf: LuxDataFrame) -> int: if v_size < 10: return -1 if vis.mark == "heatmap": - return weighted_correlation( - vis.data["xBinStart"], vis.data["yBinStart"], vis.data["count"] - ) + return weighted_correlation(vis.data["xBinStart"], vis.data["yBinStart"], vis.data["count"]) if n_filter == 1: v_filter_size = get_filtered_size(filter_specs, vis.data) sig = v_filter_size / v_size @@ -139,9 +135,7 @@ def interestingness(vis: Vis, ldf: LuxDataFrame) -> int: groupby_unique_vals = ldf.unique_values[groupby_column] for c in range(0, groupby_cardinality): contingency_table.append( - vis.data[vis.data[groupby_column] == groupby_unique_vals[c]][ - measure_column - ] + vis.data[vis.data[groupby_column] == groupby_unique_vals[c]][measure_column] ) score = 0.12 # ValueError results if an entire column of the contingency table is 0, can happen if an applied filter results in @@ -186,14 +180,10 @@ def weighted_cov(x, y, w): def weighted_correlation(x, y, w): # Based on https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#Weighted_correlation_coefficient - return weighted_cov(x, y, w) / np.sqrt( - weighted_cov(x, x, w) * weighted_cov(y, y, w) - ) + return weighted_cov(x, y, w) / np.sqrt(weighted_cov(x, x, w) * weighted_cov(y, y, w)) -def deviation_from_overall( - vis: Vis, ldf: LuxDataFrame, filter_specs: list, msr_attribute: str -) -> int: +def deviation_from_overall(vis: Vis, ldf: LuxDataFrame, filter_specs: list, msr_attribute: str) -> int: """ Difference in bar chart/histogram shape from overall chart Note: this function assumes that the filtered vis.data is operating on the same range as the unfiltered vis.data. @@ -230,9 +220,7 @@ def deviation_from_overall( v = unfiltered_vis.data[msr_attribute] v = v / v.sum() - assert len(v) == len( - v_filter - ), "Data for filtered and unfiltered vis have unequal length." + assert len(v) == len(v_filter), "Data for filtered and unfiltered vis have unequal length." sig = v_filter_size / v_size # significance factor # Euclidean distance as L2 function @@ -257,9 +245,7 @@ def deviation_from_overall( return sig * rankSig * euclidean(v, v_filter) -def unevenness( - vis: Vis, ldf: LuxDataFrame, measure_lst: list, dimension_lst: list -) -> int: +def unevenness(vis: Vis, ldf: LuxDataFrame, measure_lst: list, dimension_lst: list) -> int: """ Measure the unevenness of a bar chart vis. If a bar chart is highly uneven across the possible values, then it may be interesting. (e.g., USA produces lots of cars compared to Japan and Europe) diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py index cf04e741..aa02af6d 100644 --- a/lux/processor/Compiler.py +++ b/lux/processor/Compiler.py @@ -81,9 +81,7 @@ def compile_intent(ldf: LuxDataFrame, _inferred_intent: List[Clause]) -> VisList return vis_collection @staticmethod - def enumerate_collection( - _inferred_intent: List[Clause], ldf: LuxDataFrame - ) -> VisList: + def enumerate_collection(_inferred_intent: List[Clause], ldf: LuxDataFrame) -> VisList: """ Given specifications that have been expanded thorught populateOptions, recursively iterate over the resulting list combinations to generate a vis list. @@ -172,9 +170,7 @@ def populate_data_type_model(ldf, vis_collection) -> VisList: chart_title = date_utils.date_formatter(clause.value, ldf) else: chart_title = clause.value - vis.title = ( - f"{clause.attribute} {clause.filter_op} {chart_title}" - ) + vis.title = f"{clause.attribute} {clause.filter_op} {chart_title}" return vlist @staticmethod @@ -335,9 +331,7 @@ def line_or_bar(ldf, dimension: Clause, measure: Clause): "y": vis._inferred_intent[1], "color": vis._inferred_intent[2], } - relevant_attributes = [ - auto_channel[channel].attribute for channel in auto_channel - ] + relevant_attributes = [auto_channel[channel].attribute for channel in auto_channel] relevant_min_max = dict( (attr, ldf._min_max[attr]) for attr in relevant_attributes @@ -399,9 +393,7 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]): # and the leftovers in the auto_channel specification, # step through them together and fill it automatically. leftover_channels = list(filter(lambda x: result_dict[x] == "", result_dict)) - for leftover_channel, leftover_encoding in zip( - leftover_channels, auto_channel.values() - ): + for leftover_channel, leftover_encoding in zip(leftover_channels, auto_channel.values()): leftover_encoding.channel = leftover_channel result_dict[leftover_channel] = leftover_encoding vis._inferred_intent = list(result_dict.values()) @@ -409,9 +401,7 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]): @staticmethod # def populate_wildcard_options(ldf: LuxDataFrame) -> dict: - def populate_wildcard_options( - _inferred_intent: List[Clause], ldf: LuxDataFrame - ) -> dict: + def populate_wildcard_options(_inferred_intent: List[Clause], ldf: LuxDataFrame) -> dict: """ Given wildcards and constraints in the LuxDataFrame's intent, return the list of available values that satisfies the data_type or data_model constraints. @@ -436,13 +426,9 @@ def populate_wildcard_options( if clause.attribute == "?": options = set(list(ldf.columns)) # all attributes if clause.data_type != "": - options = options.intersection( - set(ldf.data_type[clause.data_type]) - ) + options = options.intersection(set(ldf.data_type[clause.data_type])) if clause.data_model != "": - options = options.intersection( - set(ldf.data_model[clause.data_model]) - ) + options = options.intersection(set(ldf.data_model[clause.data_model])) options = list(options) else: options = convert_to_list(clause.attribute) diff --git a/lux/processor/Parser.py b/lux/processor/Parser.py index 2e205704..065d420e 100644 --- a/lux/processor/Parser.py +++ b/lux/processor/Parser.py @@ -97,9 +97,7 @@ def parse(intent: List[Union[Clause, str]]) -> List[Clause]: # clause.description contain ">","<". or "=" if any(ext in [">", "<", "=", "!="] for ext in clause.description): # then parse it and assign to clause.attribute, clause.filter_op, clause.values - clause.filter_op = re.findall( - r"/.*/|>|=|<|>=|<=|!=", clause.description - )[0] + clause.filter_op = re.findall(r"/.*/|>|=|<|>=|<=|!=", clause.description)[0] split_description = clause.description.split(clause.filter_op) clause.attribute = split_description[0] clause.value = split_description[1] diff --git a/lux/processor/Validator.py b/lux/processor/Validator.py index f01e7d42..a497045a 100644 --- a/lux/processor/Validator.py +++ b/lux/processor/Validator.py @@ -54,8 +54,7 @@ def validate_intent(intent: List[Clause], ldf: LuxDataFrame) -> None: def validate_clause(clause): if not ( - (clause.attribute and clause.attribute == "?") - or (clause.value and clause.value == "?") + (clause.attribute and clause.attribute == "?") or (clause.value and clause.value == "?") ): if isinstance(clause.attribute, list): for attr in clause.attribute: @@ -66,18 +65,12 @@ def validate_clause(clause): else: if clause.attribute != "Record": # we don't value check datetime since datetime can take filter values that don't exactly match the exact TimeStamp representation - if clause.attribute and not is_datetime_string( - clause.attribute - ): + if clause.attribute and not is_datetime_string(clause.attribute): if not clause.attribute in list(ldf.columns): warnings.warn( f"The input attribute '{clause.attribute}' does not exist in the DataFrame." ) - if ( - clause.value - and clause.attribute - and clause.filter_op == "=" - ): + if clause.value and clause.attribute and clause.filter_op == "=": series = ldf[clause.attribute] if not is_datetime_series(series): if isinstance(clause.value, list): diff --git a/lux/utils/date_utils.py b/lux/utils/date_utils.py index 817e1ea8..d3ed03ae 100644 --- a/lux/utils/date_utils.py +++ b/lux/utils/date_utils.py @@ -50,9 +50,7 @@ def date_formatter(time_stamp, ldf): elif granularity == "month": date_str += str(datetime.year) + "-" + str(datetime.month) elif granularity == "day": - date_str += ( - str(datetime.year) + "-" + str(datetime.month) + "-" + str(datetime.day) - ) + date_str += str(datetime.year) + "-" + str(datetime.month) + "-" + str(datetime.day) else: # non supported granularity return datetime.date() @@ -103,9 +101,7 @@ def is_datetime_series(series: pd.Series) -> bool: ------- is_date: bool """ - return pd.api.types.is_datetime64_any_dtype(series) or pd.api.types.is_period_dtype( - series - ) + return pd.api.types.is_datetime64_any_dtype(series) or pd.api.types.is_period_dtype(series) def is_datetime_string(string: str) -> bool: diff --git a/lux/utils/message.py b/lux/utils/message.py index 638fd581..04d1cc37 100644 --- a/lux/utils/message.py +++ b/lux/utils/message.py @@ -29,9 +29,7 @@ def to_html(self): if len(self.messages) == 0: return "" else: - sorted_msgs = sorted( - self.messages, key=lambda i: i["priority"], reverse=True - ) + sorted_msgs = sorted(self.messages, key=lambda i: i["priority"], reverse=True) html = "
    " for msg in sorted_msgs: msgTxt = msg["text"] diff --git a/lux/utils/utils.py b/lux/utils/utils.py index 0c246597..4c289b65 100644 --- a/lux/utils/utils.py +++ b/lux/utils/utils.py @@ -80,9 +80,7 @@ def check_if_id_like(df, attribute): sampled = df[attribute].sample(50, random_state=99) else: sampled = df[attribute] - str_length_uniformity = ( - sampled.apply(lambda x: type(x) == str and len(x)).std() < 3 - ) + str_length_uniformity = sampled.apply(lambda x: type(x) == str and len(x)).std() < 3 return ( high_cardinality and (attribute_contain_id or almost_all_vals_unique) diff --git a/lux/vis/Vis.py b/lux/vis/Vis.py index a7883068..66ef85f4 100644 --- a/lux/vis/Vis.py +++ b/lux/vis/Vis.py @@ -27,7 +27,9 @@ def __init__(self, intent, source=None, title="", score=0.0): self._intent = intent # This is the user's original intent to Vis self._inferred_intent = intent # This is the re-written, expanded version of user's original intent (include inferred vis info) self._source = source # This is the original data that is attached to the Vis - self._vis_data = None # This is the data that represents the Vis (e.g., selected, aggregated, binned) + self._vis_data = ( + None # This is the data that represents the Vis (e.g., selected, aggregated, binned) + ) self._code = None self._mark = "" self._min_max = {} @@ -39,9 +41,7 @@ def __init__(self, intent, source=None, title="", score=0.0): def __repr__(self): if self._source is None: - return ( - f"" - ) + return f"" filter_intents = None channels, additional_channels = [], [] for clause in self._inferred_intent: @@ -52,12 +52,7 @@ def __repr__(self): if hasattr(clause, "attribute"): if clause.attribute != "": if clause.aggregation != "" and clause.aggregation is not None: - attribute = ( - clause._aggregation_name.upper() - + "(" - + clause.attribute - + ")" - ) + attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")" elif clause.bin_size > 0: attribute = "BIN(" + clause.attribute + ")" else: @@ -77,9 +72,7 @@ def __repr__(self): if filter_intents: return f"" else: - return ( - f"" - ) + return f"" @property def data(self): @@ -164,9 +157,7 @@ def get_attr_by_attr_name(self, attr_name): def get_attr_by_channel(self, channel): spec_obj = list( filter( - lambda x: x.channel == channel and x.value == "" - if hasattr(x, "channel") - else False, + lambda x: x.channel == channel and x.value == "" if hasattr(x, "channel") else False, self._inferred_intent, ) ) @@ -195,9 +186,7 @@ def get_attr_by_data_model(self, dmodel, exclude_record=False): def get_attr_by_data_type(self, dtype): return list( filter( - lambda x: x.data_type == dtype and x.value == "" - if hasattr(x, "data_type") - else False, + lambda x: x.data_type == dtype and x.value == "" if hasattr(x, "data_type") else False, self._inferred_intent, ) ) @@ -218,9 +207,7 @@ def remove_column_from_spec(self, attribute, remove_first: bool = False): Boolean flag to determine whether to remove all instances of the attribute or only one (first) instance, by default False """ if not remove_first: - new_inferred = list( - filter(lambda x: x.attribute != attribute, self._inferred_intent) - ) + new_inferred = list(filter(lambda x: x.attribute != attribute, self._inferred_intent)) self._inferred_intent = new_inferred self._intent = new_inferred elif remove_first: diff --git a/lux/vis/VisList.py b/lux/vis/VisList.py index 5fcfbd4d..c25495d9 100644 --- a/lux/vis/VisList.py +++ b/lux/vis/VisList.py @@ -93,9 +93,7 @@ def exported(self) -> VisList: ) return [] else: - exported_vis = VisList( - list(map(self.__getitem__, exported_vis_lst["Vis List"])) - ) + exported_vis = VisList(list(map(self.__getitem__, exported_vis_lst["Vis List"]))) return exported_vis def remove_duplicates(self) -> None: @@ -137,9 +135,7 @@ def __repr__(self): filter_intents = clause if clause.aggregation != "" and clause.aggregation is not None: - attribute = ( - clause._aggregation_name.upper() + "(" + clause.attribute + ")" - ) + attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")" elif clause.bin_size > 0: attribute = "BIN(" + clause.attribute + ")" else: @@ -153,12 +149,9 @@ def __repr__(self): largest_mark = len(vis.mark) if ( filter_intents - and len(str(filter_intents.value)) + len(filter_intents.attribute) - > largest_filter + and len(str(filter_intents.value)) + len(filter_intents.attribute) > largest_filter ): - largest_filter = len(str(filter_intents.value)) + len( - filter_intents.attribute - ) + largest_filter = len(str(filter_intents.value)) + len(filter_intents.attribute) vis_repr = [] largest_x_length = len(x_channel) largest_y_length = len(y_channel) @@ -172,14 +165,8 @@ def __repr__(self): if clause.value != "": filter_intents = clause - if ( - clause.aggregation != "" - and clause.aggregation is not None - and vis.mark != "scatter" - ): - attribute = ( - clause._aggregation_name.upper() + "(" + clause.attribute + ")" - ) + if clause.aggregation != "" and clause.aggregation is not None and vis.mark != "scatter": + attribute = clause._aggregation_name.upper() + "(" + clause.attribute + ")" elif clause.bin_size > 0: attribute = "BIN(" + clause.attribute + ")" else: @@ -294,9 +281,7 @@ def _repr_html_(self): import luxwidget recJSON = LuxDataFrame.rec_to_JSON([recommendation]) - self._widget = luxwidget.LuxWidget( - currentVis={}, recommendations=recJSON, intent="", message="" - ) + self._widget = luxwidget.LuxWidget(currentVis={}, recommendations=recJSON, intent="", message="") display(self._widget) def refresh_source(self, ldf): @@ -340,7 +325,5 @@ def refresh_source(self, ldf): else: self._inferred_intent = Parser.parse(self._intent) Validator.validate_intent(self._inferred_intent, ldf) - self._collection = Compiler.compile_intent( - ldf, self._inferred_intent - ) + self._collection = Compiler.compile_intent(ldf, self._inferred_intent) ldf.executor.execute(self._collection, ldf) diff --git a/lux/vislib/altair/AltairChart.py b/lux/vislib/altair/AltairChart.py index 09a01013..f0ccb869 100644 --- a/lux/vislib/altair/AltairChart.py +++ b/lux/vislib/altair/AltairChart.py @@ -50,9 +50,7 @@ def add_tooltip(self): self.chart = self.chart.encode(tooltip=list(self.vis.data.columns)) def apply_default_config(self): - self.chart = self.chart.configure_title( - fontWeight=500, fontSize=13, font="Helvetica Neue" - ) + self.chart = self.chart.configure_title(fontWeight=500, fontSize=13, font="Helvetica Neue") self.chart = self.chart.configure_axis( titleFontWeight=500, titleFontSize=11, @@ -71,13 +69,15 @@ def apply_default_config(self): labelFont="Helvetica Neue", ) self.chart = self.chart.properties(width=160, height=150) - self.code += "\nchart = chart.configure_title(fontWeight=500,fontSize=13,font='Helvetica Neue')\n" + self.code += ( + "\nchart = chart.configure_title(fontWeight=500,fontSize=13,font='Helvetica Neue')\n" + ) self.code += "chart = chart.configure_axis(titleFontWeight=500,titleFontSize=11,titleFont='Helvetica Neue',\n" - self.code += " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" - self.code += "chart = chart.configure_legend(titleFontWeight=500,titleFontSize=10,titleFont='Helvetica Neue',\n" self.code += ( - " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue')\n" + " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" ) + self.code += "chart = chart.configure_legend(titleFontWeight=500,titleFontSize=10,titleFont='Helvetica Neue',\n" + self.code += " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue')\n" self.code += "chart = chart.properties(width=160,height=150)\n" def encode_color(self): @@ -97,9 +97,7 @@ def encode_color(self): ) self.code += f"chart = chart.encode(color=alt.Color('{color_attr_name}',type='{color_attr_type}',timeUnit='{timeUnit}',title='{color_attr_name}'))" else: - self.chart = self.chart.encode( - color=alt.Color(color_attr_name, type=color_attr_type) - ) + self.chart = self.chart.encode(color=alt.Color(color_attr_name, type=color_attr_type)) self.code += f"chart = chart.encode(color=alt.Color('{color_attr_name}',type='{color_attr_type}'))\n" elif len(color_attr) > 1: raise ValueError( @@ -111,9 +109,7 @@ def add_title(self): if chart_title: self.chart = self.chart.encode().properties(title=chart_title) if self.code != "": - self.code += ( - f"chart = chart.encode().properties(title = '{chart_title}')" - ) + self.code += f"chart = chart.encode().properties(title = '{chart_title}')" def initialize_chart(self): return NotImplemented diff --git a/lux/vislib/altair/AltairRenderer.py b/lux/vislib/altair/AltairRenderer.py index 2692f72e..1d10aeb0 100644 --- a/lux/vislib/altair/AltairRenderer.py +++ b/lux/vislib/altair/AltairRenderer.py @@ -93,9 +93,7 @@ def create_vis(self, vis, standalone=True): import inspect if vis.plot_config: - chart.code += "\n".join( - inspect.getsource(vis.plot_config).split("\n ")[1:-1] - ) + chart.code += "\n".join(inspect.getsource(vis.plot_config).split("\n ")[1:-1]) chart.code += "\nchart" chart.code = chart.code.replace("\n\t\t", "\n") @@ -107,15 +105,11 @@ def create_vis(self, vis, standalone=True): if local_vars: callers_local_vars = local_vars.f_locals.items() possible_vars = [ - var_name - for var_name, var_val in callers_local_vars - if var_val is var + var_name for var_name, var_val in callers_local_vars if var_val is var ] all_vars.extend(possible_vars) found_variable = [ - possible_var - for possible_var in all_vars - if possible_var[0] != "_" + possible_var for possible_var in all_vars if possible_var[0] != "_" ][0] else: # if vis._source was not set when the Vis was created found_variable = "df" diff --git a/lux/vislib/altair/BarChart.py b/lux/vislib/altair/BarChart.py index 5b7ecb57..0550e590 100644 --- a/lux/vislib/altair/BarChart.py +++ b/lux/vislib/altair/BarChart.py @@ -49,11 +49,11 @@ def initialize_chart(self): type=y_attr.data_type, axis=alt.Axis(labelOverlap=True), ) - x_attr_field = alt.X( - x_attr.attribute, type=x_attr.data_type, title=agg_title - ) + x_attr_field = alt.X(x_attr.attribute, type=x_attr.data_type, title=agg_title) y_attr_field_code = f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', axis=alt.Axis(labelOverlap=True))" - x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', title='{agg_title}')" + x_attr_field_code = ( + f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', title='{agg_title}')" + ) if y_attr.sort == "ascending": y_attr_field.sort = "-x" @@ -67,11 +67,11 @@ def initialize_chart(self): type=x_attr.data_type, axis=alt.Axis(labelOverlap=True), ) - y_attr_field = alt.Y( - y_attr.attribute, type=y_attr.data_type, title=agg_title - ) + y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title) x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', axis=alt.Axis(labelOverlap=True))" - y_attr_field_code = f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" + y_attr_field_code = ( + f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" + ) if x_attr.sort == "ascending": x_attr_field.sort = "-y" x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', axis=alt.Axis(labelOverlap=True),sort='-y')" @@ -126,6 +126,4 @@ def encode_color(self): self.add_text() # Setting tooltip as non-null self.chart = self.chart.configure_mark(tooltip=alt.TooltipContent("encoding")) - self.code += ( - f"""chart = chart.configure_mark(tooltip=alt.TooltipContent('encoding'))""" - ) + self.code += f"""chart = chart.configure_mark(tooltip=alt.TooltipContent('encoding'))""" diff --git a/lux/vislib/altair/Histogram.py b/lux/vislib/altair/Histogram.py index b9d1da4a..fdcaaabc 100644 --- a/lux/vislib/altair/Histogram.py +++ b/lux/vislib/altair/Histogram.py @@ -41,10 +41,7 @@ def initialize_chart(self): x_min = self.vis.min_max[msr_attr.attribute][0] x_max = self.vis.min_max[msr_attr.attribute][1] - x_range = abs( - max(self.vis.data[msr_attr.attribute]) - - min(self.vis.data[msr_attr.attribute]) - ) + x_range = abs(max(self.vis.data[msr_attr.attribute]) - min(self.vis.data[msr_attr.attribute])) plot_range = abs(x_max - x_min) markbar = x_range / plot_range * 12 diff --git a/lux/vislib/altair/LineChart.py b/lux/vislib/altair/LineChart.py index 1e01eabf..002beefb 100644 --- a/lux/vislib/altair/LineChart.py +++ b/lux/vislib/altair/LineChart.py @@ -48,23 +48,19 @@ def initialize_chart(self): if y_attr.data_model == "measure": agg_title = get_agg_title(y_attr) x_attr_spec = alt.X(x_attr.attribute, type=x_attr.data_type) - y_attr_spec = alt.Y( - y_attr.attribute, type=y_attr.data_type, title=agg_title - ) - x_attr_field_code = ( - f"alt.X('{x_attr.attribute}', type = '{x_attr.data_type}')" + y_attr_spec = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title) + x_attr_field_code = f"alt.X('{x_attr.attribute}', type = '{x_attr.data_type}')" + y_attr_fieldCode = ( + f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" ) - y_attr_fieldCode = f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" else: agg_title = get_agg_title(x_attr) - x_attr_spec = alt.X( - x_attr.attribute, type=x_attr.data_type, title=agg_title - ) + x_attr_spec = alt.X(x_attr.attribute, type=x_attr.data_type, title=agg_title) y_attr_spec = alt.Y(y_attr.attribute, type=y_attr.data_type) - x_attr_field_code = f"alt.X('{x_attr.attribute}', type = '{x_attr.data_type}', title='{agg_title}')" - y_attr_fieldCode = ( - f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}')" + x_attr_field_code = ( + f"alt.X('{x_attr.attribute}', type = '{x_attr.data_type}', title='{agg_title}')" ) + y_attr_fieldCode = f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}')" chart = alt.Chart(self.data).mark_line().encode(x=x_attr_spec, y=y_attr_spec) chart = chart.interactive() # Enable Zooming and Panning diff --git a/tests/test_action.py b/tests/test_action.py index 3b3097ad..5775c614 100644 --- a/tests/test_action.py +++ b/tests/test_action.py @@ -20,9 +20,7 @@ def test_vary_filter_val(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) vis = Vis(["Height", "SportType=Ball"], df) df.set_intent_as_vis(vis) @@ -82,9 +80,7 @@ def test_row_column_group(): tseries = df.pivot(index="State", columns="Date", values="Value") # Interpolating missing values tseries[tseries.columns.min()] = tseries[tseries.columns.min()].fillna(0) - tseries[tseries.columns.max()] = tseries[tseries.columns.max()].fillna( - tseries.max(axis=1) - ) + tseries[tseries.columns.max()] = tseries[tseries.columns.max()].fillna(tseries.max(axis=1)) tseries = tseries.interpolate("zero", axis=1) tseries._repr_html_() assert list(tseries.recommendation.keys()) == ["Row Groups", "Column Groups"] @@ -183,8 +179,7 @@ def test_year_filter_value(): lambda vis: len( list( filter( - lambda clause: clause.value != "" - and clause.attribute == "Year", + lambda clause: clause.value != "" and clause.attribute == "Year", vis._intent, ) ) diff --git a/tests/test_compiler.py b/tests/test_compiler.py index 760c742c..037b7534 100644 --- a/tests/test_compiler.py +++ b/tests/test_compiler.py @@ -130,9 +130,7 @@ def test_sort_bar(): df = pd.read_csv("lux/data/car.csv") vis = Vis( [ - lux.Clause( - attribute="Acceleration", data_model="measure", data_type="quantitative" - ), + lux.Clause(attribute="Acceleration", data_model="measure", data_type="quantitative"), lux.Clause(attribute="Origin", data_model="dimension", data_type="nominal"), ], df, @@ -143,9 +141,7 @@ def test_sort_bar(): df = pd.read_csv("lux/data/car.csv") vis = Vis( [ - lux.Clause( - attribute="Acceleration", data_model="measure", data_type="quantitative" - ), + lux.Clause(attribute="Acceleration", data_model="measure", data_type="quantitative"), lux.Clause(attribute="Name", data_model="dimension", data_type="nominal"), ], df, diff --git a/tests/test_dates.py b/tests/test_dates.py index 4b87f7a6..8a5cc823 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -52,9 +52,7 @@ def test_period_selection(): PandasExecutor.execute(ldf.current_vis, ldf) - assert all( - [type(vlist.data) == lux.core.frame.LuxDataFrame for vlist in ldf.current_vis] - ) + assert all([type(vlist.data) == lux.core.frame.LuxDataFrame for vlist in ldf.current_vis]) assert all(ldf.current_vis[2].data.columns == ["Year", "Acceleration"]) @@ -64,16 +62,12 @@ def test_period_filter(): ldf["Year"] = pd.DatetimeIndex(ldf["Year"]).to_period(freq="A") - ldf.set_intent( - [lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")] - ) + ldf.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")]) PandasExecutor.execute(ldf.current_vis, ldf) ldf._repr_html_() - assert isinstance( - ldf.recommendation["Filter"][2]._inferred_intent[2].value, pd.Period - ) + assert isinstance(ldf.recommendation["Filter"][2]._inferred_intent[2].value, pd.Period) def test_period_to_altair(): @@ -83,9 +77,7 @@ def test_period_to_altair(): df["Year"] = pd.DatetimeIndex(df["Year"]).to_period(freq="A") - df.set_intent( - [lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")] - ) + df.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")]) PandasExecutor.execute(df.current_vis, df) df._repr_html_() @@ -102,9 +94,7 @@ def test_refresh_inplace(): "value": [10.5, 15.2, 20.3, 25.2], } ) - with pytest.warns( - UserWarning, match="Lux detects that the attribute 'date' may be temporal." - ): + with pytest.warns(UserWarning, match="Lux detects that the attribute 'date' may be temporal."): df._repr_html_() assert df.data_type_lookup["date"] == "temporal" diff --git a/tests/test_error_warning.py b/tests/test_error_warning.py index d5fe49ff..a34b349f 100644 --- a/tests/test_error_warning.py +++ b/tests/test_error_warning.py @@ -54,9 +54,7 @@ def test_multi_vis(): SyntaxError, match="The intent that you specified corresponds to more than one visualization.", ): - Vis( - ["SATAverage", "AverageCost", "Region=New England|Southeast"], df - )._repr_html_() + Vis(["SATAverage", "AverageCost", "Region=New England|Southeast"], df)._repr_html_() # Test Properties with Private Variables Readable but not Writable diff --git a/tests/test_executor.py b/tests/test_executor.py index d1a18a6b..d4a05d01 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -89,10 +89,7 @@ def test_colored_bar_chart(): group_by_cardinality = len(df.unique_values["Origin"]) assert len(new_vis.data.columns) == 3 assert ( - len(new_vis.data) - == 15 - > group_by_cardinality - < color_cardinality * group_by_cardinality + len(new_vis.data) == 15 > group_by_cardinality < color_cardinality * group_by_cardinality ) # Not color_cardinality*group_by_cardinality since some combinations have 0 values @@ -115,12 +112,7 @@ def test_colored_line_chart(): group_by_cardinality = len(df.unique_values["Year"]) assert len(new_vis.data.columns) == 3 # Not color_cardinality*group_by_cardinality since some combinations have 0 values - assert ( - len(new_vis.data) - == 60 - > group_by_cardinality - < color_cardinality * group_by_cardinality - ) + assert len(new_vis.data) == 60 > group_by_cardinality < color_cardinality * group_by_cardinality def test_filter(): @@ -188,23 +180,12 @@ def test_filter_aggregation_fillzero_aligned(): ] vis = Vis(intent, df) result = vis.data - externalValidation = ( - df[df["Origin"] == "Japan"].groupby("Cylinders").mean()["MilesPerGal"] - ) + externalValidation = df[df["Origin"] == "Japan"].groupby("Cylinders").mean()["MilesPerGal"] assert result[result["Cylinders"] == 5]["MilesPerGal"].values[0] == 0 assert result[result["Cylinders"] == 8]["MilesPerGal"].values[0] == 0 - assert ( - result[result["Cylinders"] == 3]["MilesPerGal"].values[0] - == externalValidation[3] - ) - assert ( - result[result["Cylinders"] == 4]["MilesPerGal"].values[0] - == externalValidation[4] - ) - assert ( - result[result["Cylinders"] == 6]["MilesPerGal"].values[0] - == externalValidation[6] - ) + assert result[result["Cylinders"] == 3]["MilesPerGal"].values[0] == externalValidation[3] + assert result[result["Cylinders"] == 4]["MilesPerGal"].values[0] == externalValidation[4] + assert result[result["Cylinders"] == 6]["MilesPerGal"].values[0] == externalValidation[6] def test_exclude_attribute(): diff --git a/tests/test_interestingness.py b/tests/test_interestingness.py index a42766ce..d62b4b40 100644 --- a/tests/test_interestingness.py +++ b/tests/test_interestingness.py @@ -85,14 +85,12 @@ def test_interestingness_0_1_0(): for f in range(0, len(df.recommendation["Enhance"])): if ( df.recommendation["Enhance"][f].mark == "scatter" - and df.recommendation["Enhance"][f]._inferred_intent[1].attribute - == "Weight" + and df.recommendation["Enhance"][f]._inferred_intent[1].attribute == "Weight" ): rank1 = f if ( df.recommendation["Enhance"][f].mark == "scatter" - and df.recommendation["Enhance"][f]._inferred_intent[1].attribute - == "Acceleration" + and df.recommendation["Enhance"][f]._inferred_intent[1].attribute == "Acceleration" ): rank2 = f if ( @@ -181,20 +179,17 @@ def test_interestingness_1_1_1(): for f in range(0, len(df.recommendation["Enhance"])): if ( str(df.recommendation["Enhance"][f]._inferred_intent[2].value) == "USA" - and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) - == "Cylinders" + and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) == "Cylinders" ): rank1 = f if ( str(df.recommendation["Enhance"][f]._inferred_intent[2].value) == "USA" - and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) - == "Weight" + and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) == "Weight" ): rank2 = f if ( str(df.recommendation["Enhance"][f]._inferred_intent[2].value) == "USA" - and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) - == "Horsepower" + and str(df.recommendation["Enhance"][f]._inferred_intent[1].attribute) == "Horsepower" ): rank3 = f assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3 @@ -224,9 +219,7 @@ def test_interestingness_0_2_0(): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") - df.set_intent( - [lux.Clause(attribute="Horsepower"), lux.Clause(attribute="Acceleration")] - ) + df.set_intent([lux.Clause(attribute="Horsepower"), lux.Clause(attribute="Acceleration")]) df._repr_html_() # check that top recommended enhance graph score is not none and that ordering makes intuitive sense assert interestingness(df.recommendation["Enhance"][0], df) != None @@ -235,14 +228,12 @@ def test_interestingness_0_2_0(): rank3 = -1 for f in range(0, len(df.recommendation["Enhance"])): if ( - str(df.recommendation["Enhance"][f]._inferred_intent[2].attribute) - == "Origin" + str(df.recommendation["Enhance"][f]._inferred_intent[2].attribute) == "Origin" and str(df.recommendation["Enhance"][f].mark) == "scatter" ): rank1 = f if ( - str(df.recommendation["Enhance"][f]._inferred_intent[2].attribute) - == "Displacement" + str(df.recommendation["Enhance"][f]._inferred_intent[2].attribute) == "Displacement" and str(df.recommendation["Enhance"][f].mark) == "scatter" ): rank2 = f diff --git a/tests/test_maintainence.py b/tests/test_maintainence.py index 35f4ec71..1c2137ca 100644 --- a/tests/test_maintainence.py +++ b/tests/test_maintainence.py @@ -40,9 +40,7 @@ def test_metadata_inplace_operation(): df._repr_html_() assert df._metadata_fresh == True, "Failed to maintain metadata after display df" df.dropna(inplace=True) - assert ( - df._metadata_fresh == False - ), "Failed to expire metadata after in-place Pandas operation" + assert df._metadata_fresh == False, "Failed to expire metadata after in-place Pandas operation" def test_metadata_new_df_operation(): @@ -64,9 +62,7 @@ def test_metadata_column_group_reset_df(): assert not hasattr(result, "_metadata_fresh") # Note that this should trigger two compute metadata (one for df, and one for an intermediate df.reset_index used to feed inside created Vis) result._repr_html_() - assert ( - result._metadata_fresh == True - ), "Failed to maintain metadata after display df" + assert result._metadata_fresh == True, "Failed to maintain metadata after display df" colgroup_recs = result.recommendation["Column Groups"] assert len(colgroup_recs) == 5 @@ -81,9 +77,7 @@ def test_recs_inplace_operation(): assert len(df.recommendation["Occurrence"]) == 4 df.drop(columns=["Name"], inplace=True) assert "Name" not in df.columns, "Failed to perform `drop` operation in-place" - assert ( - df._recs_fresh == False - ), "Failed to maintain recommendation after in-place Pandas operation" + assert df._recs_fresh == False, "Failed to maintain recommendation after in-place Pandas operation" df._repr_html_() assert len(df.recommendation["Occurrence"]) == 3 assert df._recs_fresh == True, "Failed to maintain recommendation after display df" diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index ad5008de..d5ebfeb3 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -126,9 +126,7 @@ def test_concat(): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") - new_df = pd.concat( - [df.loc[:, "Name":"Cylinders"], df.loc[:, "Year":"Origin"]], axis="columns" - ) + new_df = pd.concat([df.loc[:, "Name":"Cylinders"], df.loc[:, "Year":"Origin"]], axis="columns") new_df._repr_html_() assert list(new_df.recommendation.keys()) == [ "Distribution", @@ -156,9 +154,7 @@ def test_qcut(): def test_cut(): df = pd.read_csv("lux/data/car.csv") - df["Weight"] = pd.cut( - df["Weight"], bins=[0, 2500, 7500, 10000], labels=["small", "medium", "large"] - ) + df["Weight"] = pd.cut(df["Weight"], bins=[0, 2500, 7500, 10000], labels=["small", "medium", "large"]) df._repr_html_() @@ -371,9 +367,7 @@ def test_loc(): assert len(new_df.cardinality) == 2 import numpy as np - inter_df = df.groupby("Brand")[["Acceleration", "Weight", "Horsepower"]].agg( - np.mean - ) + inter_df = df.groupby("Brand")[["Acceleration", "Weight", "Horsepower"]].agg(np.mean) new_df = inter_df.loc["chevrolet":"fiat", "Acceleration":"Weight"] new_df._repr_html_() assert list(new_df.recommendation.keys()) == ["Column Groups"] @@ -402,9 +396,7 @@ def test_iloc(): assert len(new_df.cardinality) == 2 import numpy as np - inter_df = df.groupby("Brand")[["Acceleration", "Weight", "Horsepower"]].agg( - np.mean - ) + inter_df = df.groupby("Brand")[["Acceleration", "Weight", "Horsepower"]].agg(np.mean) new_df = inter_df.iloc[5:10, 0:2] new_df._repr_html_() assert list(new_df.recommendation.keys()) == ["Column Groups"] @@ -486,9 +478,7 @@ def test_df_to_series(): df._repr_html_() # compute metadata assert df.cardinality is not None series = df["Weight"] - assert isinstance( - series, lux.core.series.LuxSeries - ), "Derived series is type LuxSeries." + assert isinstance(series, lux.core.series.LuxSeries), "Derived series is type LuxSeries." df["Weight"]._metadata assert df["Weight"]._metadata == [ "_intent", @@ -509,12 +499,8 @@ def test_df_to_series(): "_history", "_saved_export", ], "Metadata is lost when going from Dataframe to Series." - assert ( - df.cardinality is not None - ), "Metadata is lost when going from Dataframe to Series." - assert ( - series.name == "Weight" - ), "Pandas Series original `name` property not retained." + assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series." + assert series.name == "Weight", "Pandas Series original `name` property not retained." def test_value_counts(): @@ -523,9 +509,7 @@ def test_value_counts(): assert df.cardinality is not None series = df["Weight"] series.value_counts() - assert isinstance( - series, lux.core.series.LuxSeries - ), "Derived series is type LuxSeries." + assert isinstance(series, lux.core.series.LuxSeries), "Derived series is type LuxSeries." assert df["Weight"]._metadata == [ "_intent", "data_type_lookup", @@ -545,12 +529,8 @@ def test_value_counts(): "_history", "_saved_export", ], "Metadata is lost when going from Dataframe to Series." - assert ( - df.cardinality is not None - ), "Metadata is lost when going from Dataframe to Series." - assert ( - series.name == "Weight" - ), "Pandas Series original `name` property not retained." + assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series." + assert series.name == "Weight", "Pandas Series original `name` property not retained." def test_str_replace(): @@ -558,9 +538,7 @@ def test_str_replace(): df._repr_html_() # compute metadata assert df.cardinality is not None series = df["Brand"].str.replace("chevrolet", "chevy") - assert isinstance( - series, lux.core.series.LuxSeries - ), "Derived series is type LuxSeries." + assert isinstance(series, lux.core.series.LuxSeries), "Derived series is type LuxSeries." assert df["Brand"]._metadata == [ "_intent", "data_type_lookup", @@ -580,9 +558,5 @@ def test_str_replace(): "_history", "_saved_export", ], "Metadata is lost when going from Dataframe to Series." - assert ( - df.cardinality is not None - ), "Metadata is lost when going from Dataframe to Series." - assert ( - series.name == "Brand" - ), "Pandas Series original `name` property not retained." + assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series." + assert series.name == "Brand", "Pandas Series original `name` property not retained." diff --git a/tests/test_type.py b/tests/test_type.py index f71766c0..a531fe3a 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -45,9 +45,7 @@ def test_check_int_id(): def test_check_str_id(): - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/churn.csv?raw=true" - ) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/churn.csv?raw=true") df._repr_html_() assert ( "customerID is not visualized since it resembles an ID field." @@ -56,9 +54,9 @@ def test_check_str_id(): def test_check_hpi(): - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/hpi.csv?raw=true" - ).head(10) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/hpi.csv?raw=true").head( + 10 + ) df.maintain_metadata() @@ -80,9 +78,7 @@ def test_check_hpi(): def test_check_airbnb(): - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/airbnb_nyc.csv?raw=true" - ) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/airbnb_nyc.csv?raw=true") df.maintain_metadata() assert df.data_type_lookup == { "id": "id", diff --git a/tests/test_vis.py b/tests/test_vis.py index ff3b6f63..bf1879fd 100644 --- a/tests/test_vis.py +++ b/tests/test_vis.py @@ -20,9 +20,7 @@ def test_vis(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) vis = Vis(["Height", "SportType=Ball"], df) assert vis.get_attr_by_attr_name("Height")[0].bin_size != 0 @@ -30,9 +28,7 @@ def test_vis(): def test_vis_set_specs(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) vis = Vis(["Height", "SportType=Ball"], df) vis.set_intent(["Height", "SportType=Ice"]) @@ -40,14 +36,10 @@ def test_vis_set_specs(): def test_vis_collection(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) vlist = VisList(["Height", "SportType=Ball", "?"], df) - vis_with_year = list( - filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist) - )[0] + vis_with_year = list(filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist))[0] assert vis_with_year.get_attr_by_channel("x")[0].attribute == "Year" # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute assert len(vlist) == len(df.columns) - 1 - 1 @@ -56,9 +48,7 @@ def test_vis_collection(): def test_vis_collection_set_intent(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) vlist = VisList(["Height", "SportType=Ice", "?"], df) vlist.set_intent(["Height", "SportType=Boat", "?"]) @@ -100,17 +90,13 @@ def test_remove_identity(): vis = Vis(["Horsepower", "Horsepower"], df) vis.remove_column_from_spec("Horsepower", remove_first=True) assert len(vis._inferred_intent) == 1, "Remove only 1 instances of Horsepower" - assert ( - vis._inferred_intent[0].attribute == "Horsepower" - ), "Remove only 1 instances of Horsepower" + assert vis._inferred_intent[0].attribute == "Horsepower", "Remove only 1 instances of Horsepower" def test_refresh_collection(): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") - df.set_intent( - [lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")] - ) + df.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")]) df._repr_html_() enhanceCollection = df.recommendation["Enhance"] enhanceCollection.refresh_source(df[df["Origin"] == "USA"]) @@ -136,9 +122,7 @@ def test_vis_custom_aggregation_as_numpy_func(): def test_vis_collection_via_list_of_vis(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") @@ -177,15 +161,12 @@ def test_vis_to_Altair_standalone(): assert ( "chart = alt.Chart(pd.DataFrame({'Weight': {0: 3504, 1: 3693, 2: 3436, 3: 3433, 4: 3449, 5: 43" in code - or "alt.Chart(pd.DataFrame({'Horsepower': {0: 130, 1: 165, 2: 150, 3: 150, 4: 140," - in code + or "alt.Chart(pd.DataFrame({'Horsepower': {0: 130, 1: 165, 2: 150, 3: 150, 4: 140," in code ) def test_vis_list_custom_title_override(): - url = ( - "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - ) + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" df = pd.read_csv(url) df["Year"] = pd.to_datetime(df["Year"], format="%Y") From d43dab9e4b9d8abb9bb1091f91dac3fb1e90dc9b Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 14:04:01 -0800 Subject: [PATCH 14/22] executor --- lux/executor/PandasExecutor.py | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index 6c5c0da2..144ad3a7 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -247,9 +247,8 @@ def execute_binning(vis: Vis): bin_attribute = list(filter(lambda x: x.bin_size != 0, vis._inferred_intent))[0] if not np.isnan(vis.data[bin_attribute.attribute]).all(): - series = vis.data[ - bin_attribute.attribute - ].dropna() # np.histogram breaks if array contain NaN + # np.histogram breaks if array contain NaN + series = vis.data[bin_attribute.attribute].dropna() # TODO:binning runs for name attribte. Name attribute has datatype quantitative which is wrong. counts, bin_edges = np.histogram(series, bins=bin_attribute.bin_size) # bin_edges of size N+1, so need to compute bin_center as the bin location @@ -319,13 +318,8 @@ def execute_2D_binning(vis: Vis): x_attr = vis.get_attr_by_channel("x")[0].attribute y_attr = vis.get_attr_by_channel("y")[0].attribute -<<<<<<< HEAD - vis._vis_data.loc[:, "xBin"] = pd.cut(vis._vis_data[x_attr.attribute], bins=40) - vis._vis_data.loc[:, "yBin"] = pd.cut(vis._vis_data[y_attr.attribute], bins=40) -======= vis._vis_data["xBin"] = pd.cut(vis._vis_data[x_attr], bins=40) vis._vis_data["yBin"] = pd.cut(vis._vis_data[y_attr], bins=40) ->>>>>>> af0043a3619eac15e962a4270f86f47affa5f126 color_attr = vis.get_attr_by_channel("color") if len(color_attr) > 0: @@ -352,19 +346,11 @@ def execute_2D_binning(vis: Vis): result = result[result["count"] != 0] # convert type to facilitate weighted correlation interestingess calculation -<<<<<<< HEAD - result.loc[:, "xBinStart"] = result["xBin"].apply(lambda x: x.left).astype("float") - result.loc[:, "xBinEnd"] = result["xBin"].apply(lambda x: x.right) - - result.loc[:, "yBinStart"] = result["yBin"].apply(lambda x: x.left).astype("float") - result.loc[:, "yBinEnd"] = result["yBin"].apply(lambda x: x.right) -======= result["xBinStart"] = result["xBin"].apply(lambda x: x.left).astype("float") result["xBinEnd"] = result["xBin"].apply(lambda x: x.right) result["yBinStart"] = result["yBin"].apply(lambda x: x.left).astype("float") result["yBinEnd"] = result["yBin"].apply(lambda x: x.right) ->>>>>>> af0043a3619eac15e962a4270f86f47affa5f126 vis._vis_data = result.drop(columns=["xBin", "yBin"]) @@ -407,9 +393,8 @@ def compute_data_type(self, ldf: LuxDataFrame): ldf.data_type_lookup[attr] = "id" else: ldf.data_type_lookup[attr] = "nominal" - elif is_datetime_series( - ldf.dtypes[attr] - ): # check if attribute is any type of datetime dtype + # check if attribute is any type of datetime dtype + elif is_datetime_series(ldf.dtypes[attr]): ldf.data_type_lookup[attr] = "temporal" else: ldf.data_type_lookup[attr] = "nominal" From 104c365ab3351be821638e9ada71a9073ea48f6a Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 14:11:41 -0800 Subject: [PATCH 15/22] interestingness --- lux/interestingness/interestingness.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lux/interestingness/interestingness.py b/lux/interestingness/interestingness.py index f70a658b..bc6fcbb3 100644 --- a/lux/interestingness/interestingness.py +++ b/lux/interestingness/interestingness.py @@ -213,9 +213,8 @@ def deviation_from_overall(vis: Vis, ldf: LuxDataFrame, filter_specs: list, msr_ import copy unfiltered_vis = copy.copy(vis) - unfiltered_vis._inferred_intent = utils.get_attrs_specs( - vis._inferred_intent - ) # Remove filters, keep only attribute intent + # Remove filters, keep only attribute intent + unfiltered_vis._inferred_intent = utils.get_attrs_specs(vis._inferred_intent) ldf.executor.execute([unfiltered_vis], ldf) v = unfiltered_vis.data[msr_attribute] From 41306c3d59ccae110e2f8dbae0e440380bf0d3f3 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 14:30:07 -0800 Subject: [PATCH 16/22] processor --- lux/processor/Compiler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lux/processor/Compiler.py b/lux/processor/Compiler.py index aa02af6d..d5558f34 100644 --- a/lux/processor/Compiler.py +++ b/lux/processor/Compiler.py @@ -378,8 +378,9 @@ def enforce_specified_channel(vis: Vis, auto_channel: Dict[str, str]): # remove the specified channel from auto_channel (matching by value, since channel key may not be same) for i in list(auto_channel.keys()): # need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name) - if (auto_channel[i].attribute == sAttr[0].attribute) and ( - auto_channel[i].channel == sVal + if ( + auto_channel[i].attribute == sAttr[0].attribute + and auto_channel[i].channel == sVal ): auto_channel.pop(i) break From a466a08ca0118a464020f66c0f37c63a0a69b172 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Sat, 14 Nov 2020 14:44:35 -0800 Subject: [PATCH 17/22] vislib --- lux/vislib/altair/AltairRenderer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lux/vislib/altair/AltairRenderer.py b/lux/vislib/altair/AltairRenderer.py index 1d10aeb0..110ea0c8 100644 --- a/lux/vislib/altair/AltairRenderer.py +++ b/lux/vislib/altair/AltairRenderer.py @@ -119,7 +119,6 @@ def create_vis(self, vis, standalone=True): f"pd.DataFrame({str(vis.data.to_dict())})", ) else: - chart.code = chart.code.replace( - "placeholder_variable", found_variable - ) # TODO: Placeholder (need to read dynamically via locals()) + # TODO: Placeholder (need to read dynamically via locals()) + chart.code = chart.code.replace("placeholder_variable", found_variable) return chart.code From a702ab1ad74e01b8016e4d5e426fc2d73c0cacf1 Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 16 Nov 2020 02:28:44 -0800 Subject: [PATCH 18/22] tests, travis, CONTRIBUTING --- .travis.yml | 2 +- CONTRIBUTING.md | 2 +- tests/test_executor.py | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 98bde1cf..6dfca243 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,7 +7,7 @@ install: - pip install git+https://github.com/lux-org/lux-widget # command to run tests script: - - black --target-version py37 --check . + - black --target-version py37 --line-length 105 --check . - python -m pytest tests/*.py - pytest --cov-report term --cov=lux tests/ after_success: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ac05767b..a241410a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -46,7 +46,7 @@ python -m pytest tests/*.py ``` # Code Formatting -In order to keep our codebase clean and readible, we are using PEP8 guidelines. To help us maintain and check code style, we are using [black](https://github.com/psf/black). Simply run `black .` before commiting. Failure to do so may fail the tests run on Travis. This package should have been installed for you. +In order to keep our codebase clean and readible, we are using PEP8 guidelines. To help us maintain and check code style, we are using [black](https://github.com/psf/black). Simply run `black --line-length 105 .` before commiting. Failure to do so may fail the tests run on Travis. This package should have been installed for you. # Submitting a Pull Request diff --git a/tests/test_executor.py b/tests/test_executor.py index d4a05d01..268243f0 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -88,9 +88,8 @@ def test_colored_bar_chart(): color_cardinality = len(df.unique_values["Cylinders"]) group_by_cardinality = len(df.unique_values["Origin"]) assert len(new_vis.data.columns) == 3 - assert ( - len(new_vis.data) == 15 > group_by_cardinality < color_cardinality * group_by_cardinality - ) # Not color_cardinality*group_by_cardinality since some combinations have 0 values + # Not color_cardinality*group_by_cardinality since some combinations have 0 values + assert len(new_vis.data) == 15 > group_by_cardinality < color_cardinality * group_by_cardinality def test_colored_line_chart(): From eccb8e4d05256792cdd392f2875c8bbb1a3995de Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 16 Nov 2020 03:02:51 -0800 Subject: [PATCH 19/22] .format () changed --- lux/core/frame.py | 12 ++++----- lux/executor/SQLExecutor.py | 49 ++++++------------------------------- 2 files changed, 14 insertions(+), 47 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 080c0294..47748a77 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -364,8 +364,8 @@ def get_SQL_attributes(self): table_name = self.table_name[self.table_name.index(".") + 1 :] else: table_name = self.table_name - attr_query = "SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{}'".format( - table_name + attr_query = ( + f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'" ) attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"]) for attr in attributes: @@ -375,7 +375,7 @@ def get_SQL_cardinality(self): cardinality = {} for attr in list(self.columns): card_query = pd.read_sql( - "SELECT Count(Distinct({})) FROM {}".format(attr, self.table_name), + f"SELECT Count(Distinct({attr})) FROM {self.table_name}", self.SQLconnection, ) cardinality[attr] = list(card_query["count"])[0] @@ -385,7 +385,7 @@ def get_SQL_unique_values(self): unique_vals = {} for attr in list(self.columns): unique_query = pd.read_sql( - "SELECT Distinct({}) FROM {}".format(attr, self.table_name), + f"SELECT Distinct({attr}) FROM {self.table_name}", self.SQLconnection, ) unique_vals[attr] = list(unique_query[attr]) @@ -401,8 +401,8 @@ def compute_SQL_data_type(self): table_name = self.table_name # get the data types of the attributes in the SQL table for attr in list(self.columns): - datatype_query = "SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{}' AND COLUMN_NAME = '{}'".format( - table_name, attr + datatype_query = ( + f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'", ) datatype = list(pd.read_sql(datatype_query, self.SQLconnection)["data_type"])[0] sql_dtypes[attr] = datatype diff --git a/lux/executor/SQLExecutor.py b/lux/executor/SQLExecutor.py index c3978975..05c608d1 100644 --- a/lux/executor/SQLExecutor.py +++ b/lux/executor/SQLExecutor.py @@ -60,18 +60,14 @@ def execute(vislist: VisList, ldf: LuxDataFrame): required_variables = ",".join(required_variables) row_count = list( pd.read_sql( - "SELECT COUNT(*) FROM {} {}".format(ldf.table_name, where_clause), + f"SELECT COUNT(*) FROM {ldf.table_name} {where_clause}", ldf.SQLconnection, )["count"] )[0] if row_count > 10000: - query = "SELECT {} FROM {} {} ORDER BY random() LIMIT 10000".format( - required_variables, ldf.table_name, where_clause - ) + query = f"SELECT {required_variables} FROM {ldf.table_name} {where_clause} ORDER BY random() LIMIT 10000" else: - query = "SELECT {} FROM {} {}".format( - required_variables, ldf.table_name, where_clause - ) + query = f"SELECT {required_variables} FROM {ldf.table_name} {where_clause}" data = pd.read_sql(query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(data) if vis.mark == "bar" or vis.mark == "line": @@ -100,13 +96,7 @@ def execute_aggregate(vis: Vis, ldf: LuxDataFrame): # barchart case, need count data for each group if measure_attr.attribute == "Record": where_clause, filterVars = SQLExecutor.execute_filter(vis) - count_query = "SELECT {}, COUNT({}) FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - groupby_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + count_query = f"SELECT {groupby_attr.attribute}, COUNT({groupby_attr.attribute}) FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}" vis._vis_data = pd.read_sql(count_query, ldf.SQLconnection) vis._vis_data = vis.data.rename(columns={"count": "Record"}) vis._vis_data = utils.pandas_to_lux(vis.data) @@ -114,36 +104,15 @@ def execute_aggregate(vis: Vis, ldf: LuxDataFrame): else: where_clause, filterVars = SQLExecutor.execute_filter(vis) if agg_func == "mean": - mean_query = "SELECT {}, AVG({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = f"SELECT {groupby_attr.attribute}, AVG({measure_attr.attribute}) as {measure_attr.attribute} FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}" vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) if agg_func == "sum": - mean_query = "SELECT {}, SUM({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = f"SELECT {groupby_attr.attribute}, SUM({measure_attr.attribute}) as {measure_attr.attribute} FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}" vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) if agg_func == "max": - mean_query = "SELECT {}, MAX({}) as {} FROM {} {} GROUP BY {}".format( - groupby_attr.attribute, - measure_attr.attribute, - measure_attr.attribute, - ldf.table_name, - where_clause, - groupby_attr.attribute, - ) + mean_query = f"SELECT {groupby_attr.attribute}, MAX({measure_attr.attribute}) as {measure_attr.attribute} FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}" vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection) vis._vis_data = utils.pandas_to_lux(vis.data) @@ -181,9 +150,7 @@ def execute_binning(vis: Vis, ldf: LuxDataFrame): upper_edges.append(str(curr_edge)) upper_edges = ",".join(upper_edges) vis_filter, filter_vars = SQLExecutor.execute_filter(vis) - bin_count_query = "SELECT width_bucket, COUNT(width_bucket) FROM (SELECT width_bucket({}, '{}') FROM {}) as Buckets GROUP BY width_bucket ORDER BY width_bucket".format( - bin_attribute.attribute, "{" + upper_edges + "}", ldf.table_name - ) + bin_count_query = f"SELECT width_bucket, COUNT(width_bucket) FROM (SELECT width_bucket({bin_attribute.attribute}, '{{{upper_edges}}}') FROM {ldf.table_name}) as Buckets GROUP BY width_bucket ORDER BY width_bucket" bin_count_data = pd.read_sql(bin_count_query, ldf.SQLconnection) # counts,binEdges = np.histogram(ldf[bin_attribute.attribute],bins=bin_attribute.bin_size) From 15963436414682b0638879743398c46d15f38a6e Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 16 Nov 2020 03:10:49 -0800 Subject: [PATCH 20/22] replace tabs with escape chars --- lux/vislib/altair/AltairChart.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lux/vislib/altair/AltairChart.py b/lux/vislib/altair/AltairChart.py index f0ccb869..a069efeb 100644 --- a/lux/vislib/altair/AltairChart.py +++ b/lux/vislib/altair/AltairChart.py @@ -74,10 +74,10 @@ def apply_default_config(self): ) self.code += "chart = chart.configure_axis(titleFontWeight=500,titleFontSize=11,titleFont='Helvetica Neue',\n" self.code += ( - " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" + "\t\t\t\t\tlabelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" ) self.code += "chart = chart.configure_legend(titleFontWeight=500,titleFontSize=10,titleFont='Helvetica Neue',\n" - self.code += " labelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue')\n" + self.code += "\t\t\t\t\tlabelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue')\n" self.code += "chart = chart.properties(width=160,height=150)\n" def encode_color(self): From 8c3b2c16d8c5e2b055a6da4444592bb24007b24f Mon Sep 17 00:00:00 2001 From: Ujjaini Mukhopadhyay Date: Mon, 16 Nov 2020 03:26:06 -0800 Subject: [PATCH 21/22] update using black --- lux/vislib/altair/AltairChart.py | 4 +--- tests/test_type.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/lux/vislib/altair/AltairChart.py b/lux/vislib/altair/AltairChart.py index a069efeb..de4830f7 100644 --- a/lux/vislib/altair/AltairChart.py +++ b/lux/vislib/altair/AltairChart.py @@ -73,9 +73,7 @@ def apply_default_config(self): "\nchart = chart.configure_title(fontWeight=500,fontSize=13,font='Helvetica Neue')\n" ) self.code += "chart = chart.configure_axis(titleFontWeight=500,titleFontSize=11,titleFont='Helvetica Neue',\n" - self.code += ( - "\t\t\t\t\tlabelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" - ) + self.code += "\t\t\t\t\tlabelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue',labelColor='#505050')\n" self.code += "chart = chart.configure_legend(titleFontWeight=500,titleFontSize=10,titleFont='Helvetica Neue',\n" self.code += "\t\t\t\t\tlabelFontWeight=400,labelFontSize=8,labelFont='Helvetica Neue')\n" self.code += "chart = chart.properties(width=160,height=150)\n" diff --git a/tests/test_type.py b/tests/test_type.py index 1937b26f..aa1b3b53 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -127,9 +127,7 @@ def test_check_datetime(): def test_check_stock(): - df = pd.read_csv( - "https://github.com/lux-org/lux-datasets/blob/master/data/stocks.csv?raw=true" - ) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/stocks.csv?raw=true") df.maintain_metadata() assert df.data_type_lookup == { "symbol": "nominal", From b468b07f4cd95fdac37199c7df59cdb08ff0c482 Mon Sep 17 00:00:00 2001 From: Doris Lee Date: Mon, 16 Nov 2020 21:00:30 +0800 Subject: [PATCH 22/22] more rewrites and merges into single line --- lux/core/frame.py | 12 ++++-------- lux/executor/PandasExecutor.py | 11 +++++------ lux/vislib/altair/BarChart.py | 2 +- tests/test_action.py | 3 +-- tests/test_performance.py | 3 +-- tests/test_vis.py | 15 +++++---------- 6 files changed, 17 insertions(+), 29 deletions(-) diff --git a/lux/core/frame.py b/lux/core/frame.py index 47748a77..3c6b3977 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -364,10 +364,8 @@ def get_SQL_attributes(self): table_name = self.table_name[self.table_name.index(".") + 1 :] else: table_name = self.table_name - attr_query = ( - f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'" - ) - attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"]) + query = f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'" + attributes = list(pd.read_sql(query, self.SQLconnection)["column_name"]) for attr in attributes: self[attr] = None @@ -401,10 +399,8 @@ def compute_SQL_data_type(self): table_name = self.table_name # get the data types of the attributes in the SQL table for attr in list(self.columns): - datatype_query = ( - f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'", - ) - datatype = list(pd.read_sql(datatype_query, self.SQLconnection)["data_type"])[0] + query = f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'" + datatype = list(pd.read_sql(query, self.SQLconnection)["data_type"])[0] sql_dtypes[attr] = datatype data_type = {"quantitative": [], "nominal": [], "temporal": []} diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index 6a41379d..a73e607b 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -246,18 +246,17 @@ def execute_binning(vis: Vis): import numpy as np bin_attribute = list(filter(lambda x: x.bin_size != 0, vis._inferred_intent))[0] - if not np.isnan(vis.data[bin_attribute.attribute]).all(): + bin_attr = bin_attribute.attribute + if not np.isnan(vis.data[bin_attr]).all(): # np.histogram breaks if array contain NaN - series = vis.data[bin_attribute.attribute].dropna() + series = vis.data[bin_attr].dropna() # TODO:binning runs for name attribte. Name attribute has datatype quantitative which is wrong. counts, bin_edges = np.histogram(series, bins=bin_attribute.bin_size) # bin_edges of size N+1, so need to compute bin_center as the bin location bin_center = np.mean(np.vstack([bin_edges[0:-1], bin_edges[1:]]), axis=0) # TODO: Should vis.data be a LuxDataFrame or a Pandas DataFrame? - vis._vis_data = pd.DataFrame( - np.array([bin_center, counts]).T, - columns=[bin_attribute.attribute, "Number of Records"], - ) + binned_result = np.array([bin_center, counts]).T + vis._vis_data = pd.DataFrame(binned_result, columns=[bin_attr, "Number of Records"]) @staticmethod def execute_filter(vis: Vis): diff --git a/lux/vislib/altair/BarChart.py b/lux/vislib/altair/BarChart.py index 0550e590..99e9b1fd 100644 --- a/lux/vislib/altair/BarChart.py +++ b/lux/vislib/altair/BarChart.py @@ -67,8 +67,8 @@ def initialize_chart(self): type=x_attr.data_type, axis=alt.Axis(labelOverlap=True), ) - y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title) x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', axis=alt.Axis(labelOverlap=True))" + y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title) y_attr_field_code = ( f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" ) diff --git a/tests/test_action.py b/tests/test_action.py index 5775c614..44337181 100644 --- a/tests/test_action.py +++ b/tests/test_action.py @@ -20,8 +20,7 @@ def test_vary_filter_val(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vis = Vis(["Height", "SportType=Ball"], df) df.set_intent_as_vis(vis) df._repr_html_() diff --git a/tests/test_performance.py b/tests/test_performance.py index a30b4cd2..66a9bd6b 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -20,8 +20,7 @@ # To run the script and see the printed result, run: # python -m pytest -s tests/test_performance.py def test_q1_performance_census(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true") tic = time.perf_counter() df._repr_html_() toc = time.perf_counter() diff --git a/tests/test_vis.py b/tests/test_vis.py index bf1879fd..122c1e3c 100644 --- a/tests/test_vis.py +++ b/tests/test_vis.py @@ -20,24 +20,21 @@ def test_vis(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vis = Vis(["Height", "SportType=Ball"], df) assert vis.get_attr_by_attr_name("Height")[0].bin_size != 0 assert vis.get_attr_by_attr_name("Record")[0].aggregation == "count" def test_vis_set_specs(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vis = Vis(["Height", "SportType=Ball"], df) vis.set_intent(["Height", "SportType=Ice"]) assert vis.get_attr_by_attr_name("SportType")[0].value == "Ice" def test_vis_collection(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vlist = VisList(["Height", "SportType=Ball", "?"], df) vis_with_year = list(filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist))[0] assert vis_with_year.get_attr_by_channel("x")[0].attribute == "Year" @@ -48,8 +45,7 @@ def test_vis_collection(): def test_vis_collection_set_intent(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vlist = VisList(["Height", "SportType=Ice", "?"], df) vlist.set_intent(["Height", "SportType=Boat", "?"]) for v in vlist._collection: @@ -166,8 +162,7 @@ def test_vis_to_Altair_standalone(): def test_vis_list_custom_title_override(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") df["Year"] = pd.to_datetime(df["Year"], format="%Y") vcLst = []