From 939799f80e92ebc564ea2b409cf1342b148127d0 Mon Sep 17 00:00:00 2001 From: thyneb19 Date: Mon, 12 Apr 2021 10:25:43 -0700 Subject: [PATCH] Updating Lux Test Suite (#356) * Merging Recent SQL Executor changes * Fix to Validator Uses unique value metadata to verify if a value is valid * Fix Bug with Widget Rendering frame.py was trying to import luxWidget instead of luxwidget * Added Number of Observations to MetaData, Fixed Interestingness issue with SQL Executor Some interestingness functions required the number of observations in the data and visualization, so I added these values to the metadata to make the scoring work when using the SQL executor Added tests for SQL executor * Re-added Licensing Headers * Adding Recent frame.py changes * Adjusted SQL Executor Tests Removed lines that changed Year column type to datetime * Update Frame with new Action Registering * Resolving Conflicts in frame.py * Commenting out local SQL Executor tests SQL Executor tests interfering with travis build, commenting out for now * Update correlation.py * Update frame.py * Fixing Code Format * Cleaning up Pandas Executor imports * Fix Validation Bug Issue where validator was relying on metadata which was not yet generated, moved metadata calculation before validation step in frame.py * Changed metadata variable name Renamed num_obs to length, removed ordinal variable from Executor mapping function * Moving Current SQL Executor changes to new branch (#119) * Merging Recent SQL Executor changes * Fix to Validator Uses unique value metadata to verify if a value is valid * Fix Bug with Widget Rendering frame.py was trying to import luxWidget instead of luxwidget * Added Number of Observations to MetaData, Fixed Interestingness issue with SQL Executor Some interestingness functions required the number of observations in the data and visualization, so I added these values to the metadata to make the scoring work when using the SQL executor Added tests for SQL executor * Re-added 
Licensing Headers * Adding Recent frame.py changes * Adjusted SQL Executor Tests Removed lines that changed Year column type to datetime * Update Frame with new Action Registering * Resolving Conflicts in frame.py * Commenting out local SQL Executor tests SQL Executor tests interfering with travis build, commenting out for now * Update correlation.py * Update frame.py * Fixing Code Format * Cleaning up Pandas Executor imports * Fix Validation Bug Issue where validator was relying on metadata which was not yet generated, moved metadata calculation before validation step in frame.py * Changed metadata variable name Renamed num_obs to length, removed ordinal variable from Executor mapping function Co-authored-by: 19thyneb Co-authored-by: Doris Lee * Added script to generate Postgresql database Updated travis.yml file to create postgresql database in test instance. Added script to populate test database with data. * Update upload_car_data.py Updated database credentials * Updated script name in travis.yml * Removed unnecessary import from travis.yml * Added psycopg2 to requirements.txt * Creating Postgres test database in travis * Fixed directory issue * Added test environment for Postgresql Executor (#124) * Merging Recent SQL Executor changes * Fix to Validator Uses unique value metadata to verify if a value is valid * Fix Bug with Widget Rendering frame.py was trying to import luxWidget instead of luxwidget * Added Number of Observations to MetaData, Fixed Interestingness issue with SQL Executor Some interestingness functions required the number of observations in the data and visualization, so I added these values to the metadata to make the scoring work when using the SQL executor Added tests for SQL executor * Re-added Licensing Headers * Adding Recent frame.py changes * Adjusted SQL Executor Tests Removed lines that changed Year column type to datetime * Update Frame with new Action Registering * Resolving Conflicts in frame.py * Commenting out local SQL 
Executor tests SQL Executor tests interfering with travis build, commenting out for now * Update correlation.py * Update frame.py * Fixing Code Format * Cleaning up Pandas Executor imports * Fix Validation Bug Issue where validator was relying on metadata which was not yet generated, moved metadata calculation before validation step in frame.py * Changed metadata variable name Renamed num_obs to length, removed ordinal variable from Executor mapping function * Added script to generate Postgresql database Updated travis.yml file to create postgresql database in test instance. Added script to populate test database with data. * Update upload_car_data.py Updated database credentials * Updated script name in travis.yml * Removed unnecessary import from travis.yml * Added psycopg2 to requirements.txt * Creating Postgres test database in travis * Fixed directory issue Co-authored-by: 19thyneb Co-authored-by: Doris Lee * Updated SQL Executor Tests Added tests for basic SQL Executor functionality. * Added sql_executor example notebook, minor bug fix Added an example notebook to showcase how to use the sql-engine. Fixed variable reference in interestingness.py that was causing issues. 
* Cleaned SQL Executor Example Notebook restarted kernel and cleared output * Update custom action reference to executor Now uses executor tied to the dataframe for execution * Added example notebook, fixed variable reference (#130) * Merging Recent SQL Executor changes * Fix to Validator Uses unique value metadata to verify if a value is valid * Fix Bug with Widget Rendering frame.py was trying to import luxWidget instead of luxwidget * Added Number of Observations to MetaData, Fixed Interestingness issue with SQL Executor Some interestingness functions required the number of observations in the data and visualization, so I added these values to the metadata to make the scoring work when using the SQL executor Added tests for SQL executor * Re-added Licensing Headers * Adding Recent frame.py changes * Adjusted SQL Executor Tests Removed lines that changed Year column type to datetime * Update Frame with new Action Registering * Resolving Conflicts in frame.py * Commenting out local SQL Executor tests SQL Executor tests interfering with travis build, commenting out for now * Update correlation.py * Update frame.py * Fixing Code Format * Cleaning up Pandas Executor imports * Fix Validation Bug Issue where validator was relying on metadata which was not yet generated, moved metadata calculation before validation step in frame.py * Changed metadata variable name Renamed num_obs to length, removed ordinal variable from Executor mapping function * Added script to generate Postgresql database Updated travis.yml file to create postgresql database in test instance. Added script to populate test database with data. * Update upload_car_data.py Updated database credentials * Updated script name in travis.yml * Removed unnecessary import from travis.yml * Added psycopg2 to requirements.txt * Creating Postgres test database in travis * Fixed directory issue * Updated SQL Executor Tests Added tests for basic SQL Executor functionality. 
* Added sql_executor example notebook, minor bug fix Added an example notebook to showcase how to use the sql-engine. Fixed variable reference in interestingness.py that was causing issues. * Cleaned SQL Executor Example Notebook restarted kernel and cleared output * Update custom action reference to executor Now uses executor tied to the dataframe for execution Co-authored-by: 19thyneb Co-authored-by: Doris Lee * Updated Tests, Added benchmarking for SQL Executor Updated Compiler and Interestingness tests to work for SQL executor. Updated SQL Executor to have some benchmarking code for tracking query performance. * Merge with upstream branch, added preliminary benchmarking code * Added 2D Binning functionality to SQL Executor added 2D binning to replace scatterplots when using SQL executor. * Updated 2D Binning Functionality Interestingness function now compatible with 2D binning with SQL Executor. Fixed issue where some datapoints in heatmap data were of string type instead of integer. 
* Added Heatmap generation to SQL Executor, Bug fix in PandasExecutor Added 2D Binning function to collect data for heatmaps in the SQL Executor and updated compiler test to reflect heatmap changes in SQL Executor Fixed issue within PandasExecutor where 2D binning function would not color heatmap using temporal variables * Updated Code Formatting with Black * Update Requirements to include psycopg2 * Update upload_car_data.py Updated to use newest car.csv file * Update Compiler tests to use correct test DB * Removed Benchmarking Code * Fixing Black Formatting * Updating SQL-Engine branch to main branch, Adding Heatmap Functionality to SQL Executor (#154) * Merging Recent SQL Executor changes * Fix to Validator Uses unique value metadata to verify if a value is valid * Fix Bug with Widget Rendering frame.py was trying to import luxWidget instead of luxwidget * Added Number of Observations to MetaData, Fixed Interestingness issue with SQL Executor Some interestingness functions required the number of observations in the data and visualization, so I added these values to the metadata to make the scoring work when using the SQL executor Added tests for SQL executor * Re-added Licensing Headers * Adding Recent frame.py changes * Adjusted SQL Executor Tests Removed lines that changed Year column type to datetime * Update Frame with new Action Registering * Resolving Conflicts in frame.py * Commenting out local SQL Executor tests SQL Executor tests interfering with travis build, commenting out for now * Update correlation.py * Update frame.py * bugfix: "number of remaining bars" text overcounts for colored bar charts * update number of bars calculation to account for when len(data) double counts * Fixing Code Format * Cleaning up Pandas Executor imports * Fix Validation Bug Issue where validator was relying on metadata which was not yet generated, moved metadata calculation before validation step in frame.py * Changed metadata variable name Renamed num_obs to length, 
removed ordinal variable from Executor mapping function * Adding support for setting intent on front end (#112) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * Make default_display a global setting (#121) * remove and register action functions * update changes inframe.py * update changes inframe.py * add documentation and changes * indentation and comments * new line * globally defined default display works with warning * no examples * add back space * new line * uncomment docstring Co-authored-by: Caitlyn Chen * Added script to generate Postgresql database Updated travis.yml file to create postgresql database in test instance. Added script to populate test database with data. * Update upload_car_data.py Updated database credentials * Updated script name in travis.yml * Removed unnecessary import from travis.yml * Added psycopg2 to requirements.txt * Creating Postgres test database in travis * Fixed directory issue * Updated SQL Executor Tests Added tests for basic SQL Executor functionality. * Update requirements.txt (#128) * basic scatterplot experiments * experiment results with manually binned heatmaps * experiment result * incorporated heatmap code into executor and renderer * additional experiments to evaluate scatter v.s. 
heatmap performance * experiment based on real estate and airbnb data * modified general sampling criteria, suppress SettingWithCopyWarning stemming from groupby .agg (#93) * decrease sampling parameter * change sampling strategy (above threshold keep 3/4 of data) * remove experiment dir * modified performance param * enforce lux-widget minimum version * update requirement.txt * separate dev and install requirements * replaced _exportedVisIdxs --> _selectedVisIdxs * bugfix: plot config error when current_vis is None * Added sql_executor example notebook, minor bug fix Added an example notebook to showcase how to use the sql-engine. Fixed variable reference in interestingness.py that was causing issues. * Add LuxSeries Implementation (#122) * add preliminary groupby fixes * preliminary LuxSeries implementation * add tests for new Series implementation * clean up the added code * minor code changes * fix issues with Vis with index * small fixes * remove comments * bugfix column group display empty Vis involving groupby index * bugfix Cylinders not showing up as bar charts Co-authored-by: Doris Lee * add black to travis (#127) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * fix doc failing from black format * Cleaned SQL Executor Example Notebook restarted kernel and cleared output * Update custom action reference to executor Now uses executor tied to the dataframe for execution * Updated Interestingness Tests (#133) * add black * update cars dataset and tests * Delete old dataset * Updated Interestingness Tests Updated tests to use the newly updated cars dataset * switch to local cars reference Co-authored-by: Kunal Agarwal Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: 19thyneb Co-authored-by: Doris Lee * fix broken link in docs * Updated Tests, Added benchmarking 
for SQL Executor Updated Compiler and Interestingness tests to work for SQL executor. Updated SQL Executor to have some benchmarking code for tracking query performance. * Merge with upstream branch, added preliminary benchmarking code * Better warning message for Vis and VisList (#135) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * adding warning message for Vis intents being > 1 * passes tests and intent < 3 * minor change to error message, added test * run black * accounted for more edge cases and hid traceback * fixed typo * added tests * format w/ black * ran black again * Update Vis.py minor readability changes Co-authored-by: Doris Lee * Pandas rewrite Performance optimizations (#136) * basic scatterplot experiments * experiment results with manually binned heatmaps * experiment result * incorporated heatmap code into executor and renderer * additional experiments to evaluate scatter v.s. heatmap performance * experiment based on real estate and airbnb data * modified general sampling criteria, suppress SettingWithCopyWarning stemming from groupby .agg (#93) * decrease sampling parameter * change sampling strategy (above threshold keep 3/4 of data) * remove experiment dir * modified performance param * enforce lux-widget minimum version * update requirement.txt * testing out modin (Recursion error) * create modin executor, all else in sync with master changes * rewrote .loc with column reference, speed up by 100x * replace agg("count") with .count() --> ~0.1ms speedup * run black * Added 2D Binning functionality to SQL Executor added 2D binning to replace scatterplots when using SQL executor. 
* Update README.md update slack link * Updated temporal detection and tests (#139) * Updated temporal detection and tests * Reformatted code with black * Update PandasExecutor.py * added stock date test Co-authored-by: Doris Lee * Fix Inline comments breaking to new lines (#137) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * first pass -- inline comments * _config/config.py * delete test notebook * action * line length 105 * executor * interestingness * processor * vislib * tests, travis, CONTRIBUTING * .format () changed * replace tabs with escape chars * update using black * more rewrites and merges into single line Co-authored-by: Doris Lee * Improve warning message when values specified as attributes (#143) * Improve warning message when values specified as attributes (#142) * added test, ran black * bugfix test * Better warning message for Vis and VisList (#146) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * adding warning message for Vis intents being > 1 * passes tests and intent < 3 * minor change to error message, added test * run black * accounted for more edge cases and hid traceback * fixed typo * added tests * format w/ black * ran black again * Update Vis.py minor readability changes * added check and tests for Vis list and | syntax * ran black Co-authored-by: Doris Lee * Updated docs for JupyterLab (#148) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * 
changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * updated install and faq * added install_lab.sh script * Global shared variable in test (#144) (#149) * using global shared variable in test (lux-org#144) * modified fixture scope as session, resolved dependency test cases * run black Co-authored-by: Doris Lee * Updated 2D Binning Functionality Interestingness function now compatible with 2D binning with SQL Executor. Fixed issue where some datapoints in heatmap data were of string type instead of integer. * Added Heatmap generation to SQL Executor, Bug fix in PandasExecutor Added 2D Binning function to collect data for heatmaps in the SQL Executor and updated compiler test to reflect heatmap changes in SQL Executor Fixed issue within PandasExecutor where 2D binning function would not color heatmap using temporal variables * Updated Code Formatting with Black * Update Requirements to include psycopg2 * Update upload_car_data.py Updated to use newest car.csv file * Update Compiler tests to use correct test DB * Removed Benchmarking Code * Fixing Black Formatting Co-authored-by: 19thyneb Co-authored-by: Doris Lee Co-authored-by: cjachekang <47467363+cjachekang@users.noreply.github.com> Co-authored-by: Caitlyn Chen Co-authored-by: Caitlyn Chen Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: jinimukh <46768380+jinimukh@users.noreply.github.com> Co-authored-by: Kunal Agarwal Co-authored-by: Jared Zhao Co-authored-by: Piyush Gupta * Moved Executor Parameters to Global Config * Black formatting * Moved table_name parameter to frame.py. Removed executor_type parameter executor_type parameter no longer necessary to maintain * Fixed reference to table_name parameter table_name is now a parameter within frame.py * Adjusted Functions to Set SQL Connection Moved set_SQL_connection function to config. 
Added set_SQL_table function within frame.py to let users specify which database table will be associated with their dataframe * Update SQLExecutor name parameter * Merging master branch with sql engine. Moving executor parameters to config * Parameter Bug Fix Lux DataFrame current_vis should be an empty list after recs are expired. Executor name for small_df and very_small_df checks should be "PandasExecutor" * Reference Fix in Warning Executor check here should look for name "PandasExecutor" * Black Formatting * Black formatting * Fix Executor Reference Update current_vis() to reference lux.config.executor * Update frame.py * Moved set functions to global config * Cleaned up executor imports, Fixed issue in AltairRenderer Issue where Altair Renderer was not using lux.config.executor * Black formatting * Merged changes from Master branch, Moved Executor Parameters to Config (#163) * Merging Recent SQL Executor changes * Fix to Validator Uses unique value metadata to verify if a value is valid * Fix Bug with Widget Rendering frame.py was trying to import luxWidget instead of luxwidget * Added Number of Observations to MetaData, Fixed Interestingness issue with SQL Executor Some interestingness functions required the number of observations in the data and visualization, so I added these values to the metadata to make the scoring work when using the SQL executor Added tests for SQL executor * Re-added Licensing Headers * Adding Recent frame.py changes * Adjusted SQL Executor Tests Removed lines that changed Year column type to datetime * Update Frame with new Action Registering * Resolving Conflicts in frame.py * Commenting out local SQL Executor tests SQL Executor tests interfering with travis build, commenting out for now * Update correlation.py * Update frame.py * bugfix: "number of remaining bars" text overcounts for colored bar charts * update number of bars calculation to account for when len(data) double counts * Fixing Code Format * Cleaning up Pandas 
Executor imports * Fix Validation Bug Issue where validator was relying on metadata which was not yet generated, moved metadata calculation before validation step in frame.py * Changed metadata variable name Renamed num_obs to length, removed ordinal variable from Executor mapping function * Adding support for setting intent on front end (#112) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * Make default_display a global setting (#121) * remove and register action functions * update changes inframe.py * update changes inframe.py * add documentation and changes * indentation and comments * new line * globally defined default display works with warning * no examples * add back space * new line * uncomment docstring Co-authored-by: Caitlyn Chen * Added script to generate Postgresql database Updated travis.yml file to create postgresql database in test instance. Added script to populate test database with data. * Update upload_car_data.py Updated database credentials * Updated script name in travis.yml * Removed unnecessary import from travis.yml * Added psycopg2 to requirements.txt * Creating Postgres test database in travis * Fixed directory issue * Updated SQL Executor Tests Added tests for basic SQL Executor functionality. * Update requirements.txt (#128) * basic scatterplot experiments * experiment results with manually binned heatmaps * experiment result * incorporated heatmap code into executor and renderer * additional experiments to evaluate scatter v.s. 
heatmap performance * experiment based on real estate and airbnb data * modified general sampling criteria, suppress SettingWithCopyWarning stemming from groupby .agg (#93) * decrease sampling parameter * change sampling strategy (above threshold keep 3/4 of data) * remove experiment dir * modified performance param * enforce lux-widget minimum version * update requirement.txt * separate dev and install requirements * replaced _exportedVisIdxs --> _selectedVisIdxs * bugfix: plot config error when current_vis is None * Added sql_executor example notebook, minor bug fix Added an example notebook to showcase how to use the sql-engine. Fixed variable reference in interestingness.py that was causing issues. * Add LuxSeries Implementation (#122) * add preliminary groupby fixes * preliminary LuxSeries implementation * add tests for new Series implementation * clean up the added code * minor code changes * fix issues with Vis with index * small fixes * remove comments * bugfix column group display empty Vis involving groupby index * bugfix Cylinders not showing up as bar charts Co-authored-by: Doris Lee * add black to travis (#127) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * fix doc failing from black format * Cleaned SQL Executor Example Notebook restarted kernel and cleared output * Update custom action reference to executor Now uses executor tied to the dataframe for execution * Updated Interestingness Tests (#133) * add black * update cars dataset and tests * Delete old dataset * Updated Interestingness Tests Updated tests to use the newly updated cars dataset * switch to local cars reference Co-authored-by: Kunal Agarwal Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: 19thyneb Co-authored-by: Doris Lee * fix broken link in docs * Updated Tests, Added benchmarking 
for SQL Executor Updated Compiler and Interestingness tests to work for SQL executor. Updated SQL Executor to have some benchmarking code for tracking query performance. * Merge with upstream branch, added preliminary benchmarking code * Better warning message for Vis and VisList (#135) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * adding warning message for Vis intents being > 1 * passes tests and intent < 3 * minor change to error message, added test * run black * accounted for more edge cases and hid traceback * fixed typo * added tests * format w/ black * ran black again * Update Vis.py minor readability changes Co-authored-by: Doris Lee * Pandas rewrite Performance optimizations (#136) * basic scatterplot experiments * experiment results with manually binned heatmaps * experiment result * incorporated heatmap code into executor and renderer * additional experiments to evaluate scatter v.s. heatmap performance * experiment based on real estate and airbnb data * modified general sampling criteria, suppress SettingWithCopyWarning stemming from groupby .agg (#93) * decrease sampling parameter * change sampling strategy (above threshold keep 3/4 of data) * remove experiment dir * modified performance param * enforce lux-widget minimum version * update requirement.txt * testing out modin (Recursion error) * create modin executor, all else in sync with master changes * rewrote .loc with column reference, speed up by 100x * replace agg("count") with .count() --> ~0.1ms speedup * run black * Added 2D Binning functionality to SQL Executor added 2D binning to replace scatterplots when using SQL executor. 
* Update README.md update slack link * Updated temporal detection and tests (#139) * Updated temporal detection and tests * Reformatted code with black * Update PandasExecutor.py * added stock date test Co-authored-by: Doris Lee * Fix Inline comments breaking to new lines (#137) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * first pass -- inline comments * _config/config.py * delete test notebook * action * line length 105 * executor * interestingness * processor * vislib * tests, travis, CONTRIBUTING * .format () changed * replace tabs with escape chars * update using black * more rewrites and merges into single line Co-authored-by: Doris Lee * Improve warning message when values specified as attributes (#143) * Improve warning message when values specified as attributes (#142) * added test, ran black * bugfix test * Better warning message for Vis and VisList (#146) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * adding warning message for Vis intents being > 1 * passes tests and intent < 3 * minor change to error message, added test * run black * accounted for more edge cases and hid traceback * fixed typo * added tests * format w/ black * ran black again * Update Vis.py minor readability changes * added check and tests for Vis list and | syntax * ran black Co-authored-by: Doris Lee * Updated docs for JupyterLab (#148) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * 
changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * updated install and faq * added install_lab.sh script * Global shared variable in test (#144) (#149) * using global shared variable in test (lux-org#144) * modified fixture scope as session, resolved dependency test cases * run black Co-authored-by: Doris Lee * Updated 2D Binning Functionality Interestingness function now compatible with 2D binning with SQL Executor. Fixed issue where some datapoints in heatmap data were of string type instead of integer. * documentation for custom actions (#134) * documentation for custom actions * fix config tests * fix documentation links * Add default display Config Class documentation * delete hpi * doc resolved changes Co-authored-by: Caitlyn Chen Co-authored-by: Doris Lee * Patch failing test (#134) * Patch failing test (#134) * Patch failing test (#134) * Added Heatmap generation to SQL Executor, Bug fix in PandasExecutor Added 2D Binning function to collect data for heatmaps in the SQL Executor and updated compiler test to reflect heatmap changes in SQL Executor Fixed issue within PandasExecutor where 2D binning function would not color heatmap using temporal variables * Updated Code Formatting with Black * Update Requirements to include psycopg2 * Update upload_car_data.py Updated to use newest car.csv file * Update Compiler tests to use correct test DB * Removed Benchmarking Code * Fixing Black Formatting * Add Toggle button for LuxSeries (#153) * add black * add groupby to history * add LuxSeries visualization implementation * remove groupby extending * run black * merge master into branch * run black * Reformat Warnings Labels (#151) * add format for warnings * fix small details * globally defined lux * fix formatting * Delete similarity.py * Rename similarity_old.py to similarity.py * fix commit * update master with changes to warning labels * fix formatting Co-authored-by: Caitlyn Chen * Make plot_config a 
global variable (#152) * plot_config made global config * docs and tests updates; black formatting * Delete Untitled.ipynb * cleanup * remove data encapsulation * black formatting * Update style.rst Co-authored-by: Doris Lee * Patch documentation (#134) * code cleanup * making LuxDataFrame pickle-able * various simplification and rewrite to Compiler and Vis * changing `render_VSpec` to `to_code` * bugfix in filter (list comprehension lazily evaluated) * Moved Executor Parameters to Global Config * Black formatting * Fix issue with read_json and add tests for different file types (#156) * fix read_json bug and add tests * convert tests to reference lux-datasets * run black * remove comments * Update __init__.py * Update test_pandas_coverage.py new_df --> df * fix tests to work with lux-datasets * fix init * remove lxml dependency * remove html test Co-authored-by: Doris Lee * Improved warning message stack trace for unexpected error (#145) * Moved table_name parameter to frame.py. Removed executor_type parameter executor_type parameter no longer necessary to maintain * Fixed reference to table_name parameter table_name is now a parameter within frame.py * bug fix patching #146 (#159) * remove sys.tracebacklimit to avoid `ERROR:root:Internal Python error in the inspect module` * changed SyntaxError to TypeError to reserved SyntaxError for Pandas native syntax problems * Ensure that recommendation and current_vis properties are accessible before df first printed (#159) * bugfix erased `name` dtype in LuxSeries (#140) * Adjusted Functions to Set SQL Connection Moved set_SQL_connection function to config. 
Added set_SQL_table function within frame.py to let users specify which database table will be associated with their dataframe * Update Makefile and pyproject.toml (#160) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * first pass -- inline comments * _config/config.py * delete test notebook * action * line length 105 * executor * interestingness * processor * vislib * tests, travis, CONTRIBUTING * .format () changed * replace tabs with escape chars * update using black * more rewrites and merges into single line * update pyproject.toml and makefile * updating contributing doc * Update CONTRIBUTING.md Co-authored-by: Doris Lee * Update README.md * Update SQLExecutor name parameter * Merging master branch with sql engine. Moving executor parameters to config * Parameter Bug Fix Lux DataFrame current_vis should be an empty list after recs are expired. Executor name for small_df and very_small_df checks should be "PandasExecutor" * Reference Fix in Warning Executor check here should look for name "PandasExecutor" * Black Formatting * Black formatting * Cleaned up executor imports, Fixed issue in AltairRenderer Issue where Altair Renderer was not using lux.config.executor * Black formatting Co-authored-by: 19thyneb Co-authored-by: Doris Lee Co-authored-by: cjachekang <47467363+cjachekang@users.noreply.github.com> Co-authored-by: Caitlyn Chen Co-authored-by: Caitlyn Chen Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: jinimukh <46768380+jinimukh@users.noreply.github.com> Co-authored-by: Kunal Agarwal Co-authored-by: Jared Zhao Co-authored-by: Piyush Gupta * Fixed Index Issue in Pandas Executor Issue caused when user sets an index. 
The Pandas Executor was not correctly renaming this new index column to Record in execute_aggregate() * Added tests for set_index functions * Black formatting * Update Pandas Executor to handle NA values Readded missing dropna parameter within execute_aggregate() groupby function call * Update to Config, and Compiler/Interestingness Tests Removed duplicate set_SQL_connection function from config. Updated tests to reflect these changes. Merged in recent changes in master branch. * Black formatting * Update Requirements.txt Updated to include numpy version requirement * Update to Sql-Engine (#190) * Merging Recent SQL Executor changes * Fix to Validator Uses unique value metadata to verify if a value is valid * Fix Bug with Widget Rendering frame.py was trying to import luxWidget instead of luxwidget * Added Number of Observations to MetaData, Fixed Interestingness issue with SQL Executor Some interestingness functions required the number of observations in the data and visualization, so I added these values to the metadata to make the scoring work when using the SQL executor Added tests for SQL executor * Re-added Licensing Headers * Adding Recent frame.py changes * Adjusted SQL Executor Tests Removed lines that changed Year column type to datetime * Update Frame with new Action Registering * Resolving Conflicts in frame.py * Commenting out local SQL Executor tests SQL Executor tests interfering with travis build, commenting out for now * Update correlation.py * Update frame.py * bugfix: "number of remaining bars" text overcounts for colored bar charts * update number of bars calculation to account for when len(data) double counts * Fixing Code Format * Cleaning up Pandas Executor imports * Fix Validation Bug Issue where validator was relying on metadata which was not yet generated, moved metadata calculation before validation step in frame.py * Changed metadata variable name Renamed num_obs to length, removed ordinal variable from Executor mapping function * Adding 
support for setting intent on front end (#112) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * Make default_display a global setting (#121) * remove and register action functions * update changes inframe.py * update changes inframe.py * add documentation and changes * indentation and comments * new line * globally defined default display works with warning * no examples * add back space * new line * uncomment docstring Co-authored-by: Caitlyn Chen * Added script to generate Postgresql database Updated travis.yml file to create postgresql database in test instance. Added script to populate test database with data. * Update upload_car_data.py Updated database credentials * Updated script name in travis.yml * Removed unnecessary import from travis.yml * Added psycopg2 to requirements.txt * Creating Postgres test database in travis * Fixed directory issue * Updated SQL Executor Tests Added tests for basic SQL Executor functionality. * Update requirements.txt (#128) * basic scatterplot experiments * experiment results with manually binned heatmaps * experiment result * incorporated heatmap code into executor and renderer * additional experiments to evaluate scatter v.s. 
heatmap performance * experiment based on real estate and airbnb data * modified general sampling criteria, suppress SettingWithCopyWarning stemming from groupby .agg (#93) * decrease sampling parameter * change sampling strategy (above threshold keep 3/4 of data) * remove experiment dir * modified performance param * enforce lux-widget minimum version * update requirement.txt * separate dev and install requirements * replaced _exportedVisIdxs --> _selectedVisIdxs * bugfix: plot config error when current_vis is None * Added sql_executor example notebook, minor bug fix Added an example notebook to showcase how to use the sql-engine. Fixed variable reference in interestingness.py that was causing issues. * Add LuxSeries Implementation (#122) * add preliminary groupby fixes * preliminary LuxSeries implementation * add tests for new Series implementation * clean up the added code * minor code changes * fix issues with Vis with index * small fixes * remove comments * bugfix column group display empty Vis involving groupby index * bugfix Cylinders not showing up as bar charts Co-authored-by: Doris Lee * add black to travis (#127) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * fix doc failing from black format * Cleaned SQL Executor Example Notebook restarted kernel and cleared output * Update custom action reference to executor Now uses executor tied to the dataframe for execution * Updated Interestingness Tests (#133) * add black * update cars dataset and tests * Delete old dataset * Updated Interestingness Tests Updated tests to use the newly updated cars dataset * switch to local cars reference Co-authored-by: Kunal Agarwal Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: 19thyneb Co-authored-by: Doris Lee * fix broken link in docs * Updated Tests, Added benchmarking 
for SQL Executor Updated Compiler and Interestingness tests to work for SQL executor. Updated SQL Executor to have some benchmarking code for tracking query performance. * Merge with upstream branch, added preliminary benchmarking code * Better warning message for Vis and VisList (#135) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * adding warning message for Vis intents being > 1 * passes tests and intent < 3 * minor change to error message, added test * run black * accounted for more edge cases and hid traceback * fixed typo * added tests * format w/ black * ran black again * Update Vis.py minor readability changes Co-authored-by: Doris Lee * Pandas rewrite Performance optimizations (#136) * basic scatterplot experiments * experiment results with manually binned heatmaps * experiment result * incorporated heatmap code into executor and renderer * additional experiments to evaluate scatter v.s. heatmap performance * experiment based on real estate and airbnb data * modified general sampling criteria, suppress SettingWithCopyWarning stemming from groupby .agg (#93) * decrease sampling parameter * change sampling strategy (above threshold keep 3/4 of data) * remove experiment dir * modified performance param * enforce lux-widget minimum version * update requirement.txt * testing out modin (Recursion error) * create modin executor, all else in sync with master changes * rewrote .loc with column reference, speed up by 100x * replace agg("count") with .count() --> ~0.1ms speedup * run black * Added 2D Binning functionality to SQL Executor added 2D binning to replace scatterplots when using SQL executor. 
* Update README.md update slack link * Updated temporal detection and tests (#139) * Updated temporal detection and tests * Reformatted code with black * Update PandasExecutor.py * added stock date test Co-authored-by: Doris Lee * Fix Inline comments breaking to new lines (#137) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * first pass -- inline comments * _config/config.py * delete test notebook * action * line length 105 * executor * interestingness * processor * vislib * tests, travis, CONTRIBUTING * .format () changed * replace tabs with escape chars * update using black * more rewrites and merges into single line Co-authored-by: Doris Lee * Improve warning message when values specified as attributes (#143) * Improve warning message when values specified as attributes (#142) * added test, ran black * bugfix test * Better warning message for Vis and VisList (#146) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * adding warning message for Vis intents being > 1 * passes tests and intent < 3 * minor change to error message, added test * run black * accounted for more edge cases and hid traceback * fixed typo * added tests * format w/ black * ran black again * Update Vis.py minor readability changes * added check and tests for Vis list and | syntax * ran black Co-authored-by: Doris Lee * Updated docs for JupyterLab (#148) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * 
changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * updated install and faq * added install_lab.sh script * Global shared variable in test (#144) (#149) * using global shared variable in test (lux-org#144) * modified fixture scope as session, resolved dependency test cases * run black Co-authored-by: Doris Lee * Updated 2D Binning Functionality Interestingness function now compatible with 2D binning with SQL Executor. Fixed issue where some datapoints in heatmap data were of string type instead of integer. * documentation for custom actions (#134) * documentation for custom actions * fix config tests * fix documentation links * Add default display Config Class documentation * delete hpi * doc resolved changes Co-authored-by: Caitlyn Chen Co-authored-by: Doris Lee * Patch failing test (#134) * Patch failing test (#134) * Patch failing test (#134) * Added Heatmap generation to SQL Executor, Bug fix in PandasExecutor Added 2D Binning function to collect data for heatmaps in the SQL Executor and updated compiler test to reflect heatmap changes in SQL Executor Fixed issue within PandasExecutor where 2D binning function would not color heatmap using temporal variables * Updated Code Formatting with Black * Update Requirements to include psycopg2 * Update upload_car_data.py Updated to use newest car.csv file * Update Compiler tests to use correct test DB * Removed Benchmarking Code * Fixing Black Formatting * Add Toggle button for LuxSeries (#153) * add black * add groupby to history * add LuxSeries visualization implementation * remove groupby extending * run black * merge master into branch * run black * Reformat Warnings Labels (#151) * add format for warnings * fix small details * globally defined lux * fix formatting * Delete similarity.py * Rename similarity_old.py to similarity.py * fix commit * update master with changes to warning labels * fix formatting Co-authored-by: Caitlyn Chen * Make plot_config a 
global variable (#152) * plot_config made global config * docs and tests updates; black formatting * Delete Untitled.ipynb * cleanup * remove data encapsulation * black formatting * Update style.rst Co-authored-by: Doris Lee * Patch documentation (#134) * code cleanup * making LuxDataFrame pickle-able * various simplification and rewrite to Compiler and Vis * changing `render_VSpec` to `to_code` * bugfix in filter (list comprehension lazily evaluated) * Moved Executor Parameters to Global Config * Black formatting * Fix issue with read_json and add tests for different file types (#156) * fix read_json bug and add tests * convert tests to reference lux-datasets * run black * remove comments * Update __init__.py * Update test_pandas_coverage.py new_df --> df * fix tests to work with lux-datasets * fix init * remove lxml dependency * remove html test Co-authored-by: Doris Lee * Improved warning message stack trace for unexpected error (#145) * Moved table_name parameter to frame.py. Removed executor_type parameter executor_type parameter no longer necessary to maintain * Fixed reference to table_name parameter table_name is now a parameter within frame.py * bug fix patching #146 (#159) * remove sys.tracebacklimit to avoid `ERROR:root:Internal Python error in the inspect module` * changed SyntaxError to TypeError to reserved SyntaxError for Pandas native syntax problems * Ensure that recommendation and current_vis properties are accessible before df first printed (#159) * bugfix erased `name` dtype in LuxSeries (#140) * Adjusted Functions to Set SQL Connection Moved set_SQL_connection function to config. 
Added set_SQL_table function within frame.py to let users specify which database table will be associated with their dataframe * Update Makefile and pyproject.toml (#160) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * first pass -- inline comments * _config/config.py * delete test notebook * action * line length 105 * executor * interestingness * processor * vislib * tests, travis, CONTRIBUTING * .format () changed * replace tabs with escape chars * update using black * more rewrites and merges into single line * update pyproject.toml and makefile * updating contributing doc * Update CONTRIBUTING.md Co-authored-by: Doris Lee * Update README.md * Update SQLExecutor name parameter * Merging master branch with sql engine. Moving executor parameters to config * Parameter Bug Fix Lux DataFrame current_vis should be an empty list after recs are expired. Executor name for small_df and very_small_df checks should be "PandasExecutor" * Reference Fix in Warning Executor check here should look for name "PandasExecutor" * Black Formatting * Black formatting * bump version number and requirements * PATCH v0.2.1.1 * remove __future__ annotations dependency * remove channel inheritance in Enhance * bugfix 3-clause Vis example in 5-datetime.ipynb * update README * Fix Executor Reference Update current_vis() to reference lux.config.executor * Update frame.py * Moved set functions to global config * Cleaned up executor imports, Fixed issue in AltairRenderer Issue where Altair Renderer was not using lux.config.executor * Black formatting * Moved Executor Parameters to Global Config (#157) * Moved Executor Parameters to Global Config * Black formatting * Moved table_name parameter to frame.py. 
Removed executor_type parameter executor_type parameter no longer necessary to maintain * Fixed reference to table_name parameter table_name is now a parameter within frame.py * Adjusted Functions to Set SQL Connection Moved set_SQL_connection function to config. Added set_SQL_table function within frame.py to let users specify which database table will be associated with their dataframe * Update SQLExecutor name parameter * Fix Executor Reference Update current_vis() to reference lux.config.executor * Update frame.py * Moved set functions to global config Co-authored-by: 19thyneb Co-authored-by: Doris Lee * save_as_html feature (#170) * note this requires unpkg dependency from latest npm release (for now download https://github.com/lux-org/lux-widget/blob/master/luxwidget/nbextension/static/index.js, rename it luxwidget.js and place it in the same directory as the exported HTML) * Updating documentation for lux-widget v0.1.2 release (#176) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * Updated readme * updated readthedoc documentation * added config to default display documentation * Update README.md Co-authored-by: Doris Lee * Fix bug caused by `groupby.agg` on column with many unique values (#174) * update export tutorial to add explanation for standalone argument * minor fixes and remove cell output in notebooks * added contributing doc * fix bugs and uncomment some tests * remove raise warning * remove unnecessary import * split up rename test into two parts * fix setting warning, fix data_type bugs and add relevant tests * remove ordinal data type * add test for small dataframe resetting index * add loc and iloc tests * fix attribute access directly to dataframe * add small changes to 
code * added test for qcut and cut * add check if dtype is Interval * added qcut test * fix Record KeyError * add tests * take care of reset_index case * small edits * add data_model to column_group Clause * small edits for row_group * fixes to row group Co-authored-by: Doris Lee * Update save_as_html docs (#170) * Update README *remove jupyter lab url * Update README.md fix slack link * Fixed Index Issue in Pandas Executor Issue caused when user sets an index. The Pandas Executor was not correctly renaming this new index column to Record in execute_aggregate() * Making Lux more robust with missing values and NaN (#179) (#180) * improve datetime warning message with starter templates * Handling NaN value errors * skipping validator check for NaN filter values * adding special case for PandasExecutor to map filter NaN to isna() * fixing unevenness metric when bar values are NaN * eliminate 1-cardinality filters in Filter action (since equal to overall) * fixed deviation array unequal bug when NaN * Handling NaN filter and data type * fixed data type detection when int coerced to float when containing NaN * added test for applying NaN filter * Ensure that LuxSeries displayed when there is NaN * ensure that NaNs are not dropped in groupbys * exclude NaN values in deviation calculation * fix unnamed series issue * improved debugging message for LuxSeries * Override pd.Series with LuxSeries * Fixes for type checking and line charts with NaNs * exclude NaN for line charts to prevent large axes offsetting * improved type checking for float no-longer NaN columns * fixed and improved deviation calculation test * added float categorical test * bump version and requirement * Added tests for set_index functions * Black formatting * Update Pandas Executor to handle NA values Readded missing dropna parameter within execute_aggregate() groupby function call * Update to Config, and Compiler/Interestingness Tests Removed duplicate set_SQL_connection function from config. 
Updated tests to reflect these changes. Merged in recent changes in master branch. * Black formatting * Update Requirements.txt Updated to include numpy version requirement Co-authored-by: 19thyneb Co-authored-by: Doris Lee Co-authored-by: cjachekang <47467363+cjachekang@users.noreply.github.com> Co-authored-by: Caitlyn Chen Co-authored-by: Caitlyn Chen Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: jinimukh <46768380+jinimukh@users.noreply.github.com> Co-authored-by: Kunal Agarwal Co-authored-by: Jared Zhao Co-authored-by: Piyush Gupta * Update SQL Executor Documentation Updated documentation in SQLExecutor.py. Updated example notebook for SQLExecutor * Updated SQLExecutor Example Notebook Updated example notebook for the SQL Executor and the script to upload the example dataset to a local Postgres database. * Black Formatting * Update to SQL Executor Example Notebook (#193) * Merging Recent SQL Executor changes * Fix to Validator Uses unique value metadata to verify if a value is valid * Fix Bug with Widget Rendering frame.py was trying to import luxWidget instead of luxwidget * Added Number of Observations to MetaData, Fixed Interestingness issue with SQL Executor Some interestingness functions required the number of observations in the data and visualization, so I added these values to the metadata to make the scoring work when using the SQL executor Added tests for SQL executor * Re-added Licensing Headers * Adding Recent frame.py changes * Adjusted SQL Executor Tests Removed lines that changed Year column type to datetime * Update Frame with new Action Registering * Resolving Conflicts in frame.py * Commenting out local SQL Executor tests SQL Executor tests interfering with travis build, commenting out for now * Update correlation.py * Update frame.py * bugfix: "number of remaining bars" text overcounts for colored bar charts * update number of bars calculation to account for when len(data) double counts * Fixing 
Code Format * Cleaning up Pandas Executor imports * Fix Validation Bug Issue where validator was relying on metadata which was not yet generated, moved metadata calculation before validation step in frame.py * Changed metadata variable name Renamed num_obs to length, removed ordinal variable from Executor mapping function * Adding support for setting intent on front end (#112) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * Make default_display a global setting (#121) * remove and register action functions * update changes inframe.py * update changes inframe.py * add documentation and changes * indentation and comments * new line * globally defined default display works with warning * no examples * add back space * new line * uncomment docstring Co-authored-by: Caitlyn Chen * Added script to generate Postgresql database Updated travis.yml file to create postgresql database in test instance. Added script to populate test database with data. * Update upload_car_data.py Updated database credentials * Updated script name in travis.yml * Removed unnecessary import from travis.yml * Added psycopg2 to requirements.txt * Creating Postgres test database in travis * Fixed directory issue * Updated SQL Executor Tests Added tests for basic SQL Executor functionality. * Update requirements.txt (#128) * basic scatterplot experiments * experiment results with manually binned heatmaps * experiment result * incorporated heatmap code into executor and renderer * additional experiments to evaluate scatter v.s. 
heatmap performance * experiment based on real estate and airbnb data * modified general sampling criteria, suppress SettingWithCopyWarning stemming from groupby .agg (#93) * decrease sampling parameter * change sampling strategy (above threshold keep 3/4 of data) * remove experiment dir * modified performance param * enforce lux-widget minimum version * update requirement.txt * separate dev and install requirements * replaced _exportedVisIdxs --> _selectedVisIdxs * bugfix: plot config error when current_vis is None * Added sql_executor example notebook, minor bug fix Added an example notebook to showcase how to use the sql-engine. Fixed variable reference in interestingness.py that was causing issues. * Add LuxSeries Implementation (#122) * add preliminary groupby fixes * preliminary LuxSeries implementation * add tests for new Series implementation * clean up the added code * minor code changes * fix issues with Vis with index * small fixes * remove comments * bugfix column group display empty Vis involving groupby index * bugfix Cylinders not showing up as bar charts Co-authored-by: Doris Lee * add black to travis (#127) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * fix doc failing from black format * Cleaned SQL Executor Example Notebook restarted kernel and cleared output * Update custom action reference to executor Now uses executor tied to the dataframe for execution * Updated Interestingness Tests (#133) * add black * update cars dataset and tests * Delete old dataset * Updated Interestingness Tests Updated tests to use the newly updated cars dataset * switch to local cars reference Co-authored-by: Kunal Agarwal Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: 19thyneb Co-authored-by: Doris Lee * fix broken link in docs * Updated Tests, Added benchmarking 
for SQL Executor Updated Compiler and Interestingness tests to work for SQL executor. Updated SQL Executor to have some benchmarking code for tracking query performance. * Merge with upstream branch, added preliminary benchmarking code * Better warning message for Vis and VisList (#135) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * adding warning message for Vis intents being > 1 * passes tests and intent < 3 * minor change to error message, added test * run black * accounted for more edge cases and hid traceback * fixed typo * added tests * format w/ black * ran black again * Update Vis.py minor readability changes Co-authored-by: Doris Lee * Pandas rewrite Performance optimizations (#136) * basic scatterplot experiments * experiment results with manually binned heatmaps * experiment result * incorporated heatmap code into executor and renderer * additional experiments to evaluate scatter v.s. heatmap performance * experiment based on real estate and airbnb data * modified general sampling criteria, suppress SettingWithCopyWarning stemming from groupby .agg (#93) * decrease sampling parameter * change sampling strategy (above threshold keep 3/4 of data) * remove experiment dir * modified performance param * enforce lux-widget minimum version * update requirement.txt * testing out modin (Recursion error) * create modin executor, all else in sync with master changes * rewrote .loc with column reference, speed up by 100x * replace agg("count") with .count() --> ~0.1ms speedup * run black * Added 2D Binning functionality to SQL Executor added 2D binning to replace scatterplots when using SQL executor. 
* Update README.md update slack link * Updated temporal detection and tests (#139) * Updated temporal detection and tests * Reformatted code with black * Update PandasExecutor.py * added stock date test Co-authored-by: Doris Lee * Fix Inline comments breaking to new lines (#137) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * first pass -- inline comments * _config/config.py * delete test notebook * action * line length 105 * executor * interestingness * processor * vislib * tests, travis, CONTRIBUTING * .format () changed * replace tabs with escape chars * update using black * more rewrites and merges into single line Co-authored-by: Doris Lee * Improve warning message when values specified as attributes (#143) * Improve warning message when values specified as attributes (#142) * added test, ran black * bugfix test * Better warning message for Vis and VisList (#146) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * adding warning message for Vis intents being > 1 * passes tests and intent < 3 * minor change to error message, added test * run black * accounted for more edge cases and hid traceback * fixed typo * added tests * format w/ black * ran black again * Update Vis.py minor readability changes * added check and tests for Vis list and | syntax * ran black Co-authored-by: Doris Lee * Updated docs for JupyterLab (#148) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * 
changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * updated install and faq * added install_lab.sh script * Global shared variable in test (#144) (#149) * using global shared variable in test (lux-org#144) * modified fixture scope as session, resolved dependency test cases * run black Co-authored-by: Doris Lee * Updated 2D Binning Functionality Interestingness function now compatible with 2D binning with SQL Executor. Fixed issue where some datapoints in heatmap data were of string type instead of integer. * documentation for custom actions (#134) * documentation for custom actions * fix config tests * fix documentation links * Add default display Config Class documentation * delete hpi * doc resolved changes Co-authored-by: Caitlyn Chen Co-authored-by: Doris Lee * Patch failing test (#134) * Patch failing test (#134) * Patch failing test (#134) * Added Heatmap generation to SQL Executor, Bug fix in PandasExecutor Added 2D Binning function to collect data for heatmaps in the SQL Executor and updated compiler test to reflect heatmap changes in SQL Executor Fixed issue within PandasExecutor where 2D binning function would not color heatmap using temporal variables * Updated Code Formatting with Black * Update Requirements to include psycopg2 * Update upload_car_data.py Updated to use newest car.csv file * Update Compiler tests to use correct test DB * Removed Benchmarking Code * Fixing Black Formatting * Add Toggle button for LuxSeries (#153) * add black * add groupby to history * add LuxSeries visualization implementation * remove groupby extending * run black * merge master into branch * run black * Reformat Warnings Labels (#151) * add format for warnings * fix small details * globally defined lux * fix formatting * Delete similarity.py * Rename similarity_old.py to similarity.py * fix commit * update master with changes to warning labels * fix formatting Co-authored-by: Caitlyn Chen * Make plot_config a 
global variable (#152) * plot_config made global config * docs and tests updates; black formatting * Delete Untitled.ipynb * cleanup * remove data encapsulation * black formatting * Update style.rst Co-authored-by: Doris Lee * Patch documentation (#134) * code cleanup * making LuxDataFrame pickle-able * various simplification and rewrite to Compiler and Vis * changing `render_VSpec` to `to_code` * bugfix in filter (list comprehension lazily evaluated) * Moved Executor Parameters to Global Config * Black formatting * Fix issue with read_json and add tests for different file types (#156) * fix read_json bug and add tests * convert tests to reference lux-datasets * run black * remove comments * Update __init__.py * Update test_pandas_coverage.py new_df --> df * fix tests to work with lux-datasets * fix init * remove lxml dependency * remove html test Co-authored-by: Doris Lee * Improved warning message stack trace for unexpected error (#145) * Moved table_name parameter to frame.py. Removed executor_type parameter executor_type parameter no longer necessary to maintain * Fixed reference to table_name parameter table_name is now a parameter within frame.py * bug fix patching #146 (#159) * remove sys.tracebacklimit to avoid `ERROR:root:Internal Python error in the inspect module` * changed SyntaxError to TypeError to reserved SyntaxError for Pandas native syntax problems * Ensure that recommendation and current_vis properties are accessible before df first printed (#159) * bugfix erased `name` dtype in LuxSeries (#140) * Adjusted Functions to Set SQL Connection Moved set_SQL_connection function to config. 
Added set_SQL_table function within frame.py to let users specify which database table will be associated with their dataframe * Update Makefile and pyproject.toml (#160) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * first pass -- inline comments * _config/config.py * delete test notebook * action * line length 105 * executor * interestingness * processor * vislib * tests, travis, CONTRIBUTING * .format () changed * replace tabs with escape chars * update using black * more rewrites and merges into single line * update pyproject.toml and makefile * updating contributing doc * Update CONTRIBUTING.md Co-authored-by: Doris Lee * Update README.md * Update SQLExecutor name parameter * Merging master branch with sql engine. Moving executor parameters to config * Parameter Bug Fix Lux DataFrame current_vis should be an empty list after recs are expired. Executor name for small_df and very_small_df checks should be "PandasExecutor" * Reference Fix in Warning Executor check here should look for name "PandasExecutor" * Black Formatting * Black formatting * bump version number and requirements * PATCH v0.2.1.1 * remove __future__ annotations dependency * remove channel inheritance in Enhance * bugfix 3-clause Vis example in 5-datetime.ipynb * update README * Fix Executor Reference Update current_vis() to reference lux.config.executor * Update frame.py * Moved set functions to global config * Cleaned up executor imports, Fixed issue in AltairRenderer Issue where Altair Renderer was not using lux.config.executor * Black formatting * Moved Executor Parameters to Global Config (#157) * Moved Executor Parameters to Global Config * Black formatting * Moved table_name parameter to frame.py. 
Removed executor_type parameter executor_type parameter no longer necessary to maintain * Fixed reference to table_name parameter table_name is now a parameter within frame.py * Adjusted Functions to Set SQL Connection Moved set_SQL_connection function to config. Added set_SQL_table function within frame.py to let users specify which database table will be associated with their dataframe * Update SQLExecutor name parameter * Fix Executor Reference Update current_vis() to reference lux.config.executor * Update frame.py * Moved set functions to global config Co-authored-by: 19thyneb Co-authored-by: Doris Lee * save_as_html feature (#170) * note this requires unpkg depedency from latest npm release (for now download https://github.com/lux-org/lux-widget/blob/master/luxwidget/nbextension/static/index.js, rename it luxwidget.js and place it in the same directory as the exported HTML) * Updating documentation for lux-widget v0.1.2 release (#176) * added functionality to delete Vis * fixed deletion logic * add observer to automatically update deletions * able to refresh widget on setting intent * support for setting intent from frontend * quick fix to output * changed variable intentindex name * added better error msg for > 1 intent for vis * reverting some changes * Updated readme * updated readthedoc documentation * added config to default display documentation * Update README.md Co-authored-by: Doris Lee * Fix bug caused by `groupby.agg` on column with many unique values (#174) * update export tutorial to add explanation for standalone argument * minor fixes and remove cell output in notebooks * added contributing doc * fix bugs and uncomment some tests * remove raise warning * remove unnecessary import * split up rename test into two parts * fix setting warning, fix data_type bugs and add relevant tests * remove ordinal data type * add test for small dataframe resetting index * add loc and iloc tests * fix attribute access directly to dataframe * add small changes to 
code * added test for qcut and cut * add check if dtype is Interval * added qcut test * fix Record KeyError * add tests * take care of reset_index case * small edits * add data_model to column_group Clause * small edits for row_group * fixes to row group Co-authored-by: Doris Lee * Update save_as_html docs (#170) * Update README *remove jupyter lab url * Update README.md fix slack link * Fixed Index Issue in Pandas Executor Issue caused when user sets an index. The Pandas Executor was not correctly renaming this new index column to Record in execute_aggregate() * Making Lux more robust with missing values and NaN (#179) (#180) * improve datetime warning message with starter templates * Handling NaN value errors * skipping validator check for NaN filter values * adding special case for PandasExecutor to map filter NaN to isna() * fixing unevenness metric when bar values are NaN * eliminate 1-cardinality filters in Filter action (since equal to overall) * fixed deviation array unequal bug when NaN * Handling NaN filter and data type * fixed data type detection when int coerced to float when containing NaN * added test for applying NaN filter * Ensure that LuxSeries displayed when there is NaN * ensure that NaNs are not dropped in groupbys * exclude NaN values in deviation calculation * fix unnamed series issue * improved debugging message for LuxSeries * Override pd.Series with LuxSeries * Fixes for type checking and line charts with NaNs * exclude NaN for line charts to prevent large axes offsetting * improved type checking for float no-longer NaN columns * fixed and improved deviation calculation test * added float categorical test * bump version and requirement * Added tests for set_index functions * Black formatting * Update Pandas Executor to handle NA values Readded missing dropna parameter within execute_aggregate() groupby function call * Update to Config, and Compiler/Interestingness Tests Removed duplicate set_SQL_connection function from config. 
Updated tests to reflect these changes. Merged in recent changes in master branch. * Black formatting * Update Requirements.txt Updated to include numpy version requirement * Update SQL Executor Documentation Updated documentation in SQLExecutor.py. Updated example notebook for SQLExecutor * Updated SQLExecutor Example Notebook Updated example notebook for the SQL Executor and the script to upload the example dataset to a local Postgres database. * Black Formatting Co-authored-by: 19thyneb Co-authored-by: Doris Lee Co-authored-by: cjachekang <47467363+cjachekang@users.noreply.github.com> Co-authored-by: Caitlyn Chen Co-authored-by: Caitlyn Chen Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: jinimukh <46768380+jinimukh@users.noreply.github.com> Co-authored-by: Kunal Agarwal Co-authored-by: Jared Zhao Co-authored-by: Piyush Gupta * Update to SQL Executor Tests Adjusted tests to reference correct PostgreSQL table * Update Travis file and SQL Executor Tests Updated the travis.yml and sql executor test scripts to refer to the correct postgres database * Update .travis.yml * fixed merge conflict issues. vis.data shows None DF. 
* Merge master into sql-engine + minor mergeconflict fixes * Removing the PYNB * Cleaning up obsolete code * Merging the master branch changes into sql-engine (#208) * Similarity as a default action (#182) * similarity formatting fixed * added another similarity test case; fixed bug where colored heatmap dimension is temporal (invalidate all 2 msr 1 temporal case) * filter and similarity together * filter and similarity together * remove filter * black line length * file reorg and clean; change sim metric Co-authored-by: Caitlyn Chen Co-authored-by: Doris Lee * bump numpy min version for travis * Special character issue (#184) * rename col * broken * fixed period replacement bug * add tests * refine tests * refine tests * remove cols * fix tests * add agg * fixed tests * clean up PR Co-authored-by: Caitlyn Chen Co-authored-by: Doris Lee * Colored bar interestingness bug (#189) * rewrote chi2 contingency with pd.crosstab * catching KeyError issue with chi2 contingency * padding interestingness with warning instead of error * interestingness now reuses ndim and nmsr computed in Compiler * bug fix for parser with int values * improve Vis repr to better display inferred intent when data is absent but fully compiled intent (all clauses) * Add sampling parameters as a global config (#192) * update export tutorial to add explanation for standalone argument * minor fixes and remove cell output in notebooks * added contributing doc * fix bugs and uncomment some tests * remove raise warning * remove unnecessary import * split up rename test into two parts * fix setting warning, fix data_type bugs and add relevant tests * remove ordinal data type * add test for small dataframe resetting index * add loc and iloc tests * fix attribute access directly to dataframe * add small changes to code * added test for qcut and cut * add check if dtype is Interval * added qcut test * fix Record KeyError * add tests * take care of reset_index case * small edits * add data_model to 
column_group Clause * small edits for row_group * fixes to row group * add config for start and cap for samples * finish sampling config and tests * black formatting * add documentation for sampling config * remove small added issues * minor changes to docs * implement heatmap flag and add tests * black formatting and documentation edits Co-authored-by: Doris Lee * Coalesce all data_type attributes of frame into one (#185) * coalesce data_types into data_type_lookup * black reformat * changed to better variable names * lux not defined error * fixed * black format * Update CONTRIBUTING.md * Bug Fix: User-provided Index causes KeyError in Pandas Execution (#191) * Moved Executor Parameters to Global Config * Black formatting * Moved table_name parameter to frame.py. Removed executor_type parameter executor_type parameter no longer necessary to maintain * Fixed reference to table_name parameter table_name is now a parameter within frame.py * Adjusted Functions to Set SQL Connection Moved set_SQL_connection function to config. Added set_SQL_table function within frame.py to let users specify which database table will be associated with their dataframe * Update SQLExecutor name parameter * Fix Executor Reference Update current_vis() to reference lux.config.executor * Update frame.py * Moved set functions to global config * Fixed Index Issue in Pandas Executor Issue caused when user sets an index. The Pandas Executor was not correctly renaming this new index column to Record in execute_aggregate() * Added tests for set_index functions * Black formatting * Update Pandas Executor to handle NA values Readded missing dropna parameter within execute_aggregate() groupby function call * Updated Pandas Coverage Tests Commented out set_index case which has not been addressed yet * Black Formatting * Update to Pandas Executor Index Handling Cleaned up how execute_aggregrate renames index columns. 
Now retrieves the index name from vis.data instead of filtering out non-index columns. Created separate test function for when user specifies an index in read_csv. Co-authored-by: 19thyneb Co-authored-by: Doris Lee * Initialize Config once only during __init__ (#194) * basic matplotlib chart example * migrate register default action to init * config class * move actions * fixed tests * changes * alright * fix plot_config * black reformat * black reformat Co-authored-by: Doris Lee Co-authored-by: Caitlyn Chen Co-authored-by: Ujjaini Mukhopadhyay * Update README.md * Series Bugfix for describe and convert_dtypes (#197) * bugfix for describe and convert_dtypes * added back metadata series test * black * default to pandas display when df.dtypes printed * Update Lux Docs (#195) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * first pass -- inline comments * _config/config.py * delete test notebook * action * line length 105 * executor * interestingness * processor * vislib * tests, travis, CONTRIBUTING * .format () changed * replace tabs with escape chars * update using black * more rewrites and merges into single line * update pyproject.toml and makefile * coalesce data_types into data_type_lookup * black reformat * changed to better variable names * lux not defined error * fixed * black format * config doc updated * fix link for executor * more links * fixed overview * more links fixed * pandas methods no longer included * updates to some docstrings * black reformat * minor fixes * minor fix Co-authored-by: Doris Lee * Supporting dataframe with integer columns (#203) * bugfix for describe and convert_dtypes * added back metadata series test * black * default to pandas display when df.dtypes printed * various fixes to support int columns * fixed merge conflict issues. vis.data shows None DF. 
* Merge master into sql-engine + minor mergeconflict fixes * Removing the PYNB * Cleaning up obsolete code Co-authored-by: Caitlyn Chen Co-authored-by: Caitlyn Chen Co-authored-by: Doris Lee Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: jinimukh <46768380+jinimukh@users.noreply.github.com> Co-authored-by: thyneb19 Co-authored-by: 19thyneb Co-authored-by: Ujjaini Mukhopadhyay * Updating sql-engine after merge with the travis build fix (#213) * Similarity as a default action (#182) * similarity formatting fixed * added another similarity test case; fixed bug where colored heatmap dimension is temporal (invalidate all 2 msr 1 temporal case) * filter and similarity together * filter and similarity together * remove filter * black line length * file reorg and clean; change sim metric Co-authored-by: Caitlyn Chen Co-authored-by: Doris Lee * bump numpy min version for travis * Special character issue (#184) * rename col * broken * fixed period replacement bug * add tests * refine tests * refine tests * remove cols * fix tests * add agg * fixed tests * clean up PR Co-authored-by: Caitlyn Chen Co-authored-by: Doris Lee * Colored bar interestingness bug (#189) * rewrote chi2 contingency with pd.crosstab * catching KeyError issue with chi2 contingency * padding interestingness with warning instead of error * interestingness now reuses ndim and nmsr computed in Compiler * bug fix for parser with int values * improve Vis repr to better display inferred intent when data is absent but fully compiled intent (all clauses) * Add sampling parameters as a global config (#192) * update export tutorial to add explanation for standalone argument * minor fixes and remove cell output in notebooks * added contributing doc * fix bugs and uncomment some tests * remove raise warning * remove unnecessary import * split up rename test into two parts * fix setting warning, fix data_type bugs and add relevant tests * remove ordinal data type * add 
test for small dataframe resetting index * add loc and iloc tests * fix attribute access directly to dataframe * add small changes to code * added test for qcut and cut * add check if dtype is Interval * added qcut test * fix Record KeyError * add tests * take care of reset_index case * small edits * add data_model to column_group Clause * small edits for row_group * fixes to row group * add config for start and cap for samples * finish sampling config and tests * black formatting * add documentation for sampling config * remove small added issues * minor changes to docs * implement heatmap flag and add tests * black formatting and documentation edits Co-authored-by: Doris Lee * Coalesce all data_type attributes of frame into one (#185) * coalesce data_types into data_type_lookup * black reformat * changed to better variable names * lux not defined error * fixed * black format * Update CONTRIBUTING.md * Bug Fix: User-provided Index causes KeyError in Pandas Execution (#191) * Moved Executor Parameters to Global Config * Black formatting * Moved table_name parameter to frame.py. Removed executor_type parameter executor_type parameter no longer necessary to maintain * Fixed reference to table_name parameter table_name is now a parameter within frame.py * Adjusted Functions to Set SQL Connection Moved set_SQL_connection function to config. Added set_SQL_table function within frame.py to let users specify which database table will be associated with their dataframe * Update SQLExecutor name parameter * Fix Executor Reference Update current_vis() to reference lux.config.executor * Update frame.py * Moved set functions to global config * Fixed Index Issue in Pandas Executor Issue caused when user sets an index. 
The Pandas Executor was not correctly renaming this new index column to Record in execute_aggregate() * Added tests for set_index functions * Black formatting * Update Pandas Executor to handle NA values Readded missing dropna parameter within execute_aggregate() groupby function call * Updated Pandas Coverage Tests Commented out set_index case which has not been addressed yet * Black Formatting * Update to Pandas Executor Index Handling Cleaned up how execute_aggregrate renames index columns. Now retrieves the index name from vis.data instead of filtering out non-index columns. Created separate test function for when user specifies an index in read_csv. Co-authored-by: 19thyneb Co-authored-by: Doris Lee * Initialize Config once only during __init__ (#194) * basic matplotlib chart example * migrate register default action to init * config class * move actions * fixed tests * changes * alright * fix plot_config * black reformat * black reformat Co-authored-by: Doris Lee Co-authored-by: Caitlyn Chen Co-authored-by: Ujjaini Mukhopadhyay * Update README.md * Series Bugfix for describe and convert_dtypes (#197) * bugfix for describe and convert_dtypes * added back metadata series test * black * default to pandas display when df.dtypes printed * Update Lux Docs (#195) * add black to travis * reformat all code and adjust test * remove .idea * fix contributing doc * small change in contributing * update * reformat, update command to fix version * remove dev dependencies * first pass -- inline comments * _config/config.py * delete test notebook * action * line length 105 * executor * interestingness * processor * vislib * tests, travis, CONTRIBUTING * .format () changed * replace tabs with escape chars * update using black * more rewrites and merges into single line * update pyproject.toml and makefile * coalesce data_types into data_type_lookup * black reformat * changed to better variable names * lux not defined error * fixed * black format * config doc updated * fix link 
for executor * more links * fixed overview * more links fixed * pandas methods no longer included * updates to some docstrings * black reformat * minor fixes * minor fix Co-authored-by: Doris Lee * Supporting dataframe with integer columns (#203) * bugfix for describe and convert_dtypes * added back metadata series test * black * default to pandas display when df.dtypes printed * various fixes to support int columns * fixed merge conflict issues. vis.data shows None DF. * Override Pandas DataFrames created from I/O pandas operations (#207) * update export tutorial to add explanation for standalone argument * minor fixes and remove cell output in notebooks * added contributing doc * fix bugs and uncomment some tests * remove raise warning * remove unnecessary import * split up rename test into two parts * fix setting warning, fix data_type bugs and add relevant tests * remove ordinal data type * add test for small dataframe resetting index * add loc and iloc tests * fix attribute access directly to dataframe * add small changes to code * added test for qcut and cut * add check if dtype is Interval * added qcut test * fix Record KeyError * add tests * take care of reset_index case * small edits * add data_model to column_group Clause * small edits for row_group * fixes to row group * add config for start and cap for samples * finish sampling config and tests * black formatting * add documentation for sampling config * remove small added issues * minor changes to docs * implement heatmap flag and add tests * black formatting and documentation edits * add pd.io equalities for DataFrames Co-authored-by: Doris Lee * Merge master into sql-engine + minor mergeconflict fixes * Removing the PYNB * Cleaning up obsolete code * Configuration for topk and sort order (#206) * bugfix for describe and convert_dtypes * added back metadata series test * black * default to pandas display when df.dtypes printed * various fixes to support int columns * skip series vis for df.iterrows 
series element * config setting for modifying top K and sorting * note about regenerated config * Version lock for jupyter-client (#211) * move to single requirements-dev without lux-widget install manually * pin jedi version * pin jupyter-client version * add back old travis and requirement-dev * Mixed dtype issue (#205) * coalesce data_types into data_type_lookup * merge fixed * merge conflicts * add warning and suggestion on how to fix * formatting for warnings version * change to internal data * legibility update * test added * update test * test updated * xlrd in dev reqs * black * update link * changes to test logic, minor string format for warning Co-authored-by: Doris Lee * Fixes issue where value_counts was not returning LuxSeries (#210) * add series equality and value counts test * black formatting * fix old value counts test instead * minor fix Co-authored-by: Doris Lee * bump version * update README Co-authored-by: Caitlyn Chen Co-authored-by: Caitlyn Chen Co-authored-by: Doris Lee Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: jinimukh <46768380+jinimukh@users.noreply.github.com> Co-authored-by: thyneb19 Co-authored-by: 19thyneb Co-authored-by: Ujjaini Mukhopadhyay * Merged in SQL-Engine changes, Cleaned up method to connect Lux to SQL database * Fixed SQLExecutor's Variable Handling Issue where if Postgres database table did not have columns that were all lower case, SQLExecutor queries would fail. Updated the executor's handling to deal with non-lower cased variable names. * Black Formatting * Updated data_type reference in SQLExecutor * Update Datetime Numeric Check Changed to handle other pandas and numpy integer types such as int64. 
* Update travis file to generate Postgres Test DB * Update test_vis.py fixed test that was missing df initialization * Improved SQLExecutor Warning Handling, Bugfix with 2D Binning * Adjustment to AltairRenderer Now only does heatmap conversion here for the PandasExecutor since colored heatmap not yet implemented in SQLExecutor * Black Formatting * Added Better Null Value Handling to SQLExecutor SQLExecutor binning function was breaking if there was any Null value in the data it was trying to query. Adjusted querying to always filter out Null values from the data. * SQLExecutor execute_binning Fixes Fixed issue where width_bucket query was not working with variables of integer type. Resolved by casting the variable to float. Fixed issue in creating bucket edges for histograms. Resolved by using min_max parameter of the dataframe rather than the min and max of a variable's unique values. * Update SQLExecutor Tests Updated Tests to follow changes to SQLExecutor filtering * Black Formatting * Removing Test Print Statement * Optimizing SQLExecutor 2D Binning Changed SQLExecutor 2D binning to collect all necessary data via a single query versus one query for each x-axis bin. Updated SQLExecutor tests to reflect updated filter handling * Black Formatting * Added Null Value Filtering to SQLExecutor There are cases where null values in a query break executor binning. Added Null value filtering to SQL queries to remove any null values from aggregated data * Fixed Handling of String Filter Values Issue caused by string values that contained apostrophes. Updated filter handling to process these values properly * Added Better Handling for Lazy Execution Previously, when a vis had no mark assigned the SQLExecutor would use refresh source to populate the vis' data. However, this causes an issue when the Lux compiler is trying to create colored barcharts in recommendation actions. Issue is resolved by approaching the problem similarly to the PandasExecutor. 
Take a sample of the original dataset and set that as the Vis' data. The vis will then have a mark assigned and execution will continue normally. * Updated Heatmap Threshold * Updated SQLExecutor to Not Include Null Values in Metadata Previously if a database table contained rows with null values, the SQLExecutor would include these in the cardinality and unique values lists. These inclusions would cause issues where the null value was included in barcharts and the interestingness calculations for these charts would break. Added addition test to SQLExecutor test suite to ensure that the executor was not including null values in its unique_values list. Included with this was an additional script to upload the aug_test dataset which was used for this test. * Black formatting * Fixed Issue with SQLExecutor and Custom Actions Issue with the check in custom_actions function. Did not have a case to check if actions were applicable for the SQLExecutor * Created LuxSQLTable Object Created the LuxSQLTable object to differentiate between Lux' pandas and SQL functionality. Users can create a LuxSQLTable object and specify which table they would like to connect the object to within the constructor. All of Lux' recommendation functionality is still available, but users will not be able to perform pandas operations on the object as no data is stored locally. * Added query parameter to Vis objects To make Lux' SQLExecutor more transparent, added a query parameter to Vis objects so that users can see what query was used to gather the data for that visualization. Updated syntax in SQLExecutor tests to reflect the LuxSQLTable Changes * Some minor datatype detection changes to SQLExecutor * Revert "Added query parameter to Vis objects" This reverts commit fa917fb01edad65dbd43993b4a15466b1173e81a. * Update python-app.yml to set up Postgres in test instance Added the postgres service and scripts to populate the database with tables used in testing the SQLExecutor. 
* Removed Example Notebooks for SQLExecutor These will be moved to the lux-binder-sql instance. * Update to Script Uploading Car dataset to Postgres Changed to use sqlalchemy * Update python-app.yml Updated yml file to properly set up Postgres database for the test instance. * Update Lux SQLTable Frontend Added disclaimer for the data preview * Delete sql_benchmarking.csv * Update CONTRIBUTING.md * Update CONTRIBUTING.md * cleaning up PR * fix flights data upload * Some changes for length calculation * Test Commit of SQLTable * Cleaned Up Test Suite Removed unnecessary print statement and redundant initialization of test dataframe * Some changes to datatype, test SQL Executor, count queries to count(1) for SQL Executor * removing travis.yml and date_utils import from sqlexecutor * Clean up LuxSQLTable Removed unnecessary parameter initialization in the LuxSQLTable init function as they are being set in the super constructor. * Update test_vis.py Issue when running test_vis in Github Actions. Adjusted test_scatter_chart and test_colored_scatter_chart * Black Formatting * Updated LuxSQLTable notification - Text -> [Toggle Table/Lux] - Caption's appearance with preview table only - Caption has table name * Remove Out of Date LuxSQLTableNotice * Black Reformatting * Remove redundantly added parameters "name" parameter was being added again unnecessarily in lux series and tests for series * Update config error handling and LuxSQLTable description Lux config now raises an error if a user specifies a non-existent executor type * Clean up Vis.py Add greater connection visibility in LuxSQLTable Removed unnecessary hyperparameter transferring in the Vis object. Updated LuxSQLTable to display a more verbose notification. 
* Update executor.rst * Refactor length parameter to _length * Update test_interestingness.py Update test_interestingness to test for deviation_from_overall changes when using the SQLExecutor * Added _length Parameter to LuxSQLTable The _length parameter keeps track of the number of datapoints within the table/view that a LuxSQLTable is connected to. Added a len() function to make this parameter accessible. Unable to override the __len__() function as doing so breaks an assertion within the parent DataFrame class Updated interestingness.py to use the new len() function * Black Reformatting, Reverting_length change in LuxDataFrame Adding back in the _length parameter to the LuxDataFrame * Update LuxSQLTable __len__() and metadata computation Rather than referencing the _length parameter throughout the code, update and use the LuxSQLTable len() function. Added _setup_done parameter to the LuxSQLTable. This will check if the initial setup of the table, retrieving and populating attributes, is completed. This will inform which len() function to use, as the parent len() is required while populating the columns of the LuxSQLTable. * Removed unnecessary __repr__() function * Updated LuxSQLTable repr Rename _repr_html_() to _ipython_display_() * Revert "Updated LuxSQLTable repr" This reverts commit e350ab4eea10ec0e59ae3c4d28c205286140be2d. * Revert "Revert "Updated LuxSQLTable repr"" This reverts commit 5d1a2f4fa72384faca2efa0e21c988fb9703c415. * Revert "Update LuxSQLTable __len__() and metadata computation" This reverts commit 7c7dcd373b7e98bf25c27c59c2fe60134a1f8a19. * Revert "Revert "Update LuxSQLTable __len__() and metadata computation"" This reverts commit b5998c710b273aa0781859ff9ddfd61c8c4d24c9. * Cleaned up datatype and SQLExecutor checks Updated _is_datetime_number() in the PandasExecutor to use the is_integer_dtype() function to check if a series is of int dtype. 
Cleaned up SQLExecutor checks in frame.py * Update LuxSQLTable __len__() and metadata computation"" (#331) * Revert "Revert "Update LuxSQLTable __len__() and metadata computation"" This reverts commit b5998c710b273aa0781859ff9ddfd61c8c4d24c9. * Cleaned up datatype and SQLExecutor checks Updated _is_datetime_number() in the PandasExecutor to use the is_integer_dtype() function to check if a series is of int dtype. Cleaned up SQLExecutor checks in frame.py * Black Reformatting * minor changes to requirements and cleanup * Removed psycopg2 from Lux requirements Aiming to simplify the initial Lux installation. Will include a notice in the SQL documentation letting users know that they will have to install the library themselves if they want to use the LuxSQLTable functionality. * Revert "Merge remote-tracking branch 'upstream/master' into Database-Executor" This reverts commit 68c774738668ca102bfc7f6f7364610bfeab7237, reversing changes made to 801f3cd6c6a06b21682d3d708d3ee75f081e3ddc. * add back merged overridden changes * merge conflict fixed * Separate Pandas and SQL test suite Separated and moved the SQLExecutor tests to the tests_sql folder. Updated the make file to include a command to test both the pandas and SQL functionalities. Updated the github workflow to also run the tests_sql folder * Separate Pandas and SQL test suite (#354) Separated and moved the SQLExecutor tests to the tests_sql folder. Updated the make file to include a command to test both the pandas and SQL functionalities. Updated the github workflow to also run the tests_sql folder * Black reformatting * Update python-app.yml * Fixing issues in test suite Need to set the executor type to "Pandas" at the start of the PandasExecutor test suite. 
Fixed column name references in the SQLExecutor tests * Fixing Lux SQL test suite Co-authored-by: 19thyneb Co-authored-by: Doris Lee Co-authored-by: cjachekang <47467363+cjachekang@users.noreply.github.com> Co-authored-by: Caitlyn Chen Co-authored-by: Caitlyn Chen Co-authored-by: Kunal Agarwal <32151899+westernguy2@users.noreply.github.com> Co-authored-by: jinimukh <46768380+jinimukh@users.noreply.github.com> Co-authored-by: Kunal Agarwal Co-authored-by: Jared Zhao Co-authored-by: Piyush Gupta Co-authored-by: Dheeraj Khandelwal Co-authored-by: dj-khandelwal <54646793+dj-khandelwal@users.noreply.github.com> Co-authored-by: Ujjaini Mukhopadhyay Co-authored-by: Sophia Huang <6860749+sophiahhuang@users.noreply.github.com> --- .github/workflows/python-app.yml | 2 +- Makefile | 5 +- lux/core/sqltable.py | 1 - tests/test_action.py | 1 + tests/test_compiler.py | 269 ++-------------- tests/test_interestingness.py | 51 ---- tests/test_sql_executor.py | 247 --------------- tests_sql/__init__.py | 13 + tests_sql/conftest.py | 33 ++ tests_sql/context.py | 23 ++ tests_sql/test_sql_compiler.py | 421 ++++++++++++++++++++++++++ tests_sql/test_sql_executor.py | 223 ++++++++++++++ tests_sql/test_sql_interestingness.py | 71 +++++ 13 files changed, 814 insertions(+), 546 deletions(-) delete mode 100644 tests/test_sql_executor.py create mode 100644 tests_sql/__init__.py create mode 100644 tests_sql/conftest.py create mode 100644 tests_sql/context.py create mode 100644 tests_sql/test_sql_compiler.py create mode 100644 tests_sql/test_sql_executor.py create mode 100644 tests_sql/test_sql_interestingness.py diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 668a7bc3..26fcb844 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -54,5 +54,5 @@ jobs: black --target-version py37 --line-length 105 --check . 
- name: Test with Pytest and Code Coverage Report run: | - pytest --cov-report term --cov=lux tests/ + pytest --cov-report term --cov=lux tests/ tests_sql/ bash <(curl -s https://codecov.io/bash) diff --git a/Makefile b/Makefile index 6d51d1b8..3d264512 100644 --- a/Makefile +++ b/Makefile @@ -3,5 +3,8 @@ init: test: black --check . python -m pytest tests/ - +test_all: + black --check . + python -m pytest tests/ + python -m pytest tests_sql/ .PHONY: init test \ No newline at end of file diff --git a/lux/core/sqltable.py b/lux/core/sqltable.py index de426f70..5535dc14 100644 --- a/lux/core/sqltable.py +++ b/lux/core/sqltable.py @@ -86,7 +86,6 @@ def set_SQL_table(self, t_name): ) else: self.table_name = t_name - import psycopg2 try: lux.config.executor.compute_dataset_metadata(self) diff --git a/tests/test_action.py b/tests/test_action.py index 5e22938c..893420b6 100644 --- a/tests/test_action.py +++ b/tests/test_action.py @@ -20,6 +20,7 @@ def test_vary_filter_val(global_var): + lux.config.set_executor_type("Pandas") df = pytest.olympic vis = Vis(["Height", "SportType=Ball"], df) df.set_intent_as_vis(vis) diff --git a/tests/test_compiler.py b/tests/test_compiler.py index d425e41d..d9e377ea 100644 --- a/tests/test_compiler.py +++ b/tests/test_compiler.py @@ -32,22 +32,8 @@ def test_underspecified_no_vis(global_var, test_recs): assert len(df.current_vis) == 0 df.clear_intent() - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - - test_recs(sql_df, no_vis_actions) - assert len(sql_df.current_vis) == 0 - - # test only one filter context case. 
- sql_df.set_intent([lux.Clause(attribute="origin", filter_op="=", value="USA")]) - test_recs(sql_df, no_vis_actions) - assert len(sql_df.current_vis) == 0 - def test_underspecified_single_vis(global_var, test_recs): - lux.config.set_executor_type("Pandas") one_vis_actions = ["Enhance", "Filter", "Generalize"] df = pytest.car_df df.set_intent([lux.Clause(attribute="MilesPerGal"), lux.Clause(attribute="Weight")]) @@ -60,18 +46,6 @@ def test_underspecified_single_vis(global_var, test_recs): assert attr.data_type == "quantitative" df.clear_intent() - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - sql_df.set_intent([lux.Clause(attribute="milespergal"), lux.Clause(attribute="weight")]) - test_recs(sql_df, one_vis_actions) - assert len(sql_df.current_vis) == 1 - assert sql_df.current_vis[0].mark == "scatter" - for attr in sql_df.current_vis[0]._inferred_intent: - assert attr.data_model == "measure" - for attr in sql_df.current_vis[0]._inferred_intent: - assert attr.data_type == "quantitative" - # def test_underspecified_vis_collection(test_recs): # multiple_vis_actions = ["Current viss"] @@ -115,15 +89,6 @@ def test_set_intent_as_vis(global_var, test_recs): df._ipython_display_() test_recs(df, ["Enhance", "Filter", "Generalize"]) - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - sql_df._repr_html_() - vis = sql_df.recommendation["Correlation"][0] - sql_df.intent = vis - sql_df._repr_html_() - test_recs(sql_df, ["Enhance", "Filter", "Generalize"]) - @pytest.fixture def test_recs(): @@ -150,18 +115,6 @@ def test_parse(global_var): vlst = VisList([lux.Clause("Origin=?"), lux.Clause("MilesPerGal")], df) assert len(vlst) == 3 - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres 
password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vlst = VisList([lux.Clause("origin=?"), lux.Clause(attribute="milespergal")], sql_df) - assert len(vlst) == 3 - - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vlst = VisList([lux.Clause("origin=?"), lux.Clause("milespergal")], sql_df) - assert len(vlst) == 3 - def test_underspecified_vis_collection_zval(global_var): lux.config.set_executor_type("Pandas") @@ -181,18 +134,6 @@ def test_underspecified_vis_collection_zval(global_var): # vlst = VisList([lux.Clause(attribute = ["Origin","Cylinders"], filter_op="=",value="?"),lux.Clause(attribute = ["Horsepower"]),lux.Clause(attribute = "Weight")],df) # assert len(vlst) == 8 - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vlst = VisList( - [ - lux.Clause(attribute="origin", filter_op="=", value="?"), - lux.Clause(attribute="milespergal"), - ], - sql_df, - ) - assert len(vlst) == 3 - def test_sort_bar(global_var): from lux.processor.Compiler import Compiler @@ -221,32 +162,6 @@ def test_sort_bar(global_var): assert vis.mark == "bar" assert vis._inferred_intent[1].sort == "ascending" - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis( - [ - lux.Clause(attribute="acceleration", data_model="measure", data_type="quantitative"), - lux.Clause(attribute="origin", data_model="dimension", data_type="nominal"), - ], - sql_df, - ) - assert vis.mark == "bar" - assert vis._inferred_intent[1].sort == "" - - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - 
lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis( - [ - lux.Clause(attribute="acceleration", data_model="measure", data_type="quantitative"), - lux.Clause(attribute="name", data_model="dimension", data_type="nominal"), - ], - sql_df, - ) - assert vis.mark == "bar" - assert vis._inferred_intent[1].sort == "ascending" - def test_specified_vis_collection(global_var): lux.config.set_executor_type("Pandas") @@ -335,16 +250,6 @@ def test_autoencoding_scatter(global_var): ) df.clear_intent() - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - visList = VisList( - [lux.Clause(attribute="?"), lux.Clause(attribute="milespergal", channel="x")], - sql_df, - ) - for vis in visList: - check_attribute_on_channel(vis, "milespergal", "x") - def test_autoencoding_scatter(): lux.config.set_executor_type("Pandas") @@ -388,42 +293,46 @@ def test_autoencoding_scatter(): ] ) - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis([lux.Clause(attribute="milespergal"), lux.Clause(attribute="weight")], sql_df) - check_attribute_on_channel(vis, "milespergal", "x") - check_attribute_on_channel(vis, "weight", "y") + +def test_autoencoding_scatter(): + lux.config.set_executor_type("Pandas") + # No channel specified + df = pd.read_csv("lux/data/car.csv") + df["Year"] = pd.to_datetime( + df["Year"], format="%Y" + ) # change pandas dtype for the column "Year" to datetype + vis = Vis([lux.Clause(attribute="MilesPerGal"), lux.Clause(attribute="Weight")], df) + check_attribute_on_channel(vis, "MilesPerGal", "x") + check_attribute_on_channel(vis, "Weight", "y") # Partial channel specified vis = Vis( [ - lux.Clause(attribute="milespergal", channel="y"), - 
lux.Clause(attribute="weight"), + lux.Clause(attribute="MilesPerGal", channel="y"), + lux.Clause(attribute="Weight"), ], - sql_df, + df, ) - check_attribute_on_channel(vis, "milespergal", "y") - check_attribute_on_channel(vis, "weight", "x") + check_attribute_on_channel(vis, "MilesPerGal", "y") + check_attribute_on_channel(vis, "Weight", "x") # Full channel specified vis = Vis( [ - lux.Clause(attribute="milespergal", channel="y"), - lux.Clause(attribute="weight", channel="x"), + lux.Clause(attribute="MilesPerGal", channel="y"), + lux.Clause(attribute="Weight", channel="x"), ], - sql_df, + df, ) - check_attribute_on_channel(vis, "milespergal", "y") - check_attribute_on_channel(vis, "weight", "x") + check_attribute_on_channel(vis, "MilesPerGal", "y") + check_attribute_on_channel(vis, "Weight", "x") # Duplicate channel specified with pytest.raises(ValueError): # Should throw error because there should not be columns with the same channel specified - sql_df.set_intent( + df.set_intent( [ - lux.Clause(attribute="milespergal", channel="x"), - lux.Clause(attribute="weight", channel="x"), + lux.Clause(attribute="MilesPerGal", channel="x"), + lux.Clause(attribute="Weight", channel="x"), ] ) @@ -441,18 +350,6 @@ def test_autoencoding_histogram(global_var): assert vis.get_attr_by_channel("x")[0].attribute == "MilesPerGal" assert vis.get_attr_by_channel("y")[0].attribute == "Record" - # No channel specified - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis([lux.Clause(attribute="milespergal", channel="y")], sql_df) - check_attribute_on_channel(vis, "milespergal", "y") - - vis = Vis([lux.Clause(attribute="milespergal", channel="x")], sql_df) - assert vis.get_attr_by_channel("x")[0].attribute == "milespergal" - assert vis.get_attr_by_channel("y")[0].attribute == "Record" - def 
test_autoencoding_line_chart(global_var): lux.config.set_executor_type("Pandas") @@ -495,45 +392,6 @@ def test_autoencoding_line_chart(global_var): ) df.clear_intent() - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis([lux.Clause(attribute="year"), lux.Clause(attribute="acceleration")], sql_df) - check_attribute_on_channel(vis, "year", "x") - check_attribute_on_channel(vis, "acceleration", "y") - - # Partial channel specified - vis = Vis( - [ - lux.Clause(attribute="year", channel="y"), - lux.Clause(attribute="acceleration"), - ], - sql_df, - ) - check_attribute_on_channel(vis, "year", "y") - check_attribute_on_channel(vis, "acceleration", "x") - - # Full channel specified - vis = Vis( - [ - lux.Clause(attribute="year", channel="y"), - lux.Clause(attribute="acceleration", channel="x"), - ], - sql_df, - ) - check_attribute_on_channel(vis, "year", "y") - check_attribute_on_channel(vis, "acceleration", "x") - - with pytest.raises(ValueError): - # Should throw error because there should not be columns with the same channel specified - sql_df.set_intent( - [ - lux.Clause(attribute="year", channel="x"), - lux.Clause(attribute="acceleration", channel="x"), - ] - ) - def test_autoencoding_color_line_chart(global_var): lux.config.set_executor_type("Pandas") @@ -550,20 +408,6 @@ def test_autoencoding_color_line_chart(global_var): check_attribute_on_channel(vis, "Acceleration", "y") check_attribute_on_channel(vis, "Origin", "color") - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - intent = [ - lux.Clause(attribute="year"), - lux.Clause(attribute="acceleration"), - lux.Clause(attribute="origin"), - ] - vis = Vis(intent, sql_df) - 
check_attribute_on_channel(vis, "year", "x") - check_attribute_on_channel(vis, "acceleration", "y") - check_attribute_on_channel(vis, "origin", "color") - def test_autoencoding_color_scatter_chart(global_var): lux.config.set_executor_type("Pandas") @@ -590,30 +434,6 @@ def test_autoencoding_color_scatter_chart(global_var): ) check_attribute_on_channel(vis, "Acceleration", "color") - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis( - [ - lux.Clause(attribute="horsepower"), - lux.Clause(attribute="acceleration"), - lux.Clause(attribute="origin"), - ], - sql_df, - ) - check_attribute_on_channel(vis, "origin", "color") - - vis = Vis( - [ - lux.Clause(attribute="horsepower"), - lux.Clause(attribute="acceleration", channel="color"), - lux.Clause(attribute="origin"), - ], - sql_df, - ) - check_attribute_on_channel(vis, "acceleration", "color") - def test_populate_options(global_var): lux.config.set_executor_type("Pandas") @@ -644,33 +464,6 @@ def test_populate_options(global_var): ) df.clear_intent() - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - sql_df.set_intent([lux.Clause(attribute="?"), lux.Clause(attribute="milespergal")]) - col_set = set() - for specOptions in Compiler.populate_wildcard_options(sql_df._intent, sql_df)["attributes"]: - for clause in specOptions: - col_set.add(clause.attribute) - assert list_equal(list(col_set), list(sql_df.columns)) - - sql_df.set_intent( - [ - lux.Clause(attribute="?", data_model="measure"), - lux.Clause(attribute="milespergal"), - ] - ) - sql_df._repr_html_() - col_set = set() - for specOptions in Compiler.populate_wildcard_options(sql_df._intent, sql_df)["attributes"]: - for clause in specOptions: - 
col_set.add(clause.attribute) - assert list_equal( - list(col_set), - ["acceleration", "weight", "horsepower", "milespergal", "displacement"], - ) - def test_remove_all_invalid(global_var): lux.config.set_executor_type("Pandas") @@ -687,20 +480,6 @@ def test_remove_all_invalid(global_var): assert len(df.current_vis) == 0 df.clear_intent() - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - # with pytest.warns(UserWarning,match="duplicate attribute specified in the intent"): - sql_df.set_intent( - [ - lux.Clause(attribute="origin", filter_op="=", value="USA"), - lux.Clause(attribute="origin"), - ] - ) - sql_df._repr_html_() - assert len(sql_df.current_vis) == 0 - def list_equal(l1, l2): l1.sort() diff --git a/tests/test_interestingness.py b/tests/test_interestingness.py index aac79256..0d421cd0 100644 --- a/tests/test_interestingness.py +++ b/tests/test_interestingness.py @@ -74,23 +74,6 @@ def test_interestingness_1_0_1(global_var): assert df.current_vis[0].score == 0 df.clear_intent() - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - tbl.set_intent( - [ - lux.Clause(attribute="Origin", filter_op="=", value="USA"), - lux.Clause(attribute="Cylinders"), - ] - ) - tbl._repr_html_() - filter_score = tbl.recommendation["Filter"][0].score - assert tbl.current_vis[0].score == 0 - assert filter_score > 0 - tbl.clear_intent() - def test_interestingness_0_1_0(global_var): lux.config.set_executor_type("Pandas") @@ -153,22 +136,6 @@ def test_interestingness_0_1_1(global_var): assert str(df.recommendation["Current Vis"][0]._inferred_intent[2].value) == "USA" df.clear_intent() - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = 
lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - tbl.set_intent( - [ - lux.Clause(attribute="Origin", filter_op="=", value="?"), - lux.Clause(attribute="MilesPerGal"), - ] - ) - tbl._repr_html_() - assert interestingness(tbl.recommendation["Current Vis"][0], tbl) != None - assert str(tbl.recommendation["Current Vis"][0]._inferred_intent[2].value) == "USA" - tbl.clear_intent() - def test_interestingness_1_1_0(global_var): lux.config.set_executor_type("Pandas") @@ -240,24 +207,6 @@ def test_interestingness_1_1_1(global_var): assert interestingness(df.recommendation["Filter"][0], df) != None df.clear_intent() - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - tbl.set_intent( - [ - lux.Clause(attribute="Horsepower"), - lux.Clause(attribute="Origin", filter_op="=", value="USA", bin_size=20), - ] - ) - tbl._repr_html_() - assert interestingness(tbl.recommendation["Enhance"][0], tbl) != None - - # check for top recommended Filter graph score is not none - assert interestingness(tbl.recommendation["Filter"][0], tbl) != None - tbl.clear_intent() - def test_interestingness_1_2_0(global_var): from lux.vis.Vis import Vis diff --git a/tests/test_sql_executor.py b/tests/test_sql_executor.py deleted file mode 100644 index f8d8d907..00000000 --- a/tests/test_sql_executor.py +++ /dev/null @@ -1,247 +0,0 @@ -# Copyright 2019-2020 The Lux Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 - -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from .context import lux -import pytest -import pandas as pd -from lux.executor.SQLExecutor import SQLExecutor -from lux.vis.Vis import Vis -from lux.vis.VisList import VisList -import psycopg2 - - -def test_lazy_execution(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - intent = [ - lux.Clause(attribute="Horsepower", aggregation="mean"), - lux.Clause(attribute="Origin"), - ] - vis = Vis(intent) - # Check data field in vis is empty before calling executor - assert vis.data is None - SQLExecutor.execute([vis], tbl) - assert type(vis.data) == lux.core.frame.LuxDataFrame - - -def test_selection(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - intent = [ - lux.Clause(attribute=["Horsepower", "Weight", "Acceleration"]), - lux.Clause(attribute="Year"), - ] - vislist = VisList(intent, tbl) - assert all([type(vis.data) == lux.core.frame.LuxDataFrame for vis in vislist]) - assert all(vislist[2].data.columns == ["Year", "Acceleration"]) - - -def test_aggregation(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - intent = [ - lux.Clause(attribute="Horsepower", aggregation="mean"), - lux.Clause(attribute="Origin"), - ] - vis = Vis(intent, tbl) - result_df = vis.data - assert int(result_df[result_df["Origin"] == "USA"]["Horsepower"]) == 119 - - intent = [ - lux.Clause(attribute="Horsepower", aggregation="sum"), - lux.Clause(attribute="Origin"), - ] - vis = Vis(intent, tbl) - result_df = vis.data - assert int(result_df[result_df["Origin"] 
== "Japan"]["Horsepower"]) == 6307 - - intent = [ - lux.Clause(attribute="Horsepower", aggregation="max"), - lux.Clause(attribute="Origin"), - ] - vis = Vis(intent, tbl) - result_df = vis.data - assert int(result_df[result_df["Origin"] == "Europe"]["Horsepower"]) == 133 - - -def test_colored_bar_chart(): - from lux.vis.Vis import Vis - from lux.vis.Vis import Clause - - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - x_clause = Clause(attribute="MilesPerGal", channel="x") - y_clause = Clause(attribute="Origin", channel="y") - color_clause = Clause(attribute="Cylinders", channel="color") - - new_vis = Vis([x_clause, y_clause, color_clause], tbl) - # make sure dimention of the data is correct - color_cardinality = len(tbl.unique_values["Cylinders"]) - group_by_cardinality = len(tbl.unique_values["Origin"]) - assert len(new_vis.data.columns) == 3 - assert ( - len(new_vis.data) == 15 > group_by_cardinality < color_cardinality * group_by_cardinality - ) # Not color_cardinality*group_by_cardinality since some combinations have 0 values - - -def test_colored_line_chart(): - from lux.vis.Vis import Vis - from lux.vis.Vis import Clause - - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - x_clause = Clause(attribute="Year", channel="x") - y_clause = Clause(attribute="MilesPerGal", channel="y") - color_clause = Clause(attribute="Cylinders", channel="color") - - new_vis = Vis([x_clause, y_clause, color_clause], tbl) - - # make sure dimention of the data is correct - color_cardinality = len(tbl.unique_values["Cylinders"]) - group_by_cardinality = len(tbl.unique_values["Year"]) - assert len(new_vis.data.columns) == 3 - assert ( - len(new_vis.data) == 60 > group_by_cardinality < color_cardinality * 
group_by_cardinality - ) # Not color_cardinality*group_by_cardinality since some combinations have 0 values - - -def test_filter(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - intent = [ - lux.Clause(attribute="Horsepower"), - lux.Clause(attribute="Year"), - lux.Clause(attribute="Origin", filter_op="=", value="USA"), - ] - vis = Vis(intent, tbl) - vis._vis_data = tbl - filter_output = SQLExecutor.execute_filter(vis) - where_clause = filter_output[0] - where_clause_list = where_clause.split(" AND ") - assert ( - "WHERE \"Origin\" = 'USA'" in where_clause_list - and '"Horsepower" IS NOT NULL' in where_clause_list - and '"Year" IS NOT NULL' in where_clause_list - ) - assert filter_output[1] == ["Origin"] - - -def test_inequalityfilter(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - vis = Vis( - [ - lux.Clause(attribute="Horsepower", filter_op=">", value=50), - lux.Clause(attribute="MilesPerGal"), - ] - ) - vis._vis_data = tbl - filter_output = SQLExecutor.execute_filter(vis) - assert filter_output[0] == 'WHERE "Horsepower" > \'50\' AND "MilesPerGal" IS NOT NULL' - assert filter_output[1] == ["Horsepower"] - - intent = [ - lux.Clause(attribute="Horsepower", filter_op="<=", value=100), - lux.Clause(attribute="MilesPerGal"), - ] - vis = Vis(intent, tbl) - vis._vis_data = tbl - filter_output = SQLExecutor.execute_filter(vis) - assert filter_output[0] == 'WHERE "Horsepower" <= \'100\' AND "MilesPerGal" IS NOT NULL' - assert filter_output[1] == ["Horsepower"] - - -def test_binning(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - vis = 
Vis([lux.Clause(attribute="Horsepower")], tbl) - nbins = list(filter(lambda x: x.bin_size != 0, vis._inferred_intent))[0].bin_size - assert len(vis.data) == nbins - - -def test_record(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - vis = Vis([lux.Clause(attribute="Cylinders")], tbl) - assert len(vis.data) == len(tbl.unique_values["Cylinders"]) - - -def test_filter_aggregation_fillzero_aligned(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - intent = [ - lux.Clause(attribute="Cylinders"), - lux.Clause(attribute="MilesPerGal"), - lux.Clause("Origin=Japan"), - ] - vis = Vis(intent, tbl) - result = vis.data - assert result[result["Cylinders"] == 5]["MilesPerGal"].values[0] == 0 - assert result[result["Cylinders"] == 8]["MilesPerGal"].values[0] == 0 - - -def test_exclude_attribute(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - intent = [lux.Clause("?", exclude=["Name", "Year"]), lux.Clause("Horsepower")] - vislist = VisList(intent, tbl) - for vis in vislist: - assert vis.get_attr_by_channel("x")[0].attribute != "Year" - assert vis.get_attr_by_channel("x")[0].attribute != "name" - assert vis.get_attr_by_channel("y")[0].attribute != "Year" - assert vis.get_attr_by_channel("y")[0].attribute != "Year" - - -def test_null_values(): - # checks that the SQLExecutor has filtered out any None or Null values from its metadata - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("aug_test_table") - - assert None not 
in tbl.unique_values["enrolled_university"] diff --git a/tests_sql/__init__.py b/tests_sql/__init__.py new file mode 100644 index 00000000..948becf5 --- /dev/null +++ b/tests_sql/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019-2020 The Lux Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests_sql/conftest.py b/tests_sql/conftest.py new file mode 100644 index 00000000..8ee3ddbb --- /dev/null +++ b/tests_sql/conftest.py @@ -0,0 +1,33 @@ +import pytest +import pandas as pd + + +@pytest.fixture(scope="session") +def global_var(): + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" + pytest.olympic = pd.read_csv(url) + pytest.car_df = pd.read_csv("lux/data/car.csv") + pytest.college_df = pd.read_csv("lux/data/college.csv") + pytest.metadata = [ + "_intent", + "_inferred_intent", + "_data_type", + "unique_values", + "cardinality", + "_rec_info", + "_min_max", + "plotting_style", + "_current_vis", + "_widget", + "_recommendation", + "_prev", + "_history", + "_saved_export", + "name", + "_sampled", + "_toggle_pandas_display", + "_message", + "_pandas_only", + "pre_aggregated", + "_type_override", + ] diff --git a/tests_sql/context.py b/tests_sql/context.py new file mode 100644 index 00000000..b55d161b --- /dev/null +++ b/tests_sql/context.py @@ -0,0 +1,23 @@ +# Copyright 2019-2020 The Lux Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +import lux + +lux.config.interestingness_fallback = False +lux.config.pandas_fallback = False diff --git a/tests_sql/test_sql_compiler.py b/tests_sql/test_sql_compiler.py new file mode 100644 index 00000000..56d4d0f5 --- /dev/null +++ b/tests_sql/test_sql_compiler.py @@ -0,0 +1,421 @@ +# Copyright 2019-2020 The Lux Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from .context import lux
import pytest
import pandas as pd
from lux.vis.Vis import Vis
from lux.vis.VisList import VisList
import psycopg2


def test_underspecified_no_vis(global_var, test_recs):
    """A table with no intent, or with only a filter clause, has no current vis."""
    # The connection is handed to lux.config and left open on purpose: the other
    # tests in this module build LuxSQLTable objects without connecting again.
    connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux")
    lux.config.set_SQL_connection(connection)

    no_vis_actions = ["Correlation", "Distribution", "Occurrence", "Temporal"]
    sql_df = lux.LuxSQLTable(table_name="cars")

    test_recs(sql_df, no_vis_actions)
    assert len(sql_df.current_vis) == 0

    # test only one filter context case.
    sql_df.set_intent([lux.Clause(attribute="origin", filter_op="=", value="USA")])
    test_recs(sql_df, no_vis_actions)
    assert len(sql_df.current_vis) == 0


def test_underspecified_single_vis(global_var, test_recs):
    """Two attribute clauses compile to exactly one scatter vis of quantitative measures."""
    one_vis_actions = ["Enhance", "Filter", "Generalize"]
    sql_df = lux.LuxSQLTable(table_name="cars")
    sql_df.set_intent([lux.Clause(attribute="milespergal"), lux.Clause(attribute="weight")])
    test_recs(sql_df, one_vis_actions)
    assert len(sql_df.current_vis) == 1
    current = sql_df.current_vis[0]
    assert current.mark == "scatter"
    for clause in current._inferred_intent:
        assert clause.data_model == "measure"
        assert clause.data_type == "quantitative"


def test_set_intent_as_vis(global_var, test_recs):
    """A recommended Vis can be assigned back to the table as its intent."""
    sql_df = lux.LuxSQLTable(table_name="cars")
    sql_df._repr_html_()
    vis = sql_df.recommendation["Correlation"][0]
    sql_df.intent = vis
    sql_df._repr_html_()
    test_recs(sql_df, ["Enhance", "Filter", "Generalize"])


@pytest.fixture
def test_recs():
    """Provide a helper that renders a table and checks that it produced recommendations."""

    def test_recs_function(df, actions):
        df._ipython_display_()
        assert len(df.recommendation) > 0
        rec_keys = list(df.recommendation.keys())
        # NOTE(review): the comparison result is discarded, so `actions` is never
        # actually enforced against the recommendation keys. Turning this into an
        # assertion needs a run against the test database first -- TODO confirm.
        list_equal(rec_keys, actions)

    return test_recs_function


def test_parse(global_var):
    """The string shorthand "origin=?" expands to one vis per origin value."""
    sql_df = lux.LuxSQLTable(table_name="cars")
    vlst = VisList([lux.Clause("origin=?"), lux.Clause(attribute="milespergal")], sql_df)
    assert len(vlst) == 3

    sql_df = lux.LuxSQLTable(table_name="cars")
    vlst = VisList([lux.Clause("origin=?"), lux.Clause("milespergal")], sql_df)
    assert len(vlst) == 3


def test_underspecified_vis_collection_zval(global_var):
    """A wildcard filter value given via keyword arguments yields three visualizations."""
    sql_df = lux.LuxSQLTable(table_name="cars")
    vlst = VisList(
        [
            lux.Clause(attribute="origin", filter_op="=", value="?"),
            lux.Clause(attribute="milespergal"),
        ],
        sql_df,
    )
    assert len(vlst) == 3


def test_sort_bar(global_var):
    """Check the inferred sort order of the dimension clause in a bar chart."""
    sql_df = lux.LuxSQLTable(table_name="cars")
    vis = Vis(
        [
            lux.Clause(attribute="acceleration", data_model="measure", data_type="quantitative"),
            lux.Clause(attribute="origin", data_model="dimension", data_type="nominal"),
        ],
        sql_df,
    )
    assert vis.mark == "bar"
    assert vis._inferred_intent[1].sort == ""

    sql_df = lux.LuxSQLTable(table_name="cars")
    vis = Vis(
        [
            lux.Clause(attribute="acceleration", data_model="measure", data_type="quantitative"),
            lux.Clause(attribute="name", data_model="dimension", data_type="nominal"),
        ],
        sql_df,
    )
    assert vis.mark == "bar"
    assert vis._inferred_intent[1].sort == "ascending"


def test_specified_vis_collection(global_var):
    """A list of values or attributes in a clause expands into one vis per combination."""
    sql_df = lux.LuxSQLTable(table_name="cars")

    vlst = VisList(
        [
            lux.Clause(attribute="horsepower"),
            lux.Clause(attribute="brand"),
            lux.Clause(attribute="origin", value=["Japan", "USA"]),
        ],
        sql_df,
    )
    assert len(vlst) == 2

    vlst = VisList(
        [
            lux.Clause(attribute=["horsepower", "weight"]),
            lux.Clause(attribute="brand"),
            lux.Clause(attribute="origin", value=["Japan", "USA"]),
        ],
        sql_df,
    )
    assert len(vlst) == 4

    # test if z axis has been filtered correctly
    chart_titles = [vis.title for vis in vlst]
    # Each title gets its own membership test: `"a" and "b" in titles` would only
    # test "b", because a non-empty string literal is always truthy.
    assert "origin = USA" in chart_titles
    assert "origin = Japan" in chart_titles
    assert "origin = Europe" not in chart_titles


def test_specified_channel_enforced_vis_collection(global_var):
    """An explicit channel on one clause is honoured in every vis of a wildcard list."""
    sql_df = lux.LuxSQLTable(table_name="cars")

    vis_list = VisList(
        [lux.Clause(attribute="?"), lux.Clause(attribute="milespergal", channel="x")],
        sql_df,
    )
    for vis in vis_list:
        check_attribute_on_channel(vis, "milespergal", "x")


def test_autoencoding_scatter(global_var):
    """Check channel assignment for two measures with none, some or all channels given.

    This is the single definition of this test name in the module: pytest
    collects one function per name, so a second definition would silently
    replace the first.
    """
    sql_df = lux.LuxSQLTable(table_name="cars")

    # No channel specified
    vis = Vis([lux.Clause(attribute="milespergal"), lux.Clause(attribute="weight")], sql_df)
    check_attribute_on_channel(vis, "milespergal", "x")
    check_attribute_on_channel(vis, "weight", "y")

    # Partial channel specified
    vis = Vis(
        [
            lux.Clause(attribute="milespergal", channel="y"),
            lux.Clause(attribute="weight"),
        ],
        sql_df,
    )
    check_attribute_on_channel(vis, "milespergal", "y")
    check_attribute_on_channel(vis, "weight", "x")

    # Full channel specified
    vis = Vis(
        [
            lux.Clause(attribute="milespergal", channel="y"),
            lux.Clause(attribute="weight", channel="x"),
        ],
        sql_df,
    )
    check_attribute_on_channel(vis, "milespergal", "y")
    check_attribute_on_channel(vis, "weight", "x")

    # Duplicate channel specified
    with pytest.raises(ValueError):
        # Should throw error because there should not be columns with the same channel specified
        sql_df.set_intent(
            [
                lux.Clause(attribute="milespergal", channel="x"),
                lux.Clause(attribute="weight", channel="x"),
            ]
        )


def test_autoencoding_histogram(global_var):
    """A single measure keeps the channel it was given."""
    # Channel explicitly set to y
    sql_df = lux.LuxSQLTable(table_name="cars")
    vis = Vis([lux.Clause(attribute="milespergal", channel="y")], sql_df)
    check_attribute_on_channel(vis, "milespergal", "y")

    # Channel explicitly set to x: the y channel is expected to hold "Record"
    vis = Vis([lux.Clause(attribute="milespergal", channel="x")], sql_df)
    assert vis.get_attr_by_channel("x")[0].attribute == "milespergal"
    assert vis.get_attr_by_channel("y")[0].attribute == "Record"


def test_autoencoding_line_chart(global_var):
    """Check channel assignment for "year" against a measure."""
    sql_df = lux.LuxSQLTable(table_name="cars")

    # No channel specified
    vis = Vis([lux.Clause(attribute="year"), lux.Clause(attribute="acceleration")], sql_df)
    check_attribute_on_channel(vis, "year", "x")
    check_attribute_on_channel(vis, "acceleration", "y")

    # Partial channel specified
    vis = Vis(
        [
            lux.Clause(attribute="year", channel="y"),
            lux.Clause(attribute="acceleration"),
        ],
        sql_df,
    )
    check_attribute_on_channel(vis, "year", "y")
    check_attribute_on_channel(vis, "acceleration", "x")

    # Full channel specified
    vis = Vis(
        [
            lux.Clause(attribute="year", channel="y"),
            lux.Clause(attribute="acceleration", channel="x"),
        ],
        sql_df,
    )
    check_attribute_on_channel(vis, "year", "y")
    check_attribute_on_channel(vis, "acceleration", "x")

    with pytest.raises(ValueError):
        # Should throw error because there should not be columns with the same channel specified
        sql_df.set_intent(
            [
                lux.Clause(attribute="year", channel="x"),
                lux.Clause(attribute="acceleration", channel="x"),
            ]
        )


def test_autoencoding_color_line_chart(global_var):
    """A third attribute added to a year/measure pair is placed on the color channel."""
    sql_df = lux.LuxSQLTable(table_name="cars")
    intent = [
        lux.Clause(attribute="year"),
        lux.Clause(attribute="acceleration"),
        lux.Clause(attribute="origin"),
    ]
    vis = Vis(intent, sql_df)
    check_attribute_on_channel(vis, "year", "x")
    check_attribute_on_channel(vis, "acceleration", "y")
    check_attribute_on_channel(vis, "origin", "color")


def test_autoencoding_color_scatter_chart(global_var):
    """With two measures, "origin" goes to color unless color is explicitly claimed."""
    sql_df = lux.LuxSQLTable(table_name="cars")
    vis = Vis(
        [
            lux.Clause(attribute="horsepower"),
            lux.Clause(attribute="acceleration"),
            lux.Clause(attribute="origin"),
        ],
        sql_df,
    )
    check_attribute_on_channel(vis, "origin", "color")

    vis = Vis(
        [
            lux.Clause(attribute="horsepower"),
            lux.Clause(attribute="acceleration", channel="color"),
            lux.Clause(attribute="origin"),
        ],
        sql_df,
    )
    check_attribute_on_channel(vis, "acceleration", "color")


def test_populate_options(global_var):
    """The "?" wildcard expands to every column, or to the measures when so constrained."""
    from lux.processor.Compiler import Compiler

    sql_df = lux.LuxSQLTable(table_name="cars")
    sql_df.set_intent([lux.Clause(attribute="?"), lux.Clause(attribute="milespergal")])
    col_set = set()
    for spec_options in Compiler.populate_wildcard_options(sql_df._intent, sql_df)["attributes"]:
        for clause in spec_options:
            col_set.add(clause.attribute)
    assert list_equal(list(col_set), list(sql_df.columns))

    sql_df.set_intent(
        [
            lux.Clause(attribute="?", data_model="measure"),
            lux.Clause(attribute="milespergal"),
        ]
    )
    sql_df._repr_html_()
    col_set = set()
    for spec_options in Compiler.populate_wildcard_options(sql_df._intent, sql_df)["attributes"]:
        for clause in spec_options:
            col_set.add(clause.attribute)
    assert list_equal(
        list(col_set),
        ["acceleration", "weight", "horsepower", "milespergal", "displacement"],
    )


def test_remove_all_invalid(global_var):
    """Filtering on an attribute and also plotting it leaves no current vis."""
    sql_df = lux.LuxSQLTable(table_name="cars")
    # with pytest.warns(UserWarning,match="duplicate attribute specified in the intent"):
    sql_df.set_intent(
        [
            lux.Clause(attribute="origin", filter_op="=", value="USA"),
            lux.Clause(attribute="origin"),
        ]
    )
    sql_df._repr_html_()
    assert len(sql_df.current_vis) == 0


def list_equal(l1, l2):
    """Return True when both lists hold the same elements, ignoring order.

    The inputs are compared through sorted copies, so the caller's lists are
    left untouched.
    """
    return sorted(l1) == sorted(l2)


def check_attribute_on_channel(vis, attr_name, channelName):
    """Assert that the first clause on `channelName` of `vis` is `attr_name`."""
    assert vis.get_attr_by_channel(channelName)[0].attribute == attr_name


# --- patch boundary kept from the original diff ---
# diff --git a/tests_sql/test_sql_executor.py b/tests_sql/test_sql_executor.py
# new file mode 100644
# index 00000000..2be37f43
# --- /dev/null
# +++ b/tests_sql/test_sql_executor.py
# @@ -0,0 +1,223 @@
#  Copyright 2019-2020 The Lux Authors.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0

#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

from .context import lux
import pytest
import pandas as pd
from lux.executor.SQLExecutor import SQLExecutor
from lux.vis.Vis import Vis
from lux.vis.VisList import VisList
import psycopg2


def test_lazy_execution():
    """A Vis built without a source has no data until the executor runs on it."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    intent = [
        lux.Clause(attribute="horsepower", aggregation="mean"),
        lux.Clause(attribute="origin"),
    ]
    vis = Vis(intent)
    # Check data field in vis is empty before calling executor
    assert vis.data is None
    SQLExecutor.execute([vis], tbl)
    assert type(vis.data) == lux.core.frame.LuxDataFrame


def test_selection():
    """Every vis in a multi-attribute VisList gets its own LuxDataFrame."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    intent = [
        lux.Clause(attribute=["horsepower", "weight", "acceleration"]),
        lux.Clause(attribute="year"),
    ]
    vislist = VisList(intent, tbl)
    assert all([type(vis.data) == lux.core.frame.LuxDataFrame for vis in vislist])
    assert all(vislist[2].data.columns == ["year", "acceleration"])


def test_aggregation():
    """Mean, sum and max of horsepower per origin match the pinned values."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    intent = [
        lux.Clause(attribute="horsepower", aggregation="mean"),
        lux.Clause(attribute="origin"),
    ]
    vis = Vis(intent, tbl)
    result_df = vis.data
    # .item() extracts the single matching value and raises if the selection does
    # not contain exactly one row, instead of relying on int(<Series>).
    assert int(result_df[result_df["origin"] == "USA"]["horsepower"].item()) == 119

    intent = [
        lux.Clause(attribute="horsepower", aggregation="sum"),
        lux.Clause(attribute="origin"),
    ]
    vis = Vis(intent, tbl)
    result_df = vis.data
    assert int(result_df[result_df["origin"] == "Japan"]["horsepower"].item()) == 6307

    intent = [
        lux.Clause(attribute="horsepower", aggregation="max"),
        lux.Clause(attribute="origin"),
    ]
    vis = Vis(intent, tbl)
    result_df = vis.data
    assert int(result_df[result_df["origin"] == "Europe"]["horsepower"].item()) == 133


def test_colored_bar_chart():
    """A bar chart with a color channel returns three columns and the pinned row count."""
    from lux.vis.Vis import Vis
    from lux.vis.Vis import Clause

    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    x_clause = Clause(attribute="milespergal", channel="x")
    y_clause = Clause(attribute="origin", channel="y")
    color_clause = Clause(attribute="cylinders", channel="color")

    new_vis = Vis([x_clause, y_clause, color_clause], tbl)
    # make sure dimension of the data is correct
    color_cardinality = len(tbl.unique_values["cylinders"])
    group_by_cardinality = len(tbl.unique_values["origin"])
    assert len(new_vis.data.columns) == 3
    # Same three conditions as the former chained comparison
    # `len == 15 > group_by < color * group_by`, one per line so a failure
    # points at the condition that broke.
    assert len(new_vis.data) == 15
    assert 15 > group_by_cardinality
    assert group_by_cardinality < color_cardinality * group_by_cardinality


def test_colored_line_chart():
    """A line chart with a color channel returns three columns and the pinned row count."""
    from lux.vis.Vis import Vis
    from lux.vis.Vis import Clause

    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    x_clause = Clause(attribute="year", channel="x")
    y_clause = Clause(attribute="milespergal", channel="y")
    color_clause = Clause(attribute="cylinders", channel="color")

    new_vis = Vis([x_clause, y_clause, color_clause], tbl)

    # make sure dimension of the data is correct
    color_cardinality = len(tbl.unique_values["cylinders"])
    group_by_cardinality = len(tbl.unique_values["year"])
    assert len(new_vis.data.columns) == 3
    # Same three conditions as the former chained comparison, one per line.
    assert len(new_vis.data) == 60
    assert 60 > group_by_cardinality
    assert group_by_cardinality < color_cardinality * group_by_cardinality


def test_filter():
    """An equality filter produces the expected WHERE fragments and filter variable."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    intent = [
        lux.Clause(attribute="horsepower"),
        lux.Clause(attribute="year"),
        lux.Clause(attribute="origin", filter_op="=", value="USA"),
    ]
    vis = Vis(intent, tbl)
    vis._vis_data = tbl
    filter_output = SQLExecutor.execute_filter(vis)
    where_clause = filter_output[0]
    where_clause_list = where_clause.split(" AND ")
    # One assertion per fragment so a failure names the missing condition.
    assert "WHERE \"origin\" = 'USA'" in where_clause_list
    assert '"horsepower" IS NOT NULL' in where_clause_list
    assert '"year" IS NOT NULL' in where_clause_list
    assert filter_output[1] == ["origin"]


def test_inequalityfilter():
    """Inequality filters are rendered verbatim into the WHERE clause."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    vis = Vis(
        [
            lux.Clause(attribute="horsepower", filter_op=">", value=50),
            lux.Clause(attribute="milespergal"),
        ]
    )
    vis._vis_data = tbl
    filter_output = SQLExecutor.execute_filter(vis)
    assert filter_output[0] == 'WHERE "horsepower" > \'50\' AND "milespergal" IS NOT NULL'
    assert filter_output[1] == ["horsepower"]

    intent = [
        lux.Clause(attribute="horsepower", filter_op="<=", value=100),
        lux.Clause(attribute="milespergal"),
    ]
    vis = Vis(intent, tbl)
    vis._vis_data = tbl
    filter_output = SQLExecutor.execute_filter(vis)
    assert filter_output[0] == 'WHERE "horsepower" <= \'100\' AND "milespergal" IS NOT NULL'
    assert filter_output[1] == ["horsepower"]


def test_binning():
    """A single-measure vis returns one row per bin."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    vis = Vis([lux.Clause(attribute="horsepower")], tbl)
    # First clause of the inferred intent that carries a non-zero bin size.
    nbins = next(clause.bin_size for clause in vis._inferred_intent if clause.bin_size != 0)
    assert len(vis.data) == nbins


def test_record():
    """A vis on "cylinders" alone returns one row per unique cylinder value."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    vis = Vis([lux.Clause(attribute="cylinders")], tbl)
    assert len(vis.data) == len(tbl.unique_values["cylinders"])


def test_filter_aggregation_fillzero_aligned():
    """Cylinder groups 5 and 8 report 0 for milespergal under the origin=Japan filter."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    intent = [
        lux.Clause(attribute="cylinders"),
        lux.Clause(attribute="milespergal"),
        lux.Clause("origin=Japan"),
    ]
    vis = Vis(intent, tbl)
    result = vis.data
    assert result[result["cylinders"] == 5]["milespergal"].values[0] == 0
    assert result[result["cylinders"] == 8]["milespergal"].values[0] == 0


def test_exclude_attribute():
    """Attributes listed in `exclude` never appear on x or y for a "?" wildcard."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    # The exclusion uses the lowercase spelling "name", matching both the
    # assertions below and how other tests in this patch refer to that column.
    excluded = ("name", "year")
    intent = [lux.Clause("?", exclude=list(excluded)), lux.Clause("horsepower")]
    vislist = VisList(intent, tbl)
    for vis in vislist:
        # Both excluded attributes are checked on both channels.
        for channel in ("x", "y"):
            assert vis.get_attr_by_channel(channel)[0].attribute not in excluded


def test_null_values():
    """None does not appear among the unique values of a column."""
    # checks that the SQLExecutor has filtered out any None or Null values from its metadata
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("aug_test_table")

    assert None not in tbl.unique_values["enrolled_university"]


# --- patch boundary kept from the original diff ---
# diff --git a/tests_sql/test_sql_interestingness.py b/tests_sql/test_sql_interestingness.py
# new file mode 100644
# index 00000000..f7fecfc4
# --- /dev/null
# +++ b/tests_sql/test_sql_interestingness.py
# @@ -0,0 +1,71 @@
#  Copyright 2019-2020 The Lux Authors.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
from .context import lux
import pytest
import pandas as pd
import numpy as np
import psycopg2
from lux.interestingness.interestingness import interestingness


def test_interestingness_1_0_1(global_var):
    """With a filter plus one attribute, the current vis scores 0 and the top Filter vis scores above 0."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    tbl.set_intent(
        [
            lux.Clause(attribute="origin", filter_op="=", value="USA"),
            lux.Clause(attribute="cylinders"),
        ]
    )
    tbl._repr_html_()
    filter_score = tbl.recommendation["Filter"][0].score
    assert tbl.current_vis[0].score == 0
    assert filter_score > 0
    tbl.clear_intent()


def test_interestingness_0_1_1(global_var):
    """A wildcard filter value yields a scorable "Current Vis" whose filter value is "USA"."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    tbl.set_intent(
        [
            lux.Clause(attribute="origin", filter_op="=", value="?"),
            lux.Clause(attribute="milespergal"),
        ]
    )
    tbl._repr_html_()
    top_current = tbl.recommendation["Current Vis"][0]
    # Identity comparison is the reliable way to test for None.
    assert interestingness(top_current, tbl) is not None
    assert str(top_current._inferred_intent[2].value) == "USA"
    tbl.clear_intent()


def test_interestingness_1_1_1(global_var):
    """The top Enhance and Filter recommendations both receive a score."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    tbl.set_intent(
        [
            lux.Clause(attribute="horsepower"),
            lux.Clause(attribute="origin", filter_op="=", value="USA", bin_size=20),
        ]
    )
    tbl._repr_html_()
    assert interestingness(tbl.recommendation["Enhance"][0], tbl) is not None

    # check for top recommended Filter graph score is not none
    assert interestingness(tbl.recommendation["Filter"][0], tbl) is not None
    tbl.clear_intent()