diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 668a7bc3..be412896 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -55,4 +55,5 @@ jobs: - name: Test with Pytest and Code Coverage Report run: | pytest --cov-report term --cov=lux tests/ + pytest --cov-report term --cov=lux --cov-append tests_sql/ bash <(curl -s https://codecov.io/bash) diff --git a/Makefile b/Makefile index 6d51d1b8..3d264512 100644 --- a/Makefile +++ b/Makefile @@ -3,5 +3,8 @@ init: test: black --check . python -m pytest tests/ - +test_all: + black --check . + python -m pytest tests/ + python -m pytest tests_sql/ -.PHONY: init test \ No newline at end of file +.PHONY: init test test_all \ No newline at end of file diff --git a/tests/test_compiler.py b/tests/test_compiler.py index d425e41d..e5ca95c0 100644 --- a/tests/test_compiler.py +++ b/tests/test_compiler.py @@ -32,19 +32,6 @@ def test_underspecified_no_vis(global_var, test_recs): assert len(df.current_vis) == 0 df.clear_intent() - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - - test_recs(sql_df, no_vis_actions) - assert len(sql_df.current_vis) == 0 - - # test only one filter context case. 
- sql_df.set_intent([lux.Clause(attribute="origin", filter_op="=", value="USA")]) - test_recs(sql_df, no_vis_actions) - assert len(sql_df.current_vis) == 0 - def test_underspecified_single_vis(global_var, test_recs): lux.config.set_executor_type("Pandas") @@ -60,18 +47,6 @@ def test_underspecified_single_vis(global_var, test_recs): assert attr.data_type == "quantitative" df.clear_intent() - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - sql_df.set_intent([lux.Clause(attribute="milespergal"), lux.Clause(attribute="weight")]) - test_recs(sql_df, one_vis_actions) - assert len(sql_df.current_vis) == 1 - assert sql_df.current_vis[0].mark == "scatter" - for attr in sql_df.current_vis[0]._inferred_intent: - assert attr.data_model == "measure" - for attr in sql_df.current_vis[0]._inferred_intent: - assert attr.data_type == "quantitative" - # def test_underspecified_vis_collection(test_recs): # multiple_vis_actions = ["Current viss"] @@ -115,15 +90,6 @@ def test_set_intent_as_vis(global_var, test_recs): df._ipython_display_() test_recs(df, ["Enhance", "Filter", "Generalize"]) - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - sql_df._repr_html_() - vis = sql_df.recommendation["Correlation"][0] - sql_df.intent = vis - sql_df._repr_html_() - test_recs(sql_df, ["Enhance", "Filter", "Generalize"]) - @pytest.fixture def test_recs(): @@ -150,18 +116,6 @@ def test_parse(global_var): vlst = VisList([lux.Clause("Origin=?"), lux.Clause("MilesPerGal")], df) assert len(vlst) == 3 - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vlst = VisList([lux.Clause("origin=?"), 
lux.Clause(attribute="milespergal")], sql_df) - assert len(vlst) == 3 - - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vlst = VisList([lux.Clause("origin=?"), lux.Clause("milespergal")], sql_df) - assert len(vlst) == 3 - def test_underspecified_vis_collection_zval(global_var): lux.config.set_executor_type("Pandas") @@ -181,18 +135,6 @@ def test_underspecified_vis_collection_zval(global_var): # vlst = VisList([lux.Clause(attribute = ["Origin","Cylinders"], filter_op="=",value="?"),lux.Clause(attribute = ["Horsepower"]),lux.Clause(attribute = "Weight")],df) # assert len(vlst) == 8 - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vlst = VisList( - [ - lux.Clause(attribute="origin", filter_op="=", value="?"), - lux.Clause(attribute="milespergal"), - ], - sql_df, - ) - assert len(vlst) == 3 - def test_sort_bar(global_var): from lux.processor.Compiler import Compiler @@ -221,32 +163,6 @@ def test_sort_bar(global_var): assert vis.mark == "bar" assert vis._inferred_intent[1].sort == "ascending" - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis( - [ - lux.Clause(attribute="acceleration", data_model="measure", data_type="quantitative"), - lux.Clause(attribute="origin", data_model="dimension", data_type="nominal"), - ], - sql_df, - ) - assert vis.mark == "bar" - assert vis._inferred_intent[1].sort == "" - - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis( - [ - lux.Clause(attribute="acceleration", data_model="measure", 
data_type="quantitative"), - lux.Clause(attribute="name", data_model="dimension", data_type="nominal"), - ], - sql_df, - ) - assert vis.mark == "bar" - assert vis._inferred_intent[1].sort == "ascending" - def test_specified_vis_collection(global_var): lux.config.set_executor_type("Pandas") @@ -335,16 +251,6 @@ def test_autoencoding_scatter(global_var): ) df.clear_intent() - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - visList = VisList( - [lux.Clause(attribute="?"), lux.Clause(attribute="milespergal", channel="x")], - sql_df, - ) - for vis in visList: - check_attribute_on_channel(vis, "milespergal", "x") - def test_autoencoding_scatter(): lux.config.set_executor_type("Pandas") @@ -388,45 +294,6 @@ def test_autoencoding_scatter(): ] ) - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis([lux.Clause(attribute="milespergal"), lux.Clause(attribute="weight")], sql_df) - check_attribute_on_channel(vis, "milespergal", "x") - check_attribute_on_channel(vis, "weight", "y") - - # Partial channel specified - vis = Vis( - [ - lux.Clause(attribute="milespergal", channel="y"), - lux.Clause(attribute="weight"), - ], - sql_df, - ) - check_attribute_on_channel(vis, "milespergal", "y") - check_attribute_on_channel(vis, "weight", "x") - - # Full channel specified - vis = Vis( - [ - lux.Clause(attribute="milespergal", channel="y"), - lux.Clause(attribute="weight", channel="x"), - ], - sql_df, - ) - check_attribute_on_channel(vis, "milespergal", "y") - check_attribute_on_channel(vis, "weight", "x") - # Duplicate channel specified - with pytest.raises(ValueError): - # Should throw error because there should not be columns with the same channel specified - sql_df.set_intent( - [ - 
lux.Clause(attribute="milespergal", channel="x"), - lux.Clause(attribute="weight", channel="x"), - ] - ) - def test_autoencoding_histogram(global_var): lux.config.set_executor_type("Pandas") @@ -441,18 +308,6 @@ def test_autoencoding_histogram(global_var): assert vis.get_attr_by_channel("x")[0].attribute == "MilesPerGal" assert vis.get_attr_by_channel("y")[0].attribute == "Record" - # No channel specified - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis([lux.Clause(attribute="milespergal", channel="y")], sql_df) - check_attribute_on_channel(vis, "milespergal", "y") - - vis = Vis([lux.Clause(attribute="milespergal", channel="x")], sql_df) - assert vis.get_attr_by_channel("x")[0].attribute == "milespergal" - assert vis.get_attr_by_channel("y")[0].attribute == "Record" - def test_autoencoding_line_chart(global_var): lux.config.set_executor_type("Pandas") @@ -495,45 +350,6 @@ def test_autoencoding_line_chart(global_var): ) df.clear_intent() - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis([lux.Clause(attribute="year"), lux.Clause(attribute="acceleration")], sql_df) - check_attribute_on_channel(vis, "year", "x") - check_attribute_on_channel(vis, "acceleration", "y") - - # Partial channel specified - vis = Vis( - [ - lux.Clause(attribute="year", channel="y"), - lux.Clause(attribute="acceleration"), - ], - sql_df, - ) - check_attribute_on_channel(vis, "year", "y") - check_attribute_on_channel(vis, "acceleration", "x") - - # Full channel specified - vis = Vis( - [ - lux.Clause(attribute="year", channel="y"), - lux.Clause(attribute="acceleration", channel="x"), - ], - sql_df, - ) - check_attribute_on_channel(vis, "year", "y") - 
check_attribute_on_channel(vis, "acceleration", "x") - - with pytest.raises(ValueError): - # Should throw error because there should not be columns with the same channel specified - sql_df.set_intent( - [ - lux.Clause(attribute="year", channel="x"), - lux.Clause(attribute="acceleration", channel="x"), - ] - ) - def test_autoencoding_color_line_chart(global_var): lux.config.set_executor_type("Pandas") @@ -550,20 +366,6 @@ def test_autoencoding_color_line_chart(global_var): check_attribute_on_channel(vis, "Acceleration", "y") check_attribute_on_channel(vis, "Origin", "color") - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - intent = [ - lux.Clause(attribute="year"), - lux.Clause(attribute="acceleration"), - lux.Clause(attribute="origin"), - ] - vis = Vis(intent, sql_df) - check_attribute_on_channel(vis, "year", "x") - check_attribute_on_channel(vis, "acceleration", "y") - check_attribute_on_channel(vis, "origin", "color") - def test_autoencoding_color_scatter_chart(global_var): lux.config.set_executor_type("Pandas") @@ -590,30 +392,6 @@ def test_autoencoding_color_scatter_chart(global_var): ) check_attribute_on_channel(vis, "Acceleration", "color") - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - vis = Vis( - [ - lux.Clause(attribute="horsepower"), - lux.Clause(attribute="acceleration"), - lux.Clause(attribute="origin"), - ], - sql_df, - ) - check_attribute_on_channel(vis, "origin", "color") - - vis = Vis( - [ - lux.Clause(attribute="horsepower"), - lux.Clause(attribute="acceleration", channel="color"), - lux.Clause(attribute="origin"), - ], - sql_df, - ) - check_attribute_on_channel(vis, "acceleration", "color") - def 
test_populate_options(global_var): lux.config.set_executor_type("Pandas") @@ -644,33 +422,6 @@ def test_populate_options(global_var): ) df.clear_intent() - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - sql_df.set_intent([lux.Clause(attribute="?"), lux.Clause(attribute="milespergal")]) - col_set = set() - for specOptions in Compiler.populate_wildcard_options(sql_df._intent, sql_df)["attributes"]: - for clause in specOptions: - col_set.add(clause.attribute) - assert list_equal(list(col_set), list(sql_df.columns)) - - sql_df.set_intent( - [ - lux.Clause(attribute="?", data_model="measure"), - lux.Clause(attribute="milespergal"), - ] - ) - sql_df._repr_html_() - col_set = set() - for specOptions in Compiler.populate_wildcard_options(sql_df._intent, sql_df)["attributes"]: - for clause in specOptions: - col_set.add(clause.attribute) - assert list_equal( - list(col_set), - ["acceleration", "weight", "horsepower", "milespergal", "displacement"], - ) - def test_remove_all_invalid(global_var): lux.config.set_executor_type("Pandas") @@ -687,20 +438,6 @@ def test_remove_all_invalid(global_var): assert len(df.current_vis) == 0 df.clear_intent() - # test for sql executor - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - lux.config.set_SQL_connection(connection) - sql_df = lux.LuxSQLTable(table_name="cars") - # with pytest.warns(UserWarning,match="duplicate attribute specified in the intent"): - sql_df.set_intent( - [ - lux.Clause(attribute="origin", filter_op="=", value="USA"), - lux.Clause(attribute="origin"), - ] - ) - sql_df._repr_html_() - assert len(sql_df.current_vis) == 0 - def list_equal(l1, l2): l1.sort() diff --git a/tests/test_interestingness.py b/tests/test_interestingness.py index c2d017b5..769176fb 100644 --- a/tests/test_interestingness.py +++ 
b/tests/test_interestingness.py @@ -74,23 +74,6 @@ def test_interestingness_1_0_1(global_var): assert df.current_vis[0].score == 0 df.clear_intent() - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - tbl.set_intent( - [ - lux.Clause(attribute="Origin", filter_op="=", value="USA"), - lux.Clause(attribute="Cylinders"), - ] - ) - tbl._repr_html_() - filter_score = tbl.recommendation["Filter"][0].score - assert tbl.current_vis[0].score == 0 - assert filter_score > 0 - tbl.clear_intent() - def test_interestingness_0_1_0(global_var): lux.config.set_executor_type("Pandas") @@ -153,22 +136,6 @@ def test_interestingness_0_1_1(global_var): assert str(df.recommendation["Current Vis"][0]._inferred_intent[2].value) == "USA" df.clear_intent() - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - tbl.set_intent( - [ - lux.Clause(attribute="Origin", filter_op="=", value="?"), - lux.Clause(attribute="MilesPerGal"), - ] - ) - tbl._repr_html_() - assert interestingness(tbl.recommendation["Current Vis"][0], tbl) != None - assert str(tbl.recommendation["Current Vis"][0]._inferred_intent[2].value) == "USA" - tbl.clear_intent() - def test_interestingness_1_1_0(global_var): lux.config.set_executor_type("Pandas") @@ -240,24 +207,6 @@ def test_interestingness_1_1_1(global_var): assert interestingness(df.recommendation["Filter"][0], df) != None df.clear_intent() - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") - tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) - tbl.set_SQL_table("car") - - tbl.set_intent( - [ - lux.Clause(attribute="Horsepower"), - lux.Clause(attribute="Origin", filter_op="=", value="USA", bin_size=20), - ] - ) - tbl._repr_html_() - 
assert interestingness(tbl.recommendation["Enhance"][0], tbl) != None - - # check for top recommended Filter graph score is not none - assert interestingness(tbl.recommendation["Filter"][0], tbl) != None - tbl.clear_intent() - def test_interestingness_1_2_0(global_var): from lux.vis.Vis import Vis diff --git a/tests/test_sql_interestingness.py b/tests/test_sql_interestingness.py new file mode 100644 index 00000000..394b9492 --- /dev/null +++ b/tests/test_sql_interestingness.py @@ -0,0 +1,77 @@ +# Copyright 2019-2020 The Lux Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .context import lux +import pytest +import pandas as pd +import numpy as np +import psycopg2 +from lux.interestingness.interestingness import interestingness + + +def test_interestingness_1_0_1(global_var): + connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") + tbl = lux.LuxSQLTable() + lux.config.set_SQL_connection(connection) + tbl.set_SQL_table("car") + + tbl.set_intent( + [ + lux.Clause(attribute="Origin", filter_op="=", value="USA"), + lux.Clause(attribute="Cylinders"), + ] + ) + tbl._repr_html_() + filter_score = tbl.recommendation["Filter"][0].score + assert tbl.current_vis[0].score == 0 + assert filter_score > 0 + tbl.clear_intent() + + +def test_interestingness_0_1_1(global_var): + connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") + tbl = lux.LuxSQLTable() + lux.config.set_SQL_connection(connection) + tbl.set_SQL_table("car") + + tbl.set_intent( + [ + lux.Clause(attribute="Origin", filter_op="=", value="?"), + lux.Clause(attribute="MilesPerGal"), + ] + ) + tbl._repr_html_() + assert interestingness(tbl.recommendation["Current Vis"][0], tbl) is not None + assert str(tbl.recommendation["Current Vis"][0]._inferred_intent[2].value) == "USA" + tbl.clear_intent() + + +def test_interestingness_1_1_1(global_var): + connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") + tbl = lux.LuxSQLTable() + lux.config.set_SQL_connection(connection) + tbl.set_SQL_table("car") + + tbl.set_intent( + [ + lux.Clause(attribute="Horsepower"), + lux.Clause(attribute="Origin", filter_op="=", value="USA", bin_size=20), + ] + ) + tbl._repr_html_() + assert interestingness(tbl.recommendation["Enhance"][0], tbl) is not None + + # check for top recommended Filter graph score is not none + assert interestingness(tbl.recommendation["Filter"][0], tbl) is not None + tbl.clear_intent() diff --git a/tests/test_type.py b/tests/test_type.py index 5395c661..7af2b7fd 100644 --- 
a/tests/test_type.py +++ b/tests/test_type.py @@ -19,22 +19,6 @@ import warnings -# Suite of test that checks if data_type inferred correctly by Lux -def test_check_cars(): - lux.config.set_SQL_connection("") - df = pd.read_csv("lux/data/car.csv") - df.maintain_metadata() - assert df.data_type["Name"] == "nominal" - assert df.data_type["MilesPerGal"] == "quantitative" - assert df.data_type["Cylinders"] == "nominal" - assert df.data_type["Displacement"] == "quantitative" - assert df.data_type["Horsepower"] == "quantitative" - assert df.data_type["Weight"] == "quantitative" - assert df.data_type["Acceleration"] == "quantitative" - assert df.data_type["Year"] == "temporal" - assert df.data_type["Origin"] == "nominal" - - def test_check_int_id(): df = pd.read_csv( "https://github.com/lux-org/lux-datasets/blob/master/data/instacart_sample.csv?raw=true" diff --git a/tests_sql/__init__.py b/tests_sql/__init__.py new file mode 100644 index 00000000..948becf5 --- /dev/null +++ b/tests_sql/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2019-2020 The Lux Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests_sql/conftest.py b/tests_sql/conftest.py new file mode 100644 index 00000000..8ee3ddbb --- /dev/null +++ b/tests_sql/conftest.py @@ -0,0 +1,33 @@ +import pytest +import pandas as pd + + +@pytest.fixture(scope="session") +def global_var(): + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" + pytest.olympic = pd.read_csv(url) + pytest.car_df = pd.read_csv("lux/data/car.csv") + pytest.college_df = pd.read_csv("lux/data/college.csv") + pytest.metadata = [ + "_intent", + "_inferred_intent", + "_data_type", + "unique_values", + "cardinality", + "_rec_info", + "_min_max", + "plotting_style", + "_current_vis", + "_widget", + "_recommendation", + "_prev", + "_history", + "_saved_export", + "name", + "_sampled", + "_toggle_pandas_display", + "_message", + "_pandas_only", + "pre_aggregated", + "_type_override", + ] diff --git a/tests_sql/context.py b/tests_sql/context.py new file mode 100644 index 00000000..d15b9ca4 --- /dev/null +++ b/tests_sql/context.py @@ -0,0 +1,28 @@ +# Copyright 2019-2020 The Lux Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +import lux + +lux.config.interestingness_fallback = False +lux.config.pandas_fallback = False + +import psycopg2 + +connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") +lux.config.set_SQL_connection(connection) diff --git a/tests_sql/test_sql_compiler.py b/tests_sql/test_sql_compiler.py new file mode 100644 index 00000000..16c3d9f0 --- /dev/null +++ b/tests_sql/test_sql_compiler.py @@ -0,0 +1,418 @@ +# Copyright 2019-2020 The Lux Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .context import lux +import pytest +import pandas as pd +from lux.vis.Vis import Vis +from lux.vis.VisList import VisList +import psycopg2 + + +def test_underspecified_no_vis(global_var, test_recs): + no_vis_actions = ["Correlation", "Distribution", "Occurrence", "Temporal"] + sql_df = lux.LuxSQLTable(table_name="cars") + + test_recs(sql_df, no_vis_actions) + assert len(sql_df.current_vis) == 0 + + # test only one filter context case. 
+ sql_df.set_intent([lux.Clause(attribute="origin", filter_op="=", value="USA")]) + test_recs(sql_df, no_vis_actions) + assert len(sql_df.current_vis) == 0 + + +def test_underspecified_single_vis(global_var, test_recs): + one_vis_actions = ["Enhance", "Filter", "Generalize"] + sql_df = lux.LuxSQLTable(table_name="cars") + sql_df.set_intent([lux.Clause(attribute="milespergal"), lux.Clause(attribute="weight")]) + test_recs(sql_df, one_vis_actions) + assert len(sql_df.current_vis) == 1 + assert sql_df.current_vis[0].mark == "scatter" + for attr in sql_df.current_vis[0]._inferred_intent: + assert attr.data_model == "measure" + for attr in sql_df.current_vis[0]._inferred_intent: + assert attr.data_type == "quantitative" + + +def test_set_intent_as_vis(global_var, test_recs): + sql_df = lux.LuxSQLTable(table_name="cars") + sql_df._repr_html_() + vis = sql_df.recommendation["Correlation"][0] + sql_df.intent = vis + sql_df._repr_html_() + test_recs(sql_df, ["Enhance", "Filter", "Generalize"]) + + +@pytest.fixture +def test_recs(): + def test_recs_function(df, actions): + df._ipython_display_() + assert len(df.recommendation) > 0 + recKeys = list(df.recommendation.keys()) + list_equal(recKeys, actions) + + return test_recs_function + + +def test_parse(global_var): + sql_df = lux.LuxSQLTable(table_name="cars") + vlst = VisList([lux.Clause("origin=?"), lux.Clause(attribute="milespergal")], sql_df) + assert len(vlst) == 3 + + sql_df = lux.LuxSQLTable(table_name="cars") + vlst = VisList([lux.Clause("origin=?"), lux.Clause("milespergal")], sql_df) + assert len(vlst) == 3 + + +def test_underspecified_vis_collection_zval(global_var): + sql_df = lux.LuxSQLTable(table_name="cars") + vlst = VisList( + [ + lux.Clause(attribute="origin", filter_op="=", value="?"), + lux.Clause(attribute="milespergal"), + ], + sql_df, + ) + assert len(vlst) == 3 + + +def test_sort_bar(global_var): + sql_df = lux.LuxSQLTable(table_name="cars") + vis = Vis( + [ + lux.Clause(attribute="acceleration", 
data_model="measure", data_type="quantitative"), + lux.Clause(attribute="origin", data_model="dimension", data_type="nominal"), + ], + sql_df, + ) + assert vis.mark == "bar" + assert vis._inferred_intent[1].sort == "" + + sql_df = lux.LuxSQLTable(table_name="cars") + vis = Vis( + [ + lux.Clause(attribute="acceleration", data_model="measure", data_type="quantitative"), + lux.Clause(attribute="name", data_model="dimension", data_type="nominal"), + ], + sql_df, + ) + assert vis.mark == "bar" + assert vis._inferred_intent[1].sort == "ascending" + + +def test_specified_vis_collection(global_var): + sql_df = lux.LuxSQLTable(table_name="cars") + + vlst = VisList( + [ + lux.Clause(attribute="horsepower"), + lux.Clause(attribute="brand"), + lux.Clause(attribute="origin", value=["Japan", "USA"]), + ], + sql_df, + ) + assert len(vlst) == 2 + + vlst = VisList( + [ + lux.Clause(attribute=["horsepower", "weight"]), + lux.Clause(attribute="brand"), + lux.Clause(attribute="origin", value=["Japan", "USA"]), + ], + sql_df, + ) + assert len(vlst) == 4 + + # test if z axis has been filtered correctly + chart_titles = [vis.title for vis in vlst] + assert "origin = USA" in chart_titles and "origin = Japan" in chart_titles + assert "origin = Europe" not in chart_titles + + +def test_specified_channel_enforced_vis_collection(global_var): + sql_df = lux.LuxSQLTable(table_name="cars") + + visList = VisList( + [lux.Clause(attribute="?"), lux.Clause(attribute="milespergal", channel="x")], + sql_df, + ) + for vis in visList: + check_attribute_on_channel(vis, "milespergal", "x") + + +def test_autoencoding_scatter_channel_enforced(global_var): + sql_df = lux.LuxSQLTable(table_name="cars") + + vis = Vis([lux.Clause(attribute="milespergal"), lux.Clause(attribute="weight")], sql_df) + check_attribute_on_channel(vis, "milespergal", "x") + check_attribute_on_channel(vis, "weight", "y") + + # Partial channel specified + vis = Vis( + [ + lux.Clause(attribute="milespergal", channel="y"), + lux.Clause(attribute="weight"), + ], + sql_df, + ) 
+ check_attribute_on_channel(vis, "milespergal", "y") + check_attribute_on_channel(vis, "weight", "x") + + # Full channel specified + vis = Vis( + [ + lux.Clause(attribute="milespergal", channel="y"), + lux.Clause(attribute="weight", channel="x"), + ], + sql_df, + ) + check_attribute_on_channel(vis, "milespergal", "y") + check_attribute_on_channel(vis, "weight", "x") + # Duplicate channel specified + with pytest.raises(ValueError): + # Should throw error because there should not be columns with the same channel specified + sql_df.set_intent( + [ + lux.Clause(attribute="milespergal", channel="x"), + lux.Clause(attribute="weight", channel="x"), + ] + ) + sql_df.clear_intent() + + sql_df = lux.LuxSQLTable(table_name="cars") + visList = VisList( + [lux.Clause(attribute="?"), lux.Clause(attribute="milespergal", channel="x")], + sql_df, + ) + for vis in visList: + check_attribute_on_channel(vis, "milespergal", "x") + + +def test_autoencoding_scatter(): + sql_df = lux.LuxSQLTable(table_name="cars") + + vis = Vis([lux.Clause(attribute="milespergal"), lux.Clause(attribute="weight")], sql_df) + check_attribute_on_channel(vis, "milespergal", "x") + check_attribute_on_channel(vis, "weight", "y") + + # Partial channel specified + vis = Vis( + [ + lux.Clause(attribute="milespergal", channel="y"), + lux.Clause(attribute="weight"), + ], + sql_df, + ) + check_attribute_on_channel(vis, "milespergal", "y") + check_attribute_on_channel(vis, "weight", "x") + + # Full channel specified + vis = Vis( + [ + lux.Clause(attribute="milespergal", channel="y"), + lux.Clause(attribute="weight", channel="x"), + ], + sql_df, + ) + check_attribute_on_channel(vis, "milespergal", "y") + check_attribute_on_channel(vis, "weight", "x") + # Duplicate channel specified + with pytest.raises(ValueError): + # Should throw error because there should not be columns with the same channel specified + sql_df.set_intent( + [ + lux.Clause(attribute="milespergal", channel="x"), + lux.Clause(attribute="weight", 
channel="x"), + ] + ) + + # test for sql executor + sql_df = lux.LuxSQLTable(table_name="cars") + vis = Vis([lux.Clause(attribute="milespergal"), lux.Clause(attribute="weight")], sql_df) + check_attribute_on_channel(vis, "milespergal", "x") + check_attribute_on_channel(vis, "weight", "y") + + # Partial channel specified + vis = Vis( + [ + lux.Clause(attribute="milespergal", channel="y"), + lux.Clause(attribute="weight"), + ], + sql_df, + ) + check_attribute_on_channel(vis, "milespergal", "y") + check_attribute_on_channel(vis, "weight", "x") + + # Full channel specified + vis = Vis( + [ + lux.Clause(attribute="milespergal", channel="y"), + lux.Clause(attribute="weight", channel="x"), + ], + sql_df, + ) + check_attribute_on_channel(vis, "milespergal", "y") + check_attribute_on_channel(vis, "weight", "x") + # Duplicate channel specified + with pytest.raises(ValueError): + # Should throw error because there should not be columns with the same channel specified + sql_df.set_intent( + [ + lux.Clause(attribute="milespergal", channel="x"), + lux.Clause(attribute="weight", channel="x"), + ] + ) + + +def test_autoencoding_histogram(global_var): + # No channel specified + # test for sql executor + sql_df = lux.LuxSQLTable(table_name="cars") + vis = Vis([lux.Clause(attribute="milespergal", channel="y")], sql_df) + check_attribute_on_channel(vis, "milespergal", "y") + + vis = Vis([lux.Clause(attribute="milespergal", channel="x")], sql_df) + assert vis.get_attr_by_channel("x")[0].attribute == "milespergal" + assert vis.get_attr_by_channel("y")[0].attribute == "Record" + + +def test_autoencoding_line_chart(global_var): + # test for sql executor + sql_df = lux.LuxSQLTable(table_name="cars") + vis = Vis([lux.Clause(attribute="year"), lux.Clause(attribute="acceleration")], sql_df) + check_attribute_on_channel(vis, "year", "x") + check_attribute_on_channel(vis, "acceleration", "y") + + # Partial channel specified + vis = Vis( + [ + lux.Clause(attribute="year", channel="y"), + 
lux.Clause(attribute="acceleration"), + ], + sql_df, + ) + check_attribute_on_channel(vis, "year", "y") + check_attribute_on_channel(vis, "acceleration", "x") + + # Full channel specified + vis = Vis( + [ + lux.Clause(attribute="year", channel="y"), + lux.Clause(attribute="acceleration", channel="x"), + ], + sql_df, + ) + check_attribute_on_channel(vis, "year", "y") + check_attribute_on_channel(vis, "acceleration", "x") + + with pytest.raises(ValueError): + # Should throw error because there should not be columns with the same channel specified + sql_df.set_intent( + [ + lux.Clause(attribute="year", channel="x"), + lux.Clause(attribute="acceleration", channel="x"), + ] + ) + + +def test_autoencoding_color_line_chart(global_var): + # test for sql executor + sql_df = lux.LuxSQLTable(table_name="cars") + intent = [ + lux.Clause(attribute="year"), + lux.Clause(attribute="acceleration"), + lux.Clause(attribute="origin"), + ] + vis = Vis(intent, sql_df) + check_attribute_on_channel(vis, "year", "x") + check_attribute_on_channel(vis, "acceleration", "y") + check_attribute_on_channel(vis, "origin", "color") + + +def test_autoencoding_color_scatter_chart(global_var): + # test for sql executor + sql_df = lux.LuxSQLTable(table_name="cars") + vis = Vis( + [ + lux.Clause(attribute="horsepower"), + lux.Clause(attribute="acceleration"), + lux.Clause(attribute="origin"), + ], + sql_df, + ) + check_attribute_on_channel(vis, "origin", "color") + + vis = Vis( + [ + lux.Clause(attribute="horsepower"), + lux.Clause(attribute="acceleration", channel="color"), + lux.Clause(attribute="origin"), + ], + sql_df, + ) + check_attribute_on_channel(vis, "acceleration", "color") + + +def test_populate_options(global_var): + from lux.processor.Compiler import Compiler + + # test for sql executor + sql_df = lux.LuxSQLTable(table_name="cars") + sql_df.set_intent([lux.Clause(attribute="?"), lux.Clause(attribute="milespergal")]) + col_set = set() + for specOptions in 
Compiler.populate_wildcard_options(sql_df._intent, sql_df)["attributes"]: + for clause in specOptions: + col_set.add(clause.attribute) + assert list_equal(list(col_set), list(sql_df.columns)) + + sql_df.set_intent( + [ + lux.Clause(attribute="?", data_model="measure"), + lux.Clause(attribute="milespergal"), + ] + ) + sql_df._repr_html_() + col_set = set() + for specOptions in Compiler.populate_wildcard_options(sql_df._intent, sql_df)["attributes"]: + for clause in specOptions: + col_set.add(clause.attribute) + assert list_equal( + list(col_set), + ["acceleration", "weight", "horsepower", "milespergal", "displacement"], + ) + + +def test_remove_all_invalid(global_var): + # test for sql executor + sql_df = lux.LuxSQLTable(table_name="cars") + # with pytest.warns(UserWarning,match="duplicate attribute specified in the intent"): + sql_df.set_intent( + [ + lux.Clause(attribute="origin", filter_op="=", value="USA"), + lux.Clause(attribute="origin"), + ] + ) + sql_df._repr_html_() + assert len(sql_df.current_vis) == 0 + + +def list_equal(l1, l2): + l1.sort() + l2.sort() + return l1 == l2 + + +def check_attribute_on_channel(vis, attr_name, channelName): + assert vis.get_attr_by_channel(channelName)[0].attribute == attr_name diff --git a/tests/test_sql_executor.py b/tests_sql/test_sql_executor.py similarity index 81% rename from tests/test_sql_executor.py rename to tests_sql/test_sql_executor.py index f8d8d907..322a92a3 100644 --- a/tests/test_sql_executor.py +++ b/tests_sql/test_sql_executor.py @@ -23,9 +23,7 @@ def test_lazy_execution(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("car") intent = [ @@ -40,9 +38,7 @@ def test_lazy_execution(): def test_selection(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) 
tbl.set_SQL_table("car") intent = [ @@ -55,9 +51,7 @@ def test_selection(): def test_aggregation(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("car") intent = [ @@ -89,9 +83,7 @@ def test_colored_bar_chart(): from lux.vis.Vis import Vis from lux.vis.Vis import Clause - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("car") x_clause = Clause(attribute="MilesPerGal", channel="x") @@ -112,9 +104,7 @@ def test_colored_line_chart(): from lux.vis.Vis import Vis from lux.vis.Vis import Clause - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("car") x_clause = Clause(attribute="Year", channel="x") @@ -133,9 +123,7 @@ def test_colored_line_chart(): def test_filter(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("car") intent = [ @@ -157,9 +145,7 @@ def test_filter(): def test_inequalityfilter(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("car") vis = Vis( @@ -185,9 +171,7 @@ def test_inequalityfilter(): def test_binning(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("car") vis = Vis([lux.Clause(attribute="Horsepower")], tbl) @@ -196,9 +180,7 @@ def test_binning(): def test_record(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = 
lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("car") vis = Vis([lux.Clause(attribute="Cylinders")], tbl) @@ -206,9 +188,7 @@ def test_record(): def test_filter_aggregation_fillzero_aligned(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("car") intent = [ @@ -223,9 +203,7 @@ def test_filter_aggregation_fillzero_aligned(): def test_exclude_attribute(): - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("car") intent = [lux.Clause("?", exclude=["Name", "Year"]), lux.Clause("Horsepower")] @@ -239,9 +217,7 @@ def test_exclude_attribute(): def test_null_values(): # checks that the SQLExecutor has filtered out any None or Null values from its metadata - connection = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux") tbl = lux.LuxSQLTable() - lux.config.set_SQL_connection(connection) tbl.set_SQL_table("aug_test_table") assert None not in tbl.unique_values["enrolled_university"] diff --git a/tests_sql/test_sql_interestingness.py b/tests_sql/test_sql_interestingness.py new file mode 100644 index 00000000..351377b3 --- /dev/null +++ b/tests_sql/test_sql_interestingness.py @@ -0,0 +1,71 @@ +# Copyright 2019-2020 The Lux Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .context import lux +import pytest +import pandas as pd +import numpy as np +import psycopg2 +from lux.interestingness.interestingness import interestingness + + +def test_interestingness_1_0_1(global_var): + tbl = lux.LuxSQLTable() + tbl.set_SQL_table("car") + + tbl.set_intent( + [ + lux.Clause(attribute="Origin", filter_op="=", value="USA"), + lux.Clause(attribute="Cylinders"), + ] + ) + tbl._repr_html_() + filter_score = tbl.recommendation["Filter"][0].score + assert tbl.current_vis[0].score == 0 + assert filter_score > 0 + tbl.clear_intent() + + +def test_interestingness_0_1_1(global_var): + tbl = lux.LuxSQLTable() + tbl.set_SQL_table("car") + + tbl.set_intent( + [ + lux.Clause(attribute="Origin", filter_op="=", value="?"), + lux.Clause(attribute="MilesPerGal"), + ] + ) + tbl._repr_html_() + assert interestingness(tbl.recommendation["Current Vis"][0], tbl) != None + assert str(tbl.recommendation["Current Vis"][0]._inferred_intent[2].value) == "USA" + tbl.clear_intent() + + +def test_interestingness_1_1_1(global_var): + tbl = lux.LuxSQLTable() + tbl.set_SQL_table("car") + + tbl.set_intent( + [ + lux.Clause(attribute="Horsepower"), + lux.Clause(attribute="Origin", filter_op="=", value="USA", bin_size=20), + ] + ) + tbl._repr_html_() + assert interestingness(tbl.recommendation["Enhance"][0], tbl) != None + + # check for top recommended Filter graph score is not none + assert interestingness(tbl.recommendation["Filter"][0], tbl) != None + tbl.clear_intent()