From 21e20cf67865642aa68fba56c175a4bdd5917290 Mon Sep 17 00:00:00 2001 From: Piyush Gupta Date: Thu, 19 Nov 2020 18:05:07 +0530 Subject: [PATCH] Global shared variable in test (#144) (#149) * using global shared variable in test (lux-org#144) * modified fixture scope as session, resolved dependency test cases * run black Co-authored-by: Doris Lee --- .gitignore | 1 + lux/core/frame.py | 1 + tests/conftest.py | 10 +++ tests/test_action.py | 32 +++---- tests/test_compiler.py | 67 +++++++-------- tests/test_config.py | 20 ++--- tests/test_dates.py | 8 +- tests/test_display.py | 16 ++-- tests/test_error_warning.py | 19 +++-- tests/test_executor.py | 44 +++++----- tests/test_interestingness.py | 37 ++++---- tests/test_maintainence.py | 22 ++--- tests/test_nan.py | 4 +- tests/test_pandas.py | 4 +- tests/test_pandas_coverage.py | 54 ++++++------ tests/test_parser.py | 153 ++++++++++++++++++---------------- tests/test_performance.py | 3 +- tests/test_type.py | 7 +- tests/test_vis.py | 74 ++++++++-------- 19 files changed, 301 insertions(+), 275 deletions(-) create mode 100644 tests/conftest.py diff --git a/.gitignore b/.gitignore index 4f6eff0f..ecd2d1cc 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,6 @@ static/ *.egg-info* build/ .DS_Store +.idea/ tests/.coverage tests/coverage.xml diff --git a/lux/core/frame.py b/lux/core/frame.py index fe8b9b97..45e91ed4 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -246,6 +246,7 @@ def intent(self, intent_input: Union[List[Union[str, Clause]], Vis]): def clear_intent(self): self.intent = [] + self.expire_recs() def set_intent(self, intent: List[Union[str, Clause]]): """ diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..2b95b1cd --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,10 @@ +import pytest +import pandas as pd + + +@pytest.fixture(scope="session") +def global_var(): + url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" + pytest.olympic = pd.read_csv(url) + pytest.car_df = pd.read_csv("lux/data/car.csv") + pytest.college_df = pd.read_csv("lux/data/college.csv") diff --git a/tests/test_action.py b/tests/test_action.py index 44337181..8af08658 100644 --- a/tests/test_action.py +++ b/tests/test_action.py @@ -19,16 +19,16 @@ from lux.vis.Vis import Vis -def test_vary_filter_val(): - df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") +def test_vary_filter_val(global_var): + df = pytest.olympic vis = Vis(["Height", "SportType=Ball"], df) df.set_intent_as_vis(vis) df._repr_html_() assert len(df.recommendation["Filter"]) == len(df["SportType"].unique()) - 1 -def test_filter_inequality(): - df = pd.read_csv("lux/data/car.csv") +def test_filter_inequality(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent( @@ -48,9 +48,9 @@ def test_filter_inequality(): assert fltr_clause.value == 10 -def test_generalize_action(): +def test_generalize_action(global_var): # test that generalize action creates all unique visualizations - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df df["Year"] = pd.to_datetime( df["Year"], format="%Y" ) # change pandas dtype for the column "Year" to datetype @@ -72,9 +72,10 @@ def test_generalize_action(): assert check1 and check2 and check3 -def test_row_column_group(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/state_timeseries.csv?raw=true" - df = pd.read_csv(url) +def test_row_column_group(global_var): + df = pd.read_csv( + "https://github.com/lux-org/lux-datasets/blob/master/data/state_timeseries.csv?raw=true" + ) df["Date"] = pd.to_datetime(df["Date"]) tseries = df.pivot(index="State", columns="Date", values="Value") # Interpolating missing values @@ -85,8 +86,8 @@ def test_row_column_group(): assert list(tseries.recommendation.keys()) == ["Row Groups", "Column Groups"] -def test_groupby(): - df = pd.read_csv("lux/data/college.csv") +def test_groupby(global_var): + df = pytest.college_df groupbyResult = df.groupby("Region").sum() groupbyResult._repr_html_() assert list(groupbyResult.recommendation.keys()) == ["Column Groups"] @@ -159,17 +160,17 @@ def test_crosstab(): assert list(result.recommendation.keys()) == ["Row Groups", "Column Groups"] -def test_custom_aggregation(): +def test_custom_aggregation(global_var): import numpy as np - df = pd.read_csv("lux/data/college.csv") + df = pytest.college_df df.set_intent(["HighestDegree", lux.Clause("AverageCost", aggregation=np.ptp)]) df._repr_html_() assert list(df.recommendation.keys()) == ["Enhance", "Filter", "Generalize"] -def test_year_filter_value(): - df = pd.read_csv("lux/data/car.csv") +def test_year_filter_value(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent(["Acceleration", "Horsepower"]) df._repr_html_() @@ -191,3 +192,4 @@ def test_year_filter_value(): assert ( "T00:00:00.000000000" not in vis.to_Altair() ), "Year filter title contains extraneous string, not displayed as summarized string" + df.clear_intent() diff --git a/tests/test_compiler.py b/tests/test_compiler.py index 037b7534..27857598 100644 --- a/tests/test_compiler.py +++ b/tests/test_compiler.py @@ -19,11 +19,11 @@ from lux.vis.VisList import VisList -def test_underspecified_no_vis(test_recs): +def test_underspecified_no_vis(global_var, test_recs): no_vis_actions = ["Correlation", "Distribution", "Occurrence", "Temporal"] - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df test_recs(df, no_vis_actions) - assert len(df.current_vis) == 0 + assert df.current_vis is None or len(df.current_vis) == 0 # test only one filter context case. df.set_intent([lux.Clause(attribute="Origin", filter_op="=", value="USA")]) @@ -31,9 +31,9 @@ def test_underspecified_no_vis(test_recs): assert len(df.current_vis) == 0 -def test_underspecified_single_vis(test_recs): +def test_underspecified_single_vis(global_var, test_recs): one_vis_actions = ["Enhance", "Filter", "Generalize"] - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df df.set_intent([lux.Clause(attribute="MilesPerGal"), lux.Clause(attribute="Weight")]) test_recs(df, one_vis_actions) assert len(df.current_vis) == 1 @@ -42,6 +42,7 @@ def test_underspecified_single_vis(test_recs): assert attr.data_model == "measure" for attr in df.current_vis[0]._inferred_intent: assert attr.data_type == "quantitative" + df.clear_intent() # def test_underspecified_vis_collection(test_recs): @@ -75,8 +76,8 @@ def test_underspecified_single_vis(test_recs): # df.set_intent([lux.Clause(attribute ="?", data_model="measure"), lux.Clause(attribute ="?", data_model="measure")]) # assert len(df.current_vis) == len([vis.get_attr_by_data_model("measure") for vis in df.current_vis]) #should be 25 # test_recs(df, multiple_vis_actions) -def test_set_intent_as_vis(test_recs): - df = pd.read_csv("lux/data/car.csv") +def test_set_intent_as_vis(global_var, test_recs): + df = pytest.car_df df._repr_html_() vis = df.recommendation["Correlation"][0] df.intent = vis @@ -95,19 +96,19 @@ def test_recs_function(df, actions): return test_recs_function -def test_parse(): - df = pd.read_csv("lux/data/car.csv") +def test_parse(global_var): + df = pytest.car_df vlst = VisList([lux.Clause("Origin=?"), lux.Clause(attribute="MilesPerGal")], df) assert len(vlst) == 3 - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df vlst = VisList([lux.Clause("Origin=?"), lux.Clause("MilesPerGal")], df) assert len(vlst) == 3 -def test_underspecified_vis_collection_zval(): +def test_underspecified_vis_collection_zval(global_var): # check if the number of charts is correct - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df vlst = VisList( [ lux.Clause(attribute="Origin", filter_op="=", value="?"), @@ -123,11 +124,11 @@ def test_underspecified_vis_collection_zval(): # assert len(vlst) == 8 -def test_sort_bar(): +def test_sort_bar(global_var): from lux.processor.Compiler import Compiler from lux.vis.Vis import Vis - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df vis = Vis( [ lux.Clause(attribute="Acceleration", data_model="measure", data_type="quantitative"), @@ -138,7 +139,7 @@ def test_sort_bar(): assert vis.mark == "bar" assert vis._inferred_intent[1].sort == "" - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df vis = Vis( [ lux.Clause(attribute="Acceleration", data_model="measure", data_type="quantitative"), @@ -150,8 +151,8 @@ def test_sort_bar(): assert vis._inferred_intent[1].sort == "ascending" -def test_specified_vis_collection(): - df = pd.read_csv("lux/data/car.csv") +def test_specified_vis_collection(global_var): + df = pytest.car_df # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") @@ -181,8 +182,8 @@ def test_specified_vis_collection(): assert "Origin = Europe" not in chart_titles -def test_specified_channel_enforced_vis_collection(): - df = pd.read_csv("lux/data/car.csv") +def test_specified_channel_enforced_vis_collection(global_var): + df = pytest.car_df # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") visList = VisList( @@ -193,9 +194,9 @@ def test_specified_channel_enforced_vis_collection(): check_attribute_on_channel(vis, "MilesPerGal", "x") -def test_autoencoding_scatter(): +def test_autoencoding_scatter(global_var): # No channel specified - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis([lux.Clause(attribute="MilesPerGal"), lux.Clause(attribute="Weight")], df) @@ -234,9 +235,9 @@ def test_autoencoding_scatter(): ) -def test_autoencoding_histogram(): +def test_autoencoding_histogram(global_var): # No channel specified - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis([lux.Clause(attribute="MilesPerGal", channel="y")], df) @@ -247,8 +248,8 @@ def test_autoencoding_histogram(): assert vis.get_attr_by_channel("y")[0].attribute == "Record" -def test_autoencoding_line_chart(): - df = pd.read_csv("lux/data/car.csv") +def test_autoencoding_line_chart(global_var): + df = pytest.car_df # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis([lux.Clause(attribute="Year"), lux.Clause(attribute="Acceleration")], df) @@ -287,8 +288,8 @@ def test_autoencoding_line_chart(): ) -def test_autoencoding_color_line_chart(): - df = pd.read_csv("lux/data/car.csv") +def test_autoencoding_color_line_chart(global_var): + df = pytest.car_df # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") intent = [ @@ -302,8 +303,8 @@ def test_autoencoding_color_line_chart(): check_attribute_on_channel(vis, "Origin", "color") -def test_autoencoding_color_scatter_chart(): - df = pd.read_csv("lux/data/car.csv") +def test_autoencoding_color_scatter_chart(global_var): + df = pytest.car_df # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") vis = Vis( @@ -327,10 +328,10 @@ def test_autoencoding_color_scatter_chart(): check_attribute_on_channel(vis, "Acceleration", "color") -def test_populate_options(): +def test_populate_options(global_var): from lux.processor.Compiler import Compiler - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df df.set_intent([lux.Clause(attribute="?"), lux.Clause(attribute="MilesPerGal")]) col_set = set() for specOptions in Compiler.populate_wildcard_options(df._intent, df)["attributes"]: @@ -355,8 +356,8 @@ def test_populate_options(): ) -def test_remove_all_invalid(): - df = pd.read_csv("lux/data/car.csv") +def test_remove_all_invalid(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") # with pytest.warns(UserWarning,match="duplicate attribute specified in the intent"): df.set_intent( diff --git a/tests/test_config.py b/tests/test_config.py index adfd2655..ba9fe05d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -48,8 +48,8 @@ def contain_horsepower(df): return df -def test_default_actions_registered(): - df = pd.read_csv("lux/data/car.csv") +def test_default_actions_registered(global_var): + df = pytest.car_df df._repr_html_() assert "Distribution" in df.recommendation assert len(df.recommendation["Distribution"]) > 0 @@ -91,13 +91,13 @@ def test_no_validator(): assert "bars" in df.recommendation -def test_invalid_function(): +def test_invalid_function(global_var): df = pd.read_csv("lux/data/car.csv") with pytest.raises(ValueError, match="Value must be a callable"): lux.register_action("bars", "not a Callable") -def test_invalid_validator(): +def test_invalid_validator(global_var): df = pd.read_csv("lux/data/car.csv") def random_categorical(ldf): @@ -136,14 +136,14 @@ def test_remove_action(): ) -def test_remove_invalid_action(): - df = pd.read_csv("lux/data/car.csv") +def test_remove_invalid_action(global_var): + df = pytest.car_df with pytest.raises(ValueError, match="Option 'bars' has not been registered"): lux.remove_action("bars") -def test_remove_default_actions(): - df = pd.read_csv("lux/data/car.csv") +def test_remove_default_actions(global_var): + df = pytest.car_df df._repr_html_() lux.remove_action("Distribution") @@ -178,8 +178,8 @@ def test_remove_default_actions(): # TODO: This test does not pass in pytest but is working in Jupyter notebook. -# def test_plot_setting(): -# df = pd.read_csv("lux/data/car.csv") +# def test_plot_setting(global_var): +# df = pytest.car_df # df["Year"] = pd.to_datetime(df["Year"], format='%Y') # def change_color_add_title(chart): # chart = chart.configure_mark(color="green") # change mark color to green diff --git a/tests/test_dates.py b/tests/test_dates.py index 8a5cc823..b8e902b9 100644 --- a/tests/test_dates.py +++ b/tests/test_dates.py @@ -20,7 +20,7 @@ from lux.executor.PandasExecutor import PandasExecutor -def test_dateformatter(): +def test_dateformatter(global_var): ldf = pd.read_csv("lux/data/car.csv") # change pandas dtype for the column "Year" to datetype ldf["Year"] = pd.to_datetime(ldf["Year"], format="%Y") @@ -37,7 +37,7 @@ def test_dateformatter(): assert date_utils.date_formatter(timestamp, ldf) == "2019-8-26" -def test_period_selection(): +def test_period_selection(global_var): ldf = pd.read_csv("lux/data/car.csv") ldf["Year"] = pd.to_datetime(ldf["Year"], format="%Y") @@ -56,7 +56,7 @@ def test_period_selection(): assert all(ldf.current_vis[2].data.columns == ["Year", "Acceleration"]) -def test_period_filter(): +def test_period_filter(global_var): ldf = pd.read_csv("lux/data/car.csv") ldf["Year"] = pd.to_datetime(ldf["Year"], format="%Y") @@ -70,7 +70,7 @@ def test_period_filter(): assert isinstance(ldf.recommendation["Filter"][2]._inferred_intent[2].value, pd.Period) -def test_period_to_altair(): +def test_period_to_altair(global_var): chart = None df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") diff --git a/tests/test_display.py b/tests/test_display.py index 54da6ca4..7716867c 100644 --- a/tests/test_display.py +++ b/tests/test_display.py @@ -19,23 +19,23 @@ from lux.vis.VisList import VisList -def test_to_pandas(): - df = pd.read_csv("lux/data/car.csv") +def test_to_pandas(global_var): + df = pytest.car_df df.to_pandas() -def test_display_LuxDataframe(): - df = pd.read_csv("lux/data/car.csv") +def test_display_LuxDataframe(global_var): + df = pytest.car_df df._repr_html_() -def test_display_Vis(): - df = pd.read_csv("lux/data/car.csv") +def test_display_Vis(global_var): + df = pytest.car_df vis = Vis(["Horsepower", "Acceleration"], df) vis._repr_html_() -def test_display_VisList(): - df = pd.read_csv("lux/data/car.csv") +def test_display_VisList(global_var): + df = pytest.car_df vislist = VisList(["?", "Acceleration"], df) vislist._repr_html_() diff --git a/tests/test_error_warning.py b/tests/test_error_warning.py index dbbffe7a..0c6bca73 100644 --- a/tests/test_error_warning.py +++ b/tests/test_error_warning.py @@ -17,27 +17,28 @@ import pandas as pd from lux.vis.Vis import Vis + # Test suite for checking if the expected errors and warnings are showing up correctly -def test_intent_str_error(): - df = pd.read_csv("lux/data/college.csv") +def test_intent_str_error(global_var): + df = pytest.college_df with pytest.raises(TypeError, match="Input intent must be either a list"): df.intent = "bad string input" -def test_export_b4_widget_created(): +def test_export_b4_widget_created(global_var): df = pd.read_csv("lux/data/college.csv") with pytest.warns(UserWarning, match="No widget attached to the dataframe"): df.exported -def test_bad_filter(): - df = pd.read_csv("lux/data/college.csv") +def test_bad_filter(global_var): + df = pytest.college_df with pytest.warns(UserWarning, match="Lux can not operate on an empty dataframe"): df[df["Region"] == "asdfgh"]._repr_html_() -def test_multi_vis(): - df = pd.read_csv("lux/data/college.csv") +def test_multi_vis(global_var): + df = pytest.college_df with pytest.raises( SyntaxError, match="The intent that you specified corresponds to more than one visualization.", @@ -70,10 +71,10 @@ def test_multi_vis(): # Test Properties with Private Variables Readable but not Writable -def test_vis_private_properties(): +def test_vis_private_properties(global_var): from lux.vis.Vis import Vis - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df vis = Vis(["Horsepower", "Weight"], df) vis._repr_html_() assert isinstance(vis.data, lux.core.frame.LuxDataFrame) diff --git a/tests/test_executor.py b/tests/test_executor.py index 268243f0..c3d6aa78 100644 --- a/tests/test_executor.py +++ b/tests/test_executor.py @@ -20,8 +20,8 @@ from lux.vis.VisList import VisList -def test_lazy_execution(): - df = pd.read_csv("lux/data/car.csv") +def test_lazy_execution(global_var): + df = pytest.car_df intent = [ lux.Clause(attribute="Horsepower", aggregation="mean"), lux.Clause(attribute="Origin"), @@ -33,8 +33,8 @@ def test_lazy_execution(): assert type(vis.data) == lux.core.frame.LuxDataFrame -def test_selection(): - df = pd.read_csv("lux/data/car.csv") +def test_selection(global_var): + df = pytest.car_df # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") intent = [ @@ -46,8 +46,8 @@ def test_selection(): assert all(vislist[2].data.columns == ["Year", "Acceleration"]) -def test_aggregation(): - df = pd.read_csv("lux/data/car.csv") +def test_aggregation(global_var): + df = pytest.car_df intent = [ lux.Clause(attribute="Horsepower", aggregation="mean"), lux.Clause(attribute="Origin"), @@ -73,11 +73,11 @@ def test_aggregation(): assert int(result_df[result_df["Origin"] == "Europe"]["Horsepower"]) == 133 -def test_colored_bar_chart(): +def test_colored_bar_chart(global_var): from lux.vis.Vis import Vis from lux.vis.Vis import Clause - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df x_clause = Clause(attribute="MilesPerGal", channel="x") y_clause = Clause(attribute="Origin", channel="y") @@ -92,11 +92,11 @@ def test_colored_bar_chart(): assert len(new_vis.data) == 15 > group_by_cardinality < color_cardinality * group_by_cardinality -def test_colored_line_chart(): +def test_colored_line_chart(global_var): from lux.vis.Vis import Vis from lux.vis.Vis import Clause - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") @@ -114,8 +114,8 @@ def test_colored_line_chart(): assert len(new_vis.data) == 60 > group_by_cardinality < color_cardinality * group_by_cardinality -def test_filter(): - df = pd.read_csv("lux/data/car.csv") +def test_filter(global_var): + df = pytest.car_df # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") intent = [ @@ -129,8 +129,8 @@ def test_filter(): assert len(vis.data) == len(df[df["Origin"] == "USA"]) -def test_inequalityfilter(): - df = pd.read_csv("lux/data/car.csv") +def test_inequalityfilter(global_var): + df = pytest.car_df vis = Vis( [ lux.Clause(attribute="Horsepower", filter_op=">", value=50), @@ -157,21 +157,21 @@ def test_inequalityfilter(): assert len(vis.data) == Nbins -def test_binning(): - df = pd.read_csv("lux/data/car.csv") +def test_binning(global_var): + df = pytest.car_df vis = Vis([lux.Clause(attribute="Horsepower")], df) nbins = list(filter(lambda x: x.bin_size != 0, vis._inferred_intent))[0].bin_size assert len(vis.data) == nbins -def test_record(): - df = pd.read_csv("lux/data/car.csv") +def test_record(global_var): + df = pytest.car_df vis = Vis([lux.Clause(attribute="Cylinders")], df) assert len(vis.data) == len(df["Cylinders"].unique()) -def test_filter_aggregation_fillzero_aligned(): - df = pd.read_csv("lux/data/car.csv") +def test_filter_aggregation_fillzero_aligned(global_var): + df = pytest.car_df intent = [ lux.Clause(attribute="Cylinders"), lux.Clause(attribute="MilesPerGal"), @@ -187,8 +187,8 @@ def test_filter_aggregation_fillzero_aligned(): assert result[result["Cylinders"] == 6]["MilesPerGal"].values[0] == externalValidation[6] -def test_exclude_attribute(): - df = pd.read_csv("lux/data/car.csv") +def test_exclude_attribute(global_var): + df = pytest.car_df intent = [lux.Clause("?", exclude=["Name", "Year"]), lux.Clause("Horsepower")] vislist = VisList(intent, df) for vis in vislist: diff --git a/tests/test_interestingness.py b/tests/test_interestingness.py index d62b4b40..eceadcde 100644 --- a/tests/test_interestingness.py +++ b/tests/test_interestingness.py @@ -18,9 +18,10 @@ import numpy as np from lux.interestingness.interestingness import interestingness + # The following test cases are labelled for vis with -def test_interestingness_1_0_0(): - df = pd.read_csv("lux/data/car.csv") +def test_interestingness_1_0_0(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent([lux.Clause(attribute="Origin")]) @@ -57,8 +58,8 @@ def test_interestingness_1_0_0(): assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3 -def test_interestingness_1_0_1(): - df = pd.read_csv("lux/data/car.csv") +def test_interestingness_1_0_1(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent( @@ -71,8 +72,8 @@ def test_interestingness_1_0_1(): assert df.current_vis[0].score == 0 -def test_interestingness_0_1_0(): - df = pd.read_csv("lux/data/car.csv") +def test_interestingness_0_1_0(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent([lux.Clause(attribute="Horsepower")]) @@ -115,8 +116,8 @@ def test_interestingness_0_1_0(): assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3 -def test_interestingness_0_1_1(): - df = pd.read_csv("lux/data/car.csv") +def test_interestingness_0_1_1(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent( @@ -130,8 +131,8 @@ def test_interestingness_0_1_1(): assert str(df.recommendation["Current Vis"][0]._inferred_intent[2].value) == "USA" -def test_interestingness_1_1_0(): - df = pd.read_csv("lux/data/car.csv") +def test_interestingness_1_1_0(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent([lux.Clause(attribute="Horsepower"), lux.Clause(attribute="Year")]) @@ -160,8 +161,8 @@ def test_interestingness_1_1_0(): assert interestingness(df.recommendation["Filter"][0], df) != None -def test_interestingness_1_1_1(): - df = pd.read_csv("lux/data/car.csv") +def test_interestingness_1_1_1(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent( @@ -198,12 +199,12 @@ def test_interestingness_1_1_1(): assert interestingness(df.recommendation["Filter"][0], df) != None -def test_interestingness_1_2_0(): +def test_interestingness_1_2_0(global_var): from lux.vis.Vis import Vis from lux.vis.Vis import Clause from lux.interestingness.interestingness import interestingness - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df y_clause = Clause(attribute="Name", channel="y") color_clause = Clause(attribute="Cylinders", channel="color") @@ -215,8 +216,8 @@ def test_interestingness_1_2_0(): assert interestingness(new_vis, df) < 0.01 -def test_interestingness_0_2_0(): - df = pd.read_csv("lux/data/car.csv") +def test_interestingness_0_2_0(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent([lux.Clause(attribute="Horsepower"), lux.Clause(attribute="Acceleration")]) @@ -262,8 +263,8 @@ def test_interestingness_0_2_0(): assert interestingness(df.recommendation["Generalize"][0], df) != None -def test_interestingness_0_2_1(): - df = pd.read_csv("lux/data/car.csv") +def test_interestingness_0_2_1(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent( diff --git a/tests/test_maintainence.py b/tests/test_maintainence.py index 1c2137ca..4527c21d 100644 --- a/tests/test_maintainence.py +++ b/tests/test_maintainence.py @@ -19,32 +19,32 @@ from lux.vis.Vis import Vis -def test_metadata_subsequent_display(): - df = pd.read_csv("lux/data/car.csv") +def test_metadata_subsequent_display(global_var): + df = pytest.car_df df._repr_html_() assert df._metadata_fresh == True, "Failed to maintain metadata after display df" df._repr_html_() assert df._metadata_fresh == True, "Failed to maintain metadata after display df" -def test_metadata_subsequent_vis(): - df = pd.read_csv("lux/data/car.csv") +def test_metadata_subsequent_vis(global_var): + df = pytest.car_df df._repr_html_() assert df._metadata_fresh == True, "Failed to maintain metadata after display df" vis = Vis(["Acceleration", "Horsepower"], df) assert df._metadata_fresh == True, "Failed to maintain metadata after display df" -def test_metadata_inplace_operation(): - df = pd.read_csv("lux/data/car.csv") +def test_metadata_inplace_operation(global_var): + df = pytest.car_df df._repr_html_() assert df._metadata_fresh == True, "Failed to maintain metadata after display df" df.dropna(inplace=True) assert df._metadata_fresh == False, "Failed to expire metadata after in-place Pandas operation" -def test_metadata_new_df_operation(): - df = pd.read_csv("lux/data/car.csv") +def test_metadata_new_df_operation(global_var): + df = pytest.car_df df._repr_html_() assert df._metadata_fresh == True, "Failed to maintain metadata after display df" df[["MilesPerGal", "Acceleration"]] @@ -53,7 +53,7 @@ def test_metadata_new_df_operation(): assert not hasattr(df2, "_metadata_fresh") -def test_metadata_column_group_reset_df(): +def test_metadata_column_group_reset_df(global_var): df = pd.read_csv("lux/data/car.csv") assert not hasattr(df, "_metadata_fresh") df["Year"] = pd.to_datetime(df["Year"], format="%Y") @@ -70,8 +70,8 @@ def test_metadata_column_group_reset_df(): assert rec.mark == "bar", "Column Group not displaying bar charts" -def test_recs_inplace_operation(): - df = pd.read_csv("lux/data/car.csv") +def test_recs_inplace_operation(global_var): + df = pytest.car_df df._repr_html_() assert df._recs_fresh == True, "Failed to maintain recommendation after display df" assert len(df.recommendation["Occurrence"]) == 4 diff --git a/tests/test_nan.py b/tests/test_nan.py index df2b8e9f..f91f7c0c 100644 --- a/tests/test_nan.py +++ b/tests/test_nan.py @@ -20,8 +20,8 @@ from lux.vis.Vis import Vis -def test_nan_column(): - df = pd.read_csv("lux/data/college.csv") +def test_nan_column(global_var): + df = pytest.college_df df["Geography"] = np.nan df._repr_html_() for visList in df.recommendation.keys(): diff --git a/tests/test_pandas.py b/tests/test_pandas.py index 60f6a91c..c3895779 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -28,8 +28,8 @@ # assert series.name == "Weight", "Pandas Series original `name` property not retained." -def test_head_tail(): - df = pd.read_csv("lux/data/car.csv") +def test_head_tail(global_var): + df = pytest.car_df df._repr_html_() assert df._message.to_html() == "" df.head()._repr_html_() diff --git a/tests/test_pandas_coverage.py b/tests/test_pandas_coverage.py index d5ebfeb3..e224e404 100644 --- a/tests/test_pandas_coverage.py +++ b/tests/test_pandas_coverage.py @@ -21,7 +21,7 @@ ################### -def test_deepcopy(): +def test_deepcopy(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") df._repr_html_() @@ -30,7 +30,7 @@ def test_deepcopy(): check_metadata_equal(df, saved_df) -def test_rename_inplace(): +def test_rename_inplace(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") df._repr_html_() @@ -65,7 +65,7 @@ def test_rename_inplace(): assert df.pre_aggregated == new_df.pre_aggregated -def test_rename(): +def test_rename(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") df._repr_html_() @@ -95,7 +95,7 @@ def test_rename(): assert df.pre_aggregated == new_df.pre_aggregated -def test_rename3(): +def test_rename3(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") @@ -122,7 +122,7 @@ def test_rename3(): assert "col2" in list(df.cardinality.keys()) -def test_concat(): +def test_concat(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") @@ -136,7 +136,7 @@ def test_concat(): assert len(new_df.cardinality) == 5 -def test_groupby_agg(): +def test_groupby_agg(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.groupby("Year").agg(sum) @@ -145,20 +145,20 @@ def test_groupby_agg(): assert len(new_df.cardinality) == 7 -def test_qcut(): +def test_qcut(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") df["Weight"] = pd.qcut(df["Weight"], q=3) df._repr_html_() -def test_cut(): +def test_cut(global_var): df = pd.read_csv("lux/data/car.csv") df["Weight"] = pd.cut(df["Weight"], bins=[0, 2500, 7500, 10000], labels=["small", "medium", "large"]) df._repr_html_() -def test_groupby_agg_very_small(): +def test_groupby_agg_very_small(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") @@ -168,7 +168,7 @@ def test_groupby_agg_very_small(): assert len(new_df.cardinality) == 7 -# def test_groupby_multi_index(): +# def test_groupby_multi_index(global_var): # url = 'https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true' # df = pd.read_csv(url) # df["Year"] = pd.to_datetime(df["Year"], format='%Y') @@ -178,7 +178,7 @@ def test_groupby_agg_very_small(): # assert len(new_df.cardinality) == 7 # TODO -def test_query(): +def test_query(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.query("Weight > 3000") @@ -192,7 +192,7 @@ def test_query(): assert len(new_df.cardinality) == 10 -def test_pop(): +def test_pop(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.pop("Weight") @@ -206,7 +206,7 @@ def test_pop(): assert len(df.cardinality) == 9 -def test_transform(): +def test_transform(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.iloc[:, 1:].groupby("Origin").transform(sum) @@ -215,7 +215,7 @@ def test_transform(): assert len(new_df.cardinality) == 7 -def test_get_group(): +def test_get_group(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") gbobj = df.groupby("Origin") @@ -230,7 +230,7 @@ def test_get_group(): assert len(new_df.cardinality) == 10 -def test_applymap(): +def test_applymap(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") mapping = {"USA": 0, "Europe": 1, "Japan": 2} @@ -245,7 +245,7 @@ def test_applymap(): assert len(df.cardinality) == 10 -def test_strcat(): +def test_strcat(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") df["combined"] = df["Origin"].str.cat(df["Brand"], sep=", ") @@ -259,7 +259,7 @@ def test_strcat(): assert len(df.cardinality) == 11 -def test_named_agg(): +def test_named_agg(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.groupby("Brand").agg( @@ -272,7 +272,7 @@ def test_named_agg(): assert len(new_df.cardinality) == 4 -def test_change_dtype(): +def test_change_dtype(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") df["Cylinders"] = pd.Series(df["Cylinders"], dtype="Int64") @@ -286,7 +286,7 @@ def test_change_dtype(): assert len(df.data_type_lookup) == 10 -def test_get_dummies(): +def test_get_dummies(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = pd.get_dummies(df) @@ -300,7 +300,7 @@ def test_get_dummies(): assert len(new_df.data_type_lookup) == 339 -def test_drop(): +def test_drop(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.drop([0, 1, 2], axis="rows") @@ -315,7 +315,7 @@ def test_drop(): assert len(new_df2.cardinality) == 7 -def test_merge(): +def test_merge(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.drop([0, 1, 2], axis="rows") @@ -330,7 +330,7 @@ def test_merge(): assert len(new_df2.cardinality) == 11 -def test_prefix(): +def test_prefix(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.add_prefix("1_") @@ -345,7 +345,7 @@ def test_prefix(): assert new_df.cardinality["1_Name"] == 300 -def test_loc(): +def test_loc(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.loc[:, "Displacement":"Origin"] @@ -374,7 +374,7 @@ def test_loc(): assert len(new_df.cardinality) == 3 -def test_iloc(): +def test_iloc(global_var): df = pd.read_csv("lux/data/car.csv") df["Year"] = pd.to_datetime(df["Year"], format="%Y") new_df = df.iloc[:, 3:9] @@ -472,7 +472,7 @@ def compare_vis(vis1, vis2): ################ -def test_df_to_series(): +def test_df_to_series(global_var): # Ensure metadata is kept when going from df to series df = pd.read_csv("lux/data/car.csv") df._repr_html_() # compute metadata @@ -503,7 +503,7 @@ def test_df_to_series(): assert series.name == "Weight", "Pandas Series original `name` property not retained." -def test_value_counts(): +def test_value_counts(global_var): df = pd.read_csv("lux/data/car.csv") df._repr_html_() # compute metadata assert df.cardinality is not None @@ -533,7 +533,7 @@ def test_value_counts(): assert series.name == "Weight", "Pandas Series original `name` property not retained." -def test_str_replace(): +def test_str_replace(global_var): df = pd.read_csv("lux/data/car.csv") df._repr_html_() # compute metadata assert df.cardinality is not None diff --git a/tests/test_parser.py b/tests/test_parser.py index b37e5db7..a59be1ec 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -17,88 +17,97 @@ import pytest -def test_case1(): - ldf = pd.read_csv("lux/data/car.csv") - ldf.set_intent(["Horsepower"]) - assert type(ldf._intent[0]) is lux.Clause - assert ldf._intent[0].attribute == "Horsepower" - - -def test_case2(): - ldf = pd.read_csv("lux/data/car.csv") - ldf.set_intent(["Horsepower", lux.Clause("MilesPerGal", channel="x")]) - assert type(ldf._intent[0]) is lux.Clause - assert ldf._intent[0].attribute == "Horsepower" - assert type(ldf._intent[1]) is lux.Clause - assert ldf._intent[1].attribute == "MilesPerGal" - - -def test_case3(): - ldf = pd.read_csv("lux/data/car.csv") - ldf.set_intent(["Horsepower", "Origin=USA"]) - assert type(ldf._intent[0]) is lux.Clause - assert ldf._intent[0].attribute == "Horsepower" - assert type(ldf._intent[1]) is lux.Clause - assert ldf._intent[1].attribute == "Origin" - assert ldf._intent[1].value == "USA" - - -def test_case4(): - ldf = pd.read_csv("lux/data/car.csv") - ldf.set_intent(["Horsepower", "Origin=USA|Japan"]) - assert type(ldf._intent[0]) is lux.Clause - assert ldf._intent[0].attribute == "Horsepower" - assert type(ldf._intent[1]) is lux.Clause - assert ldf._intent[1].attribute == "Origin" - assert ldf._intent[1].value == ["USA", "Japan"] - - -def test_case5(): - ldf = pd.read_csv("lux/data/car.csv") - ldf.set_intent([["Horsepower", "MilesPerGal", "Weight"], "Origin=USA"]) - assert type(ldf._intent[0]) is lux.Clause - assert ldf._intent[0].attribute == ["Horsepower", "MilesPerGal", "Weight"] - assert type(ldf._intent[1]) is lux.Clause - assert ldf._intent[1].attribute == "Origin" - assert ldf._intent[1].value == "USA" - - ldf.set_intent(["Horsepower|MilesPerGal|Weight", "Origin=USA"]) - assert type(ldf._intent[0]) is lux.Clause - assert ldf._intent[0].attribute == ["Horsepower", "MilesPerGal", "Weight"] - assert type(ldf._intent[1]) is lux.Clause - assert ldf._intent[1].attribute == "Origin" - assert ldf._intent[1].value == "USA" - - -def test_case6(): - ldf = pd.read_csv("lux/data/car.csv") - ldf.set_intent(["Horsepower", "Origin=?"]) - ldf._repr_html_() - assert type(ldf._intent[0]) is lux.Clause - assert ldf._intent[0].attribute == "Horsepower" - assert type(ldf._intent[1]) is lux.Clause - assert ldf._intent[1].attribute == "Origin" - assert ldf._intent[1].value == ["USA", "Japan", "Europe"] - - -def test_case7(): - df = pd.read_csv("lux/data/car.csv") +def test_case1(global_var): + df = pytest.car_df + df.set_intent(["Horsepower"]) + assert type(df._intent[0]) is lux.Clause + assert df._intent[0].attribute == "Horsepower" + df.clear_intent() + + +def test_case2(global_var): + df = pytest.car_df + df.set_intent(["Horsepower", lux.Clause("MilesPerGal", channel="x")]) + assert type(df._intent[0]) is lux.Clause + assert df._intent[0].attribute == "Horsepower" + assert type(df._intent[1]) is lux.Clause + assert df._intent[1].attribute == "MilesPerGal" + df.clear_intent() + + +def test_case3(global_var): + df = pytest.car_df + df.set_intent(["Horsepower", "Origin=USA"]) + assert type(df._intent[0]) is lux.Clause + assert df._intent[0].attribute == "Horsepower" + assert type(df._intent[1]) is lux.Clause + assert df._intent[1].attribute == "Origin" + assert df._intent[1].value == "USA" + df.clear_intent() + + +def test_case4(global_var): + df = pytest.car_df + df.set_intent(["Horsepower", "Origin=USA|Japan"]) + assert type(df._intent[0]) is lux.Clause + assert df._intent[0].attribute == "Horsepower" + assert type(df._intent[1]) is lux.Clause + assert df._intent[1].attribute == "Origin" + assert df._intent[1].value == ["USA", "Japan"] + df.clear_intent() + + +def test_case5(global_var): + df = pytest.car_df + df.set_intent([["Horsepower", "MilesPerGal", "Weight"], "Origin=USA"]) + assert type(df._intent[0]) is lux.Clause + assert df._intent[0].attribute == ["Horsepower", "MilesPerGal", "Weight"] + assert type(df._intent[1]) is lux.Clause + assert df._intent[1].attribute == "Origin" + assert df._intent[1].value == "USA" + + df.set_intent(["Horsepower|MilesPerGal|Weight", "Origin=USA"]) + assert type(df._intent[0]) is lux.Clause + assert df._intent[0].attribute == ["Horsepower", "MilesPerGal", "Weight"] + assert type(df._intent[1]) is lux.Clause + assert df._intent[1].attribute == "Origin" + assert df._intent[1].value == "USA" + df.clear_intent() + + +def test_case6(global_var): + df = pytest.car_df + df.set_intent(["Horsepower", "Origin=?"]) + df._repr_html_() + assert type(df._intent[0]) is lux.Clause + assert df._intent[0].attribute == "Horsepower" + assert type(df._intent[1]) is lux.Clause + assert df._intent[1].attribute == "Origin" + assert df._intent[1].value == ["USA", "Japan", "Europe"] + df.clear_intent() + + +def test_case7(global_var): + df = pytest.car_df df.intent = [["Horsepower", "MilesPerGal", "Acceleration"], "Origin"] df._repr_html_() assert len(df.current_vis) == 3 + df.clear_intent() -def test_validator_invalid_value(): - df = pd.read_csv("lux/data/college.csv") +def test_validator_invalid_value(global_var): + df = pytest.college_df with pytest.warns( UserWarning, match="The input value 'bob' does not exist for the attribute 'Region' for the DataFrame.", ): df.intent = ["Region=bob"] + df.clear_intent() + -def test_validator_invalid_filter(): - df = pd.read_csv("lux/data/college.csv") +def test_validator_invalid_filter(global_var): + df = pytest.college_df with pytest.raises(KeyError, match="'New England'"): with pytest.warns( @@ -108,8 +117,8 @@ def test_validator_invalid_filter(): df.intent = ["New England", "Southeast", "Far West"] -def test_validator_invalid_attribute(): - df = pd.read_csv("lux/data/college.csv") +def test_validator_invalid_attribute(global_var): + df = pytest.college_df with pytest.raises(KeyError, match="'blah'"): with pytest.warns( UserWarning, match="The input attribute 'blah' does not exist in the DataFrame." diff --git a/tests/test_performance.py b/tests/test_performance.py index 66a9bd6b..4e557075 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -17,9 +17,10 @@ import pandas as pd import time + # To run the script and see the printed result, run: # python -m pytest -s tests/test_performance.py -def test_q1_performance_census(): +def test_q1_performance_census(global_var): df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true") tic = time.perf_counter() df._repr_html_() diff --git a/tests/test_type.py b/tests/test_type.py index aa1b3b53..4c53656a 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -16,10 +16,10 @@ import pytest import pandas as pd + # Suite of test that checks if data_type inferred correctly by Lux def test_check_cars(): df = pd.read_csv("lux/data/car.csv") - df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.maintain_metadata() assert df.data_type_lookup["Name"] == "nominal" assert df.data_type_lookup["MilesPerGal"] == "quantitative" @@ -54,10 +54,7 @@ def test_check_str_id(): def test_check_hpi(): - df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/hpi.csv?raw=true").head( - 10 - ) - + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/hpi.csv?raw=true") df.maintain_metadata() assert df.data_type_lookup == { diff --git a/tests/test_vis.py b/tests/test_vis.py index 122c1e3c..f8273cdb 100644 --- a/tests/test_vis.py +++ b/tests/test_vis.py @@ -19,22 +19,23 @@ from lux.vis.Vis import Vis -def test_vis(): - df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") +def test_vis(global_var): + df = pytest.olympic vis = Vis(["Height", "SportType=Ball"], df) assert vis.get_attr_by_attr_name("Height")[0].bin_size != 0 assert vis.get_attr_by_attr_name("Record")[0].aggregation == "count" -def test_vis_set_specs(): - df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") +def test_vis_set_specs(global_var): + df = pytest.olympic vis = Vis(["Height", "SportType=Ball"], df) vis.set_intent(["Height", "SportType=Ice"]) assert vis.get_attr_by_attr_name("SportType")[0].value == "Ice" + df.clear_intent() -def test_vis_collection(): - df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") +def test_vis_collection(global_var): + df = pytest.olympic vlist = VisList(["Height", "SportType=Ball", "?"], df) vis_with_year = list(filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist))[0] assert vis_with_year.get_attr_by_channel("x")[0].attribute == "Year" @@ -44,22 +45,23 @@ def test_vis_collection(): assert len(vlist) == len(df.columns) - 1 # remove 1 for vis with for same attribute -def test_vis_collection_set_intent(): - df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") +def test_vis_collection_set_intent(global_var): + df = pytest.olympic vlist = VisList(["Height", "SportType=Ice", "?"], df) vlist.set_intent(["Height", "SportType=Boat", "?"]) for v in vlist._collection: filter_vspec = list(filter(lambda x: x.channel == "", v._inferred_intent))[0] assert filter_vspec.value == "Boat" + df.clear_intent() -def test_custom_plot_setting(): +def test_custom_plot_setting(global_var): def change_color_make_transparent_add_title(chart): chart = chart.configure_mark(color="green", opacity=0.2) chart.title = "Test Title" return chart - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df df.plot_config = change_color_make_transparent_add_title df._repr_html_() config_mark_addition = 'chart = chart.configure_mark(color="green", opacity=0.2)' @@ -69,37 +71,38 @@ def change_color_make_transparent_add_title(chart): assert title_addition in exported_code_str -def test_remove(): - df = pd.read_csv("lux/data/car.csv") +def test_remove(global_var): + df = pytest.car_df vis = Vis([lux.Clause("Horsepower"), lux.Clause("Acceleration")], df) vis.remove_column_from_spec("Horsepower", remove_first=False) assert vis._inferred_intent[0].attribute == "Acceleration" -def test_remove_identity(): - df = pd.read_csv("lux/data/car.csv") +def test_remove_identity(global_var): + df = pytest.car_df vis = Vis(["Horsepower", "Horsepower"], df) vis.remove_column_from_spec("Horsepower") assert vis._inferred_intent == [], "Remove all instances of Horsepower" - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df vis = Vis(["Horsepower", "Horsepower"], df) vis.remove_column_from_spec("Horsepower", remove_first=True) assert len(vis._inferred_intent) == 1, "Remove only 1 instances of Horsepower" assert vis._inferred_intent[0].attribute == "Horsepower", "Remove only 1 instances of Horsepower" -def test_refresh_collection(): - df = pd.read_csv("lux/data/car.csv") +def test_refresh_collection(global_var): + df = pytest.car_df df["Year"] = pd.to_datetime(df["Year"], format="%Y") df.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")]) df._repr_html_() enhanceCollection = df.recommendation["Enhance"] enhanceCollection.refresh_source(df[df["Origin"] == "USA"]) + df.clear_intent() -def test_vis_custom_aggregation_as_str(): - df = pd.read_csv("lux/data/college.csv") +def test_vis_custom_aggregation_as_str(global_var): + df = pytest.college_df import numpy as np vis = Vis(["HighestDegree", lux.Clause("AverageCost", aggregation="max")], df) @@ -107,8 +110,8 @@ def test_vis_custom_aggregation_as_str(): assert vis.get_attr_by_data_model("measure")[0]._aggregation_name == "max" -def test_vis_custom_aggregation_as_numpy_func(): - df = pd.read_csv("lux/data/college.csv") +def test_vis_custom_aggregation_as_numpy_func(global_var): + df = pytest.college_df from lux.vis.Vis import Vis import numpy as np @@ -117,9 +120,8 @@ def test_vis_custom_aggregation_as_numpy_func(): assert vis.get_attr_by_data_model("measure")[0]._aggregation_name == "ptp" -def test_vis_collection_via_list_of_vis(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) +def test_vis_collection_via_list_of_vis(global_var): + df = pytest.olympic # change pandas dtype for the column "Year" to datetype df["Year"] = pd.to_datetime(df["Year"], format="%Y") from lux.vis.VisList import VisList @@ -133,15 +135,15 @@ def test_vis_collection_via_list_of_vis(): assert len(vlist) == 5 -def test_vis_to_Altair_basic_df(): - df = pd.read_csv("lux/data/car.csv") +def test_vis_to_Altair_basic_df(global_var): + df = pytest.car_df vis = Vis(["Weight", "Horsepower"], df) code = vis.to_Altair() assert "alt.Chart(df)" in code, "Unable to export to Altair" -def test_vis_to_Altair_custom_named_df(): - df = pd.read_csv("lux/data/car.csv") +def test_vis_to_Altair_custom_named_df(global_var): + df = pytest.car_df some_weirdly_named_df = df.dropna() vis = Vis(["Weight", "Horsepower"], some_weirdly_named_df) code = vis.to_Altair() @@ -150,8 +152,8 @@ def test_vis_to_Altair_custom_named_df(): ), "Unable to export to Altair and detect custom df name" -def test_vis_to_Altair_standalone(): - df = pd.read_csv("lux/data/car.csv") +def test_vis_to_Altair_standalone(global_var): + df = pytest.car_df vis = Vis(["Weight", "Horsepower"], df) code = vis.to_Altair(standalone=True) assert ( @@ -161,8 +163,8 @@ def test_vis_to_Altair_standalone(): ) -def test_vis_list_custom_title_override(): - df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") +def test_vis_list_custom_title_override(global_var): + df = pytest.olympic df["Year"] = pd.to_datetime(df["Year"], format="%Y") vcLst = [] @@ -177,10 +179,10 @@ def test_vis_list_custom_title_override(): assert v.title == "overriding dummy title" -def test_vis_set_intent(): +def test_vis_set_intent(global_var): from lux.vis.Vis import Vis - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df vis = Vis(["Weight", "Horsepower"], df) vis._repr_html_() assert "Horsepower" in str(vis._code) @@ -189,10 +191,10 @@ def test_vis_set_intent(): assert "MilesPerGal" in str(vis._code) -def test_vis_list_set_intent(): +def test_vis_list_set_intent(global_var): from lux.vis.VisList import VisList - df = pd.read_csv("lux/data/car.csv") + df = pytest.car_df vislist = VisList(["Horsepower", "?"], df) vislist._repr_html_() for vis in vislist: