diff --git a/lux/core/frame.py b/lux/core/frame.py index 47748a77..3c6b3977 100644 --- a/lux/core/frame.py +++ b/lux/core/frame.py @@ -364,10 +364,8 @@ def get_SQL_attributes(self): table_name = self.table_name[self.table_name.index(".") + 1 :] else: table_name = self.table_name - attr_query = ( - f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'" - ) - attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"]) + query = f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'" + attributes = list(pd.read_sql(query, self.SQLconnection)["column_name"]) for attr in attributes: self[attr] = None @@ -401,10 +399,8 @@ def compute_SQL_data_type(self): table_name = self.table_name # get the data types of the attributes in the SQL table for attr in list(self.columns): - datatype_query = ( - f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'", - ) - datatype = list(pd.read_sql(datatype_query, self.SQLconnection)["data_type"])[0] + query = f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'" + datatype = list(pd.read_sql(query, self.SQLconnection)["data_type"])[0] sql_dtypes[attr] = datatype data_type = {"quantitative": [], "nominal": [], "temporal": []} diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index 6a41379d..a73e607b 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -246,18 +246,17 @@ def execute_binning(vis: Vis): import numpy as np bin_attribute = list(filter(lambda x: x.bin_size != 0, vis._inferred_intent))[0] - if not np.isnan(vis.data[bin_attribute.attribute]).all(): + bin_attr = bin_attribute.attribute + if not np.isnan(vis.data[bin_attr]).all(): # np.histogram breaks if array contain NaN - series = vis.data[bin_attribute.attribute].dropna() + series = vis.data[bin_attr].dropna() # TODO:binning runs for name attribte. Name attribute has datatype quantitative which is wrong. counts, bin_edges = np.histogram(series, bins=bin_attribute.bin_size) # bin_edges of size N+1, so need to compute bin_center as the bin location bin_center = np.mean(np.vstack([bin_edges[0:-1], bin_edges[1:]]), axis=0) # TODO: Should vis.data be a LuxDataFrame or a Pandas DataFrame? - vis._vis_data = pd.DataFrame( - np.array([bin_center, counts]).T, - columns=[bin_attribute.attribute, "Number of Records"], - ) + binned_result = np.array([bin_center, counts]).T + vis._vis_data = pd.DataFrame(binned_result, columns=[bin_attr, "Number of Records"]) @staticmethod def execute_filter(vis: Vis): diff --git a/lux/vislib/altair/BarChart.py b/lux/vislib/altair/BarChart.py index 0550e590..99e9b1fd 100644 --- a/lux/vislib/altair/BarChart.py +++ b/lux/vislib/altair/BarChart.py @@ -67,8 +67,8 @@ def initialize_chart(self): type=x_attr.data_type, axis=alt.Axis(labelOverlap=True), ) - y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title) x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', axis=alt.Axis(labelOverlap=True))" + y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title) y_attr_field_code = ( f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')" ) diff --git a/tests/test_action.py b/tests/test_action.py index 5775c614..44337181 100644 --- a/tests/test_action.py +++ b/tests/test_action.py @@ -20,8 +20,7 @@ def test_vary_filter_val(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vis = Vis(["Height", "SportType=Ball"], df) df.set_intent_as_vis(vis) df._repr_html_() diff --git a/tests/test_performance.py b/tests/test_performance.py index a30b4cd2..66a9bd6b 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -20,8 +20,7 @@ # To run the script and see the printed result, run: # python -m pytest -s tests/test_performance.py def test_q1_performance_census(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true") tic = time.perf_counter() df._repr_html_() toc = time.perf_counter() diff --git a/tests/test_vis.py b/tests/test_vis.py index bf1879fd..122c1e3c 100644 --- a/tests/test_vis.py +++ b/tests/test_vis.py @@ -20,24 +20,21 @@ def test_vis(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vis = Vis(["Height", "SportType=Ball"], df) assert vis.get_attr_by_attr_name("Height")[0].bin_size != 0 assert vis.get_attr_by_attr_name("Record")[0].aggregation == "count" def test_vis_set_specs(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vis = Vis(["Height", "SportType=Ball"], df) vis.set_intent(["Height", "SportType=Ice"]) assert vis.get_attr_by_attr_name("SportType")[0].value == "Ice" def test_vis_collection(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vlist = VisList(["Height", "SportType=Ball", "?"], df) vis_with_year = list(filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist))[0] assert vis_with_year.get_attr_by_channel("x")[0].attribute == "Year" @@ -48,8 +45,7 @@ def test_vis_collection(): def test_vis_collection_set_intent(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") vlist = VisList(["Height", "SportType=Ice", "?"], df) vlist.set_intent(["Height", "SportType=Boat", "?"]) for v in vlist._collection: @@ -166,8 +162,7 @@ def test_vis_to_Altair_standalone(): def test_vis_list_custom_title_override(): - url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true" - df = pd.read_csv(url) + df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true") df["Year"] = pd.to_datetime(df["Year"], format="%Y") vcLst = []