more rewrites and merges into single line

lux-org · Nov 16, 2020 · b468b07 · b468b07
1 parent 8c3b2c1
commit b468b07
Show file tree

Hide file tree

Showing 6 changed files with 17 additions and 29 deletions.
diff --git a/lux/core/frame.py b/lux/core/frame.py
@@ -364,10 +364,8 @@ def get_SQL_attributes(self):
             table_name = self.table_name[self.table_name.index(".") + 1 :]
         else:
             table_name = self.table_name
-        attr_query = (
-            f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'"
-        )
-        attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"])
+        query = f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'"
+        attributes = list(pd.read_sql(query, self.SQLconnection)["column_name"])
         for attr in attributes:
             self[attr] = None
 
@@ -401,10 +399,8 @@ def compute_SQL_data_type(self):
             table_name = self.table_name
         # get the data types of the attributes in the SQL table
         for attr in list(self.columns):
-            datatype_query = (
-                f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'",
-            )
-            datatype = list(pd.read_sql(datatype_query, self.SQLconnection)["data_type"])[0]
+            query = f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'"
+            datatype = list(pd.read_sql(query, self.SQLconnection)["data_type"])[0]
             sql_dtypes[attr] = datatype
 
         data_type = {"quantitative": [], "nominal": [], "temporal": []}

diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py
@@ -246,18 +246,17 @@ def execute_binning(vis: Vis):
         import numpy as np
 
         bin_attribute = list(filter(lambda x: x.bin_size != 0, vis._inferred_intent))[0]
-        if not np.isnan(vis.data[bin_attribute.attribute]).all():
+        bin_attr = bin_attribute.attribute
+        if not np.isnan(vis.data[bin_attr]).all():
             # np.histogram breaks if array contain NaN
-            series = vis.data[bin_attribute.attribute].dropna()
+            series = vis.data[bin_attr].dropna()
             # TODO:binning runs for name attribte. Name attribute has datatype quantitative which is wrong.
             counts, bin_edges = np.histogram(series, bins=bin_attribute.bin_size)
             # bin_edges of size N+1, so need to compute bin_center as the bin location
             bin_center = np.mean(np.vstack([bin_edges[0:-1], bin_edges[1:]]), axis=0)
             # TODO: Should vis.data be a LuxDataFrame or a Pandas DataFrame?
-            vis._vis_data = pd.DataFrame(
-                np.array([bin_center, counts]).T,
-                columns=[bin_attribute.attribute, "Number of Records"],
-            )
+            binned_result = np.array([bin_center, counts]).T
+            vis._vis_data = pd.DataFrame(binned_result, columns=[bin_attr, "Number of Records"])
 
     @staticmethod
     def execute_filter(vis: Vis):

diff --git a/lux/vislib/altair/BarChart.py b/lux/vislib/altair/BarChart.py
@@ -67,8 +67,8 @@ def initialize_chart(self):
                 type=x_attr.data_type,
                 axis=alt.Axis(labelOverlap=True),
             )
-            y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title)
             x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', axis=alt.Axis(labelOverlap=True))"
+            y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title)
             y_attr_field_code = (
                 f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')"
             )

diff --git a/tests/test_action.py b/tests/test_action.py
@@ -20,8 +20,7 @@
 
 
 def test_vary_filter_val():
-    url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
-    df = pd.read_csv(url)
+    df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
     vis = Vis(["Height", "SportType=Ball"], df)
     df.set_intent_as_vis(vis)
     df._repr_html_()

diff --git a/tests/test_performance.py b/tests/test_performance.py
@@ -20,8 +20,7 @@
 # To run the script and see the printed result, run:
 # python -m pytest -s tests/test_performance.py
 def test_q1_performance_census():
-    url = "https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true"
-    df = pd.read_csv(url)
+    df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true")
     tic = time.perf_counter()
     df._repr_html_()
     toc = time.perf_counter()

diff --git a/tests/test_vis.py b/tests/test_vis.py
@@ -20,24 +20,21 @@
 
 
 def test_vis():
-    url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
-    df = pd.read_csv(url)
+    df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
     vis = Vis(["Height", "SportType=Ball"], df)
     assert vis.get_attr_by_attr_name("Height")[0].bin_size != 0
     assert vis.get_attr_by_attr_name("Record")[0].aggregation == "count"
 
 
 def test_vis_set_specs():
-    url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
-    df = pd.read_csv(url)
+    df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
     vis = Vis(["Height", "SportType=Ball"], df)
     vis.set_intent(["Height", "SportType=Ice"])
     assert vis.get_attr_by_attr_name("SportType")[0].value == "Ice"
 
 
 def test_vis_collection():
-    url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
-    df = pd.read_csv(url)
+    df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
     vlist = VisList(["Height", "SportType=Ball", "?"], df)
     vis_with_year = list(filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist))[0]
     assert vis_with_year.get_attr_by_channel("x")[0].attribute == "Year"
@@ -48,8 +45,7 @@ def test_vis_collection():
 
 
 def test_vis_collection_set_intent():
-    url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
-    df = pd.read_csv(url)
+    df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
     vlist = VisList(["Height", "SportType=Ice", "?"], df)
     vlist.set_intent(["Height", "SportType=Boat", "?"])
     for v in vlist._collection:
@@ -166,8 +162,7 @@ def test_vis_to_Altair_standalone():
 
 
 def test_vis_list_custom_title_override():
-    url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
-    df = pd.read_csv(url)
+    df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
     df["Year"] = pd.to_datetime(df["Year"], format="%Y")
 
     vcLst = []