Moved Executor Parameters to Global Config (#157)
* Moved Executor Parameters to Global Config

* Black formatting

* Moved table_name parameter to frame.py. Removed executor_type parameter

The executor_type parameter is no longer necessary to maintain.

* Fixed reference to table_name parameter

table_name is now a parameter within frame.py

* Adjusted Functions to Set SQL Connection

Moved the set_SQL_connection function to config. Added a set_SQL_table function in frame.py so users can specify which database table is associated with their dataframe.

* Update SQLExecutor name parameter

* Fix Executor Reference

Update current_vis() to reference lux.config.executor

* Update frame.py

* Moved set functions to global config

Co-authored-by: 19thyneb <thyne.boonmark@gmail.com>
Co-authored-by: Doris Lee <dorisjunglinlee@gmail.com>
3 people committed Dec 3, 2020
1 parent 705cd05 commit 3b316d9
Showing 8 changed files with 55 additions and 50 deletions.
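
In user-facing terms, executor and connection settings that previously lived on each LuxDataFrame are now set once on the global lux.config object, while the table association stays on the frame. A minimal sketch of the new workflow, assuming a psycopg2 connection string and a table named cars (both hypothetical):

import psycopg2  # optional dependency, only needed for the SQL executor
import lux
from lux.core.frame import LuxDataFrame

# Before this commit (per-frame settings):
#   df.set_executor_type("SQL")
#   df.set_SQL_connection(conn, "cars")

# After this commit (global settings; only the table name stays on the frame):
conn = psycopg2.connect("host=localhost dbname=mydb")  # hypothetical connection string
lux.config.set_SQL_connection(conn)

df = LuxDataFrame()
lux.config.set_executor_type("SQL")  # set after creating the frame; __init__ resets the global executor to Pandas
df.set_SQL_table("cars")             # hypothetical table; fetches SQL metadata through lux.config.SQLconnection
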
26 changes: 26 additions & 0 deletions lux/_config/config.py
@@ -153,6 +153,8 @@ def __init__(self):
self._default_display = "pandas"
self.renderer = "altair"
self.plot_config = None
self.SQLconnection = ""
self.executor = None

@property
def default_display(self):
@@ -177,6 +179,30 @@ def default_display(self, type: str) -> None:
stacklevel=2,
)

def set_SQL_connection(self, connection):
self.SQLconnection = connection

def set_executor_type(self, exe):
if exe == "SQL":
import pkgutil

if pkgutil.find_loader("psycopg2") is None:
raise ImportError(
"psycopg2 is not installed. Run `pip install psycopg2' to install psycopg2 to enable the Postgres connection."
)
else:
import psycopg2
from lux.executor.SQLExecutor import SQLExecutor

self.executor = SQLExecutor()
else:
from lux.executor.PandasExecutor import PandasExecutor

self.executor = PandasExecutor()

def set_SQL_connection(self, connection):
self.SQLconnection = connection


config = Config()

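A quick sketch of the new global entry point: any value other than "SQL" falls through to the Pandas executor, and the chosen executor instance is shared by every frame via lux.config.

import lux
from lux.executor.PandasExecutor import PandasExecutor

lux.config.set_executor_type("Pandas")  # any value other than 'SQL' selects the PandasExecutor
assert isinstance(lux.config.executor, PandasExecutor)

# lux.config.set_executor_type("SQL") takes the psycopg2-guarded branch and installs a SQLExecutor instead.
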
2 changes: 1 addition & 1 deletion lux/action/custom.py
@@ -41,7 +41,7 @@ def custom(ldf):
recommendation["collection"] = ldf.current_vis

vlist = ldf.current_vis
PandasExecutor.execute(vlist, ldf)
lux.config.executor.execute(vlist, ldf)
for vis in vlist:
vis.score = interestingness(vis, ldf)
# ldf.clear_intent()
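
Action functions now follow the shape shown in custom(): materialize the current vis list through the global executor, then score it. A hedged sketch of that pattern; the action name and any recommendation keys other than "collection" are illustrative:

import lux
from lux.interestingness.interestingness import interestingness

def my_action(ldf):
    vlist = ldf.current_vis
    lux.config.executor.execute(vlist, ldf)  # global executor instead of a per-frame one
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    return {"action": "My Action", "collection": vlist}
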
43 changes: 10 additions & 33 deletions lux/core/frame.py
@@ -68,10 +68,8 @@ def __init__(self, *args, **kw):
self._prev = None
super(LuxDataFrame, self).__init__(*args, **kw)

self.executor_type = "Pandas"
self.executor = PandasExecutor()
self.SQLconnection = ""
self.table_name = ""
lux.config.executor = PandasExecutor()

self._sampled = None
self._toggle_pandas_display = True
@@ -111,8 +109,8 @@ def maintain_metadata(self):
if not hasattr(self, "_metadata_fresh") or not self._metadata_fresh:
# only compute metadata information if the dataframe is non-empty
if len(self) > 0:
self.executor.compute_stats(self)
self.executor.compute_dataset_metadata(self)
lux.config.executor.compute_stats(self)
lux.config.executor.compute_dataset_metadata(self)
self._infer_structure()
self._metadata_fresh = True

@@ -172,25 +170,6 @@ def _infer_structure(self):
if very_small_df_flag:
self.pre_aggregated = True

def set_executor_type(self, exe):
if exe == "SQL":
import pkgutil

if pkgutil.find_loader("psycopg2") is None:
raise ImportError(
"psycopg2 is not installed. Run `pip install psycopg2' to install psycopg2 to enable the Postgres connection."
)
else:
import psycopg2
from lux.executor.SQLExecutor import SQLExecutor

self.executor = SQLExecutor
else:
from lux.executor.PandasExecutor import PandasExecutor

self.executor = PandasExecutor()
self.executor_type = exe

@property
def intent(self):
return self._intent
@@ -291,7 +270,7 @@ def current_vis(self):
and len(self._current_vis) > 0
and self._current_vis[0].data is None
):
self.executor.execute(self._current_vis, self)
lux.config.executor.execute(self._current_vis, self)
return self._current_vis

@current_vis.setter
@@ -306,11 +285,9 @@ def __repr__(self):
########## SQL Metadata, type, model schema ###########
#######################################################

def set_SQL_connection(self, connection, t_name):
self.SQLconnection = connection
def set_SQL_table(self, t_name):
self.table_name = t_name
self.compute_SQL_dataset_metadata()
self.set_executor_type("SQL")

def compute_SQL_dataset_metadata(self):
self.get_SQL_attributes()
@@ -346,7 +323,7 @@ def get_SQL_attributes(self):
else:
table_name = self.table_name
query = f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'"
attributes = list(pd.read_sql(query, self.SQLconnection)["column_name"])
attributes = list(pd.read_sql(query, lux.config.SQLconnection)["column_name"])
for attr in attributes:
self[attr] = None

@@ -355,7 +332,7 @@ def get_SQL_cardinality(self):
for attr in list(self.columns):
card_query = pd.read_sql(
f"SELECT Count(Distinct({attr})) FROM {self.table_name}",
self.SQLconnection,
lux.config.SQLconnection,
)
cardinality[attr] = list(card_query["count"])[0]
self.cardinality = cardinality
@@ -365,7 +342,7 @@ def get_SQL_unique_values(self):
for attr in list(self.columns):
unique_query = pd.read_sql(
f"SELECT Distinct({attr}) FROM {self.table_name}",
self.SQLconnection,
lux.config.SQLconnection,
)
unique_vals[attr] = list(unique_query[attr])
self.unique_values = unique_vals
@@ -381,7 +358,7 @@ def compute_SQL_data_type(self):
# get the data types of the attributes in the SQL table
for attr in list(self.columns):
query = f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'"
datatype = list(pd.read_sql(query, self.SQLconnection)["data_type"])[0]
datatype = list(pd.read_sql(query, lux.config.SQLconnection)["data_type"])[0]
sql_dtypes[attr] = datatype

data_type = {"quantitative": [], "nominal": [], "temporal": []}
@@ -774,7 +751,7 @@ def intent_to_string(intent):
def to_JSON(self, rec_infolist, input_current_vis=""):
widget_spec = {}
if self.current_vis:
self.executor.execute(self.current_vis, self)
lux.config.executor.execute(self.current_vis, self)
widget_spec["current_vis"] = LuxDataFrame.current_vis_to_JSON(
self.current_vis, input_current_vis
)
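
The frame's SQL metadata helpers read column information straight from INFORMATION_SCHEMA through the connection now stored on lux.config. A standalone sketch of the same queries, with a hypothetical table name:

import pandas as pd
import lux

table_name = "cars"  # hypothetical; inside frame.py this comes from self.table_name

# Column names, as in get_SQL_attributes()
cols = pd.read_sql(
    f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'",
    lux.config.SQLconnection,
)["column_name"].tolist()

# Column data types, as in compute_SQL_data_type()
dtypes = {}
for attr in cols:
    query = f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'"
    dtypes[attr] = pd.read_sql(query, lux.config.SQLconnection)["data_type"].iloc[0]
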
20 changes: 10 additions & 10 deletions lux/executor/SQLExecutor.py
@@ -27,13 +27,13 @@ class SQLExecutor(Executor):
"""

def __init__(self):
self.name = "Executor"
self.name = "SQLExecutor"
self.selection = []
self.tables = []
self.filters = ""

def __repr__(self):
return f"<Executor>"
return f"<SQLExecutor>"

@staticmethod
def execute(vislist: VisList, ldf: LuxDataFrame):
@@ -60,14 +60,14 @@ def execute(vislist: VisList, ldf: LuxDataFrame):
required_variables = ",".join(required_variables)
row_count = list(
pd.read_sql(
f"SELECT COUNT(*) FROM {ldf.table_name} {where_clause}",
f"SELECT COUNT(*) FROM {lux.config.table_name} {where_clause}",
ldf.SQLconnection,
)["count"]
)[0]
if row_count > 10000:
query = f"SELECT {required_variables} FROM {ldf.table_name} {where_clause} ORDER BY random() LIMIT 10000"
query = f"SELECT {required_variables} FROM {lux.config.table_name} {where_clause} ORDER BY random() LIMIT 10000"
else:
query = f"SELECT {required_variables} FROM {ldf.table_name} {where_clause}"
query = f"SELECT {required_variables} FROM {lux.config.table_name} {where_clause}"
data = pd.read_sql(query, ldf.SQLconnection)
vis._vis_data = utils.pandas_to_lux(data)
if vis.mark == "bar" or vis.mark == "line":
@@ -96,23 +96,23 @@ def execute_aggregate(vis: Vis, ldf: LuxDataFrame):
# barchart case, need count data for each group
if measure_attr.attribute == "Record":
where_clause, filterVars = SQLExecutor.execute_filter(vis)
count_query = f"SELECT {groupby_attr.attribute}, COUNT({groupby_attr.attribute}) FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}"
count_query = f"SELECT {groupby_attr.attribute}, COUNT({groupby_attr.attribute}) FROM {lux.config.table_name} {where_clause} GROUP BY {groupby_attr.attribute}"
vis._vis_data = pd.read_sql(count_query, ldf.SQLconnection)
vis._vis_data = vis.data.rename(columns={"count": "Record"})
vis._vis_data = utils.pandas_to_lux(vis.data)

else:
where_clause, filterVars = SQLExecutor.execute_filter(vis)
if agg_func == "mean":
mean_query = f"SELECT {groupby_attr.attribute}, AVG({measure_attr.attribute}) as {measure_attr.attribute} FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}"
mean_query = f"SELECT {groupby_attr.attribute}, AVG({measure_attr.attribute}) as {measure_attr.attribute} FROM {lux.config.table_name} {where_clause} GROUP BY {groupby_attr.attribute}"
vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection)
vis._vis_data = utils.pandas_to_lux(vis.data)
if agg_func == "sum":
mean_query = f"SELECT {groupby_attr.attribute}, SUM({measure_attr.attribute}) as {measure_attr.attribute} FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}"
mean_query = f"SELECT {groupby_attr.attribute}, SUM({measure_attr.attribute}) as {measure_attr.attribute} FROM {lux.config.table_name} {where_clause} GROUP BY {groupby_attr.attribute}"
vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection)
vis._vis_data = utils.pandas_to_lux(vis.data)
if agg_func == "max":
mean_query = f"SELECT {groupby_attr.attribute}, MAX({measure_attr.attribute}) as {measure_attr.attribute} FROM {ldf.table_name} {where_clause} GROUP BY {groupby_attr.attribute}"
mean_query = f"SELECT {groupby_attr.attribute}, MAX({measure_attr.attribute}) as {measure_attr.attribute} FROM {lux.config.table_name} {where_clause} GROUP BY {groupby_attr.attribute}"
vis._vis_data = pd.read_sql(mean_query, ldf.SQLconnection)
vis._vis_data = utils.pandas_to_lux(vis.data)

@@ -150,7 +150,7 @@ def execute_binning(vis: Vis, ldf: LuxDataFrame):
upper_edges.append(str(curr_edge))
upper_edges = ",".join(upper_edges)
vis_filter, filter_vars = SQLExecutor.execute_filter(vis)
bin_count_query = f"SELECT width_bucket, COUNT(width_bucket) FROM (SELECT width_bucket({bin_attribute.attribute}, '{{{upper_edges}}}') FROM {ldf.table_name}) as Buckets GROUP BY width_bucket ORDER BY width_bucket"
bin_count_query = f"SELECT width_bucket, COUNT(width_bucket) FROM (SELECT width_bucket({bin_attribute.attribute}, '{{{upper_edges}}}') FROM {lux.config.table_name}) as Buckets GROUP BY width_bucket ORDER BY width_bucket"
bin_count_data = pd.read_sql(bin_count_query, ldf.SQLconnection)

# counts,binEdges = np.histogram(ldf[bin_attribute.attribute],bins=bin_attribute.bin_size)
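
SQLExecutor.execute() caps how much data it pulls back: it counts matching rows first and, above 10,000, samples with ORDER BY random(). A minimal sketch of that decision, using lux.config.SQLconnection for brevity (the executor itself still reads the connection off the frame) and hypothetical table and column names:

import pandas as pd
import lux

table_name = "cars"                       # hypothetical
required_variables = "horsepower,weight"  # hypothetical column list
where_clause = ""                         # built by SQLExecutor.execute_filter() in practice

row_count = pd.read_sql(
    f"SELECT COUNT(*) FROM {table_name} {where_clause}", lux.config.SQLconnection
)["count"].iloc[0]

if row_count > 10000:
    # PostgreSQL-specific random sampling, as in execute()
    query = f"SELECT {required_variables} FROM {table_name} {where_clause} ORDER BY random() LIMIT 10000"
else:
    query = f"SELECT {required_variables} FROM {table_name} {where_clause}"
data = pd.read_sql(query, lux.config.SQLconnection)
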
3 changes: 2 additions & 1 deletion lux/interestingness/interestingness.py
@@ -21,6 +21,7 @@
import numpy as np
from pandas.api.types import is_datetime64_any_dtype as is_datetime
from scipy.spatial.distance import euclidean
import lux


def interestingness(vis: Vis, ldf: LuxDataFrame) -> int:
@@ -215,7 +216,7 @@ def deviation_from_overall(vis: Vis, ldf: LuxDataFrame, filter_specs: list, msr_
unfiltered_vis = copy.copy(vis)
# Remove filters, keep only attribute intent
unfiltered_vis._inferred_intent = utils.get_attrs_specs(vis._inferred_intent)
ldf.executor.execute([unfiltered_vis], ldf)
lux.config.executor.execute([unfiltered_vis], ldf)

v = unfiltered_vis.data[msr_attribute]
v = v / v.sum()
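
For context, deviation_from_overall executes an unfiltered copy of the vis through the global executor and compares the two normalized distributions. A simplified sketch of the core comparison:

import copy
import lux
from lux.utils import utils
from scipy.spatial.distance import euclidean

def deviation_sketch(vis, ldf, msr_attribute):
    unfiltered_vis = copy.copy(vis)
    unfiltered_vis._inferred_intent = utils.get_attrs_specs(vis._inferred_intent)  # drop filters, keep attributes
    lux.config.executor.execute([unfiltered_vis], ldf)

    v = unfiltered_vis.data[msr_attribute]
    v = v / v.sum()                       # normalize the unfiltered distribution
    v_filter = vis.data[msr_attribute]
    v_filter = v_filter / v_filter.sum()  # normalize the filtered distribution
    return euclidean(v, v_filter)         # deviation as Euclidean distance
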
3 changes: 2 additions & 1 deletion lux/vis/Vis.py
@@ -15,6 +15,7 @@
from typing import List, Callable, Union
from lux.vis.Clause import Clause
from lux.utils.utils import check_import_lux_widget
import lux


class Vis:
@@ -310,7 +311,7 @@ def refresh_source(self, ldf): # -> Vis:
self._inferred_intent = Parser.parse(self._intent)
Validator.validate_intent(self._inferred_intent, ldf)
Compiler.compile_vis(ldf, self)
ldf.executor.execute([self], ldf)
lux.config.executor.execute([self], ldf)

def check_not_vislist_intent(self):

2 changes: 1 addition & 1 deletion lux/vis/VisList.py
@@ -310,4 +310,4 @@ def refresh_source(self, ldf):
self._inferred_intent = Parser.parse(self._intent)
Validator.validate_intent(self._inferred_intent, ldf)
self._collection = Compiler.compile_intent(ldf, self._inferred_intent)
ldf.executor.execute(self._collection, ldf)
lux.config.executor.execute(self._collection, ldf)
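
Both refresh_source implementations now re-execute through lux.config.executor, so recompiling a vis against a new data source no longer depends on per-frame executor state. A short usage sketch; the dataset path and column names are assumptions:

import pandas as pd
import lux
from lux.vis.Vis import Vis

df = pd.read_csv("lux/data/car.csv")            # hypothetical path to a sample dataset
vis = Vis(["Horsepower", "Acceleration"], df)   # compiled and executed via lux.config.executor

faster_cars = df[df["Horsepower"] > 150]        # hypothetical filter
vis.refresh_source(faster_cars)                 # recompiles and re-executes against the new source
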
6 changes: 3 additions & 3 deletions tests/test_dates.py
@@ -50,7 +50,7 @@ def test_period_selection(global_var):
]
)

PandasExecutor.execute(ldf.current_vis, ldf)
lux.config.executor.execute(ldf.current_vis, ldf)

assert all([type(vlist.data) == lux.core.frame.LuxDataFrame for vlist in ldf.current_vis])
assert all(ldf.current_vis[2].data.columns == ["Year", "Acceleration"])
Expand All @@ -64,7 +64,7 @@ def test_period_filter(global_var):

ldf.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")])

PandasExecutor.execute(ldf.current_vis, ldf)
lux.config.executor.execute(ldf.current_vis, ldf)
ldf._repr_html_()

assert isinstance(ldf.recommendation["Filter"][2]._inferred_intent[2].value, pd.Period)
Expand All @@ -79,7 +79,7 @@ def test_period_to_altair(global_var):

df.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")])

PandasExecutor.execute(df.current_vis, df)
lux.config.executor.execute(df.current_vis, df)
df._repr_html_()

exported_code = df.recommendation["Filter"][2].to_Altair()
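
A minimal test in the same style as the updated ones, checking that recommendations run through the global executor; the dataset path is an assumption:

import pandas as pd
import lux
from lux.executor.PandasExecutor import PandasExecutor

def test_global_executor_default():
    df = pd.read_csv("lux/data/car.csv")  # hypothetical dataset path
    df.set_intent([lux.Clause(attribute="Acceleration"), lux.Clause(attribute="Horsepower")])
    df._repr_html_()  # triggers recommendation computation through lux.config.executor
    assert isinstance(lux.config.executor, PandasExecutor)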
