Skip to content

Commit

Permalink
Revert "Update LuxSQLTable __len__() and metadata computation"
Browse files Browse the repository at this point in the history
This reverts commit 7c7dcd3.
  • Loading branch information
thyneb19 committed Mar 27, 2021
1 parent 48c1b57 commit b5998c7
Show file tree
Hide file tree
Showing 7 changed files with 15 additions and 27 deletions.
2 changes: 1 addition & 1 deletion lux/action/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
}
ignore_rec_flag = False
# Doesn't make sense to compute correlation if there are fewer than 5 data values
if len(ldf) < 5:
if ldf._length < 5:
ignore_rec_flag = True
# Then use the data populated in the vis list to compute score
for vis in vlist:
Expand Down
4 changes: 2 additions & 2 deletions lux/action/univariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def univariate(ldf, *args):
"long_description": f"Distribution displays univariate histogram distributions of all quantitative attributes{examples}. Visualizations are ranked from most to least skewed.",
}
# Doesn't make sense to generate a histogram if there are fewer than 5 datapoints (pre-aggregated)
if len(ldf) < 5:
if ldf._length < 5:
ignore_rec_flag = True
elif data_type_constraint == "nominal":
possible_attributes = [
Expand Down Expand Up @@ -98,7 +98,7 @@ def univariate(ldf, *args):
"long_description": "Temporal displays line charts for all attributes related to datetimes in the dataframe.",
}
# Doesn't make sense to generate a line chart if there are fewer than 3 datapoints (pre-aggregated)
if len(ldf) < 3:
if ldf._length < 3:
ignore_rec_flag = True
if ignore_rec_flag:
recommendation["collection"] = []
Expand Down
1 change: 1 addition & 0 deletions lux/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def __init__(self, *args, **kw):
self._toggle_pandas_display = True
self._message = Message()
self._pandas_only = False
self._length = len(self)
# Metadata
self._data_type = {}
self.unique_values = None
Expand Down
12 changes: 3 additions & 9 deletions lux/core/sqltable.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ class LuxSQLTable(lux.LuxDataFrame):
"_pandas_only",
"pre_aggregated",
"_type_override",
"_length",
"_setup_done",
]

def __init__(self, *args, table_name="", **kw):
Expand All @@ -66,16 +64,12 @@ def __init__(self, *args, table_name="", **kw):
lux.config.executor = SQLExecutor()

self._length = 0
self._setup_done = False
if table_name != "":
self.set_SQL_table(table_name)
warnings.formatwarning = lux.warning_format

def __len__(self):
if self._setup_done:
return self._length
else:
return super(LuxSQLTable, self).__len__()
def len(self):
return self._length

def set_SQL_table(self, t_name):
# function that ties the Lux Dataframe to a SQL database table
Expand Down Expand Up @@ -132,7 +126,7 @@ def _ipython_display_(self):
layout=widgets.Layout(width="200px", top="6px", bottom="6px"),
)
self.output = widgets.Output()
self._sampled = lux.config.executor.execute_preview(self)
lux.config.executor.execute_preview(self)
display(button, self.output)

def on_button_clicked(b):
Expand Down
13 changes: 6 additions & 7 deletions lux/executor/SQLExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@ def __repr__(self):
return f"<SQLExecutor>"

@staticmethod
def execute_preview(tbl: LuxSQLTable, preview_size=5):
output = pandas.read_sql(
"SELECT * from {} LIMIT {}".format(tbl.table_name, preview_size), lux.config.SQLconnection
def execute_preview(tbl: LuxSQLTable):
tbl._sampled = pandas.read_sql(
"SELECT * from {} LIMIT 5".format(tbl.table_name), lux.config.SQLconnection
)
return output

@staticmethod
def execute_sampling(tbl: LuxSQLTable):
Expand Down Expand Up @@ -612,8 +611,9 @@ def compute_dataset_metadata(self, tbl: LuxSQLTable):
-------
None
"""
if not tbl._setup_done:
self.get_SQL_attributes(tbl)
self.get_SQL_attributes(tbl)
for attr in list(tbl.columns):
tbl[attr] = None
tbl._data_type = {}
#####NOTE: since we aren't expecting users to do much data processing with the SQL database, should we just keep this
##### in the initialization and do it just once
Expand Down Expand Up @@ -644,7 +644,6 @@ def get_SQL_attributes(self, tbl: LuxSQLTable):
attributes = list(pandas.read_sql(attr_query, lux.config.SQLconnection)["column_name"])
for attr in attributes:
tbl[attr] = None
tbl._setup_done = True

def compute_stats(self, tbl: LuxSQLTable):
"""
Expand Down
2 changes: 1 addition & 1 deletion lux/interestingness/interestingness.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def deviation_from_overall(
from lux.executor.SQLExecutor import SQLExecutor

v_filter_size = SQLExecutor.get_filtered_size(filter_specs, ldf)
v_size = len(ldf)
v_size = ldf.len()
vdata = vis.data
v_filter = vdata[msr_attribute]
total = v_filter.sum()
Expand Down
8 changes: 1 addition & 7 deletions lux/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.
import pandas as pd
import matplotlib.pyplot as plt
import lux


def convert_to_list(x):
Expand Down Expand Up @@ -84,12 +83,7 @@ def check_if_id_like(df, attribute):
if is_string:
# For string IDs, serial numbers or alphanumeric codes usually have a consistent length (e.g., CG-39405) with little deviation. For a high-cardinality string field that is not an ID field (like Name or Brand), there is less uniformity across the string lengths.
if len(df) > 50:
if lux.config.executor.name == "PandasExecutor":
sampled = df[attribute].sample(50, random_state=99)
else:
from lux.executor.SQLExecutor import SQLExecutor

sampled = SQLExecutor.execute_preview(df, preview_size=50)
sampled = df[attribute].sample(50, random_state=99)
else:
sampled = df[attribute]
str_length_uniformity = sampled.apply(lambda x: type(x) == str and len(x)).std() < 3
Expand Down

0 comments on commit b5998c7

Please sign in to comment.