Skip to content

Commit

Permalink
more rewrites and merges into single line
Browse files Browse the repository at this point in the history
  • Loading branch information
dorisjlee committed Nov 16, 2020
1 parent 8c3b2c1 commit b468b07
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 29 deletions.
12 changes: 4 additions & 8 deletions lux/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,10 +364,8 @@ def get_SQL_attributes(self):
table_name = self.table_name[self.table_name.index(".") + 1 :]
else:
table_name = self.table_name
attr_query = (
f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'"
)
attributes = list(pd.read_sql(attr_query, self.SQLconnection)["column_name"])
query = f"SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'"
attributes = list(pd.read_sql(query, self.SQLconnection)["column_name"])
for attr in attributes:
self[attr] = None

Expand Down Expand Up @@ -401,10 +399,8 @@ def compute_SQL_data_type(self):
table_name = self.table_name
# get the data types of the attributes in the SQL table
for attr in list(self.columns):
datatype_query = (
f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'",
)
datatype = list(pd.read_sql(datatype_query, self.SQLconnection)["data_type"])[0]
query = f"SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attr}'"
datatype = list(pd.read_sql(query, self.SQLconnection)["data_type"])[0]
sql_dtypes[attr] = datatype

data_type = {"quantitative": [], "nominal": [], "temporal": []}
Expand Down
11 changes: 5 additions & 6 deletions lux/executor/PandasExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,18 +246,17 @@ def execute_binning(vis: Vis):
import numpy as np

bin_attribute = list(filter(lambda x: x.bin_size != 0, vis._inferred_intent))[0]
if not np.isnan(vis.data[bin_attribute.attribute]).all():
bin_attr = bin_attribute.attribute
if not np.isnan(vis.data[bin_attr]).all():
# np.histogram breaks if the array contains NaN
series = vis.data[bin_attribute.attribute].dropna()
series = vis.data[bin_attr].dropna()
# TODO: binning runs for name attribute. Name attribute has datatype quantitative, which is wrong.
counts, bin_edges = np.histogram(series, bins=bin_attribute.bin_size)
# bin_edges of size N+1, so need to compute bin_center as the bin location
bin_center = np.mean(np.vstack([bin_edges[0:-1], bin_edges[1:]]), axis=0)
# TODO: Should vis.data be a LuxDataFrame or a Pandas DataFrame?
vis._vis_data = pd.DataFrame(
np.array([bin_center, counts]).T,
columns=[bin_attribute.attribute, "Number of Records"],
)
binned_result = np.array([bin_center, counts]).T
vis._vis_data = pd.DataFrame(binned_result, columns=[bin_attr, "Number of Records"])

@staticmethod
def execute_filter(vis: Vis):
Expand Down
2 changes: 1 addition & 1 deletion lux/vislib/altair/BarChart.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ def initialize_chart(self):
type=x_attr.data_type,
axis=alt.Axis(labelOverlap=True),
)
y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title)
x_attr_field_code = f"alt.X('{x_attr.attribute}', type= '{x_attr.data_type}', axis=alt.Axis(labelOverlap=True))"
y_attr_field = alt.Y(y_attr.attribute, type=y_attr.data_type, title=agg_title)
y_attr_field_code = (
f"alt.Y('{y_attr.attribute}', type= '{y_attr.data_type}', title='{agg_title}')"
)
Expand Down
3 changes: 1 addition & 2 deletions tests/test_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@


def test_vary_filter_val():
url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
df = pd.read_csv(url)
df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
vis = Vis(["Height", "SportType=Ball"], df)
df.set_intent_as_vis(vis)
df._repr_html_()
Expand Down
3 changes: 1 addition & 2 deletions tests/test_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
# To run the script and see the printed result, run:
# python -m pytest -s tests/test_performance.py
def test_q1_performance_census():
url = "https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true"
df = pd.read_csv(url)
df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/census.csv?raw=true")
tic = time.perf_counter()
df._repr_html_()
toc = time.perf_counter()
Expand Down
15 changes: 5 additions & 10 deletions tests/test_vis.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,21 @@


def test_vis():
url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
df = pd.read_csv(url)
df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
vis = Vis(["Height", "SportType=Ball"], df)
assert vis.get_attr_by_attr_name("Height")[0].bin_size != 0
assert vis.get_attr_by_attr_name("Record")[0].aggregation == "count"


def test_vis_set_specs():
url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
df = pd.read_csv(url)
df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
vis = Vis(["Height", "SportType=Ball"], df)
vis.set_intent(["Height", "SportType=Ice"])
assert vis.get_attr_by_attr_name("SportType")[0].value == "Ice"


def test_vis_collection():
url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
df = pd.read_csv(url)
df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
vlist = VisList(["Height", "SportType=Ball", "?"], df)
vis_with_year = list(filter(lambda x: x.get_attr_by_attr_name("Year") != [], vlist))[0]
assert vis_with_year.get_attr_by_channel("x")[0].attribute == "Year"
Expand All @@ -48,8 +45,7 @@ def test_vis_collection():


def test_vis_collection_set_intent():
url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
df = pd.read_csv(url)
df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
vlist = VisList(["Height", "SportType=Ice", "?"], df)
vlist.set_intent(["Height", "SportType=Boat", "?"])
for v in vlist._collection:
Expand Down Expand Up @@ -166,8 +162,7 @@ def test_vis_to_Altair_standalone():


def test_vis_list_custom_title_override():
url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
df = pd.read_csv(url)
df = pd.read_csv("https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true")
df["Year"] = pd.to_datetime(df["Year"], format="%Y")

vcLst = []
Expand Down

0 comments on commit b468b07

Please sign in to comment.