Skip to content

Commit

Permalink
Merge c8fff34 into 4d46ad9
Browse files Browse the repository at this point in the history
  • Loading branch information
thyneb19 committed Sep 10, 2021
2 parents 4d46ad9 + c8fff34 commit 6d1f809
Show file tree
Hide file tree
Showing 31 changed files with 1,989 additions and 357 deletions.
1 change: 1 addition & 0 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ jobs:
run: |
python lux/data/upload_car_data.py
python lux/data/upload_aug_test_data.py
python lux/data/upload_airbnb_nyc_data.py
- name: Lint check with black
run: |
black --target-version py37 --line-length 105 --check .
Expand Down
138 changes: 138 additions & 0 deletions examples/GeneralDatabase_Executor_Example.py.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "expected-facility",
"metadata": {},
"source": [
"This notebook shows how to use the General Database Executor in Lux. This execution backend lets users choose which SQL query templates Lux uses to query their database system. Here we show how to switch from the SQL template for PostgreSQL to the one for MySQL."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "helpful-liberty",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "97a93a0b783743fab041362d66d72125",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Button(description='Toggle Table/Lux', layout=Layout(bottom='6px', top='6px', width='200px'), style=ButtonStyl…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e216a8adf9584b6e8a3cc5374ae73209",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import sys\n",
"sys.path.insert(1, 'C:\\\\Users\\\\thyne\\\\Documents\\\\GitHub\\\\lux')\n",
"\n",
"import lux\n",
"import psycopg2\n",
"import pandas as pd\n",
"from lux import LuxSQLTable\n",
"\n",
"connection = psycopg2.connect(\"host=localhost user=postgres password=lux dbname=postgres\")\n",
"lux.config.set_SQL_connection(connection)\n",
"lux.config.read_query_template(\"postgres_query_template.txt\")\n",
"lux.config.quoted_queries = True\n",
"\n",
"sql_tbl = LuxSQLTable(table_name='car')\n",
"sql_tbl.intent = [\"Cylinders\"]\n",
"sql_tbl"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "searching-nancy",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a2c12d8447494178aa6c38fc0a4c59f6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Button(description='Toggle Table/Lux', layout=Layout(bottom='6px', top='6px', width='200px'), style=ButtonStyl…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "26b23f594155417e9fb7ff2b4695477c",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import sqlalchemy\n",
"import lux\n",
"from sqlalchemy.ext.declarative import declarative_base\n",
"\n",
"engine = sqlalchemy.create_engine('mysql+mysqlconnector://luxuser:lux@localhost:3306/sys',echo=False)\n",
"lux.config.set_SQL_connection(engine)\n",
"lux.config.read_query_template(\"mysql_query_template.txt\")\n",
"lux.config.quoted_queries = False\n",
"\n",
"sql_df = lux.LuxSQLTable(table_name='car')\n",
"\n",
"sql_df.intent = ['Cylinders']\n",
"sql_df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
733 changes: 733 additions & 0 deletions examples/Lux_Code_Tracing.ipynb

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions examples/mysql_query_template.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
preview_query:SELECT * from {table_name} LIMIT {num_rows}
length_query:SELECT COUNT(*) as length FROM {table_name} {where_clause}
sample_query:SELECT * FROM {table_name} {where_clause} LIMIT {num_rows}
scatter_query:SELECT {columns} FROM {table_name} {where_clause}
colored_barchart_counts:SELECT {groupby_attr}, {color_attr}, COUNT({groupby_attr}) as count FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
colored_barchart_average:SELECT {groupby_attr}, {color_attr}, AVG({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
colored_barchart_sum:SELECT {groupby_attr}, {color_attr}, SUM({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
colored_barchart_max:SELECT {groupby_attr}, {color_attr}, MAX({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
barchart_counts:SELECT {groupby_attr}, COUNT({groupby_attr}) as count FROM {table_name} {where_clause} GROUP BY {groupby_attr}
barchart_average:SELECT {groupby_attr}, AVG({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}
barchart_sum:SELECT {groupby_attr}, SUM({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}
barchart_max:SELECT {groupby_attr}, MAX({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}
histogram_counts:SELECT width_bucket, count(width_bucket) as count from (SELECT ({bucket_cases}) as width_bucket from {table_name} {where_clause}) as buckets GROUP BY width_bucket order by width_bucket
heatmap_counts:SELECT width_bucket1, width_bucket2, count(*) as count FROM (SELECT ({bucket_cases1}) as width_bucket1, ({bucket_cases2}) as width_bucket2 FROM {table_name} {where_clause}) as labeled_data GROUP BY width_bucket1, width_bucket2
table_attributes_query:SELECT COLUMN_NAME as column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'
min_max_query:SELECT MIN({attribute}) as min, MAX({attribute}) as max FROM {table_name}
cardinality_query:SELECT COUNT(Distinct({attribute})) as count FROM {table_name} WHERE {attribute} IS NOT NULL
unique_query:SELECT Distinct({attribute}) FROM {table_name} WHERE {attribute} IS NOT NULL
datatype_query:SELECT DATA_TYPE as data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attribute}'
19 changes: 19 additions & 0 deletions examples/postgres_query_template.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
preview_query:SELECT * from {table_name} LIMIT {num_rows}
length_query:SELECT COUNT(1) as length FROM {table_name} {where_clause}
sample_query:SELECT * FROM {table_name} {where_clause} ORDER BY random() LIMIT {num_rows}
scatter_query:SELECT {columns} FROM {table_name} {where_clause}
colored_barchart_counts:SELECT "{groupby_attr}", "{color_attr}", COUNT("{groupby_attr}") FROM {table_name} {where_clause} GROUP BY "{groupby_attr}", "{color_attr}"
colored_barchart_average:SELECT "{groupby_attr}", "{color_attr}", AVG("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}", "{color_attr}"
colored_barchart_sum:SELECT "{groupby_attr}", "{color_attr}", SUM("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}", "{color_attr}"
colored_barchart_max:SELECT "{groupby_attr}", "{color_attr}", MAX("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}", "{color_attr}"
barchart_counts:SELECT "{groupby_attr}", COUNT("{groupby_attr}") FROM {table_name} {where_clause} GROUP BY "{groupby_attr}"
barchart_average:SELECT "{groupby_attr}", AVG("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}"
barchart_sum:SELECT "{groupby_attr}", SUM("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}"
barchart_max:SELECT "{groupby_attr}", MAX("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}"
histogram_counts:SELECT width_bucket, COUNT(width_bucket) FROM (SELECT width_bucket(CAST ("{bin_attribute}" AS FLOAT), '{upper_edges}') FROM {table_name} {where_clause}) as Buckets GROUP BY width_bucket ORDER BY width_bucket
heatmap_counts:SELECT width_bucket1, width_bucket2, count(*) FROM (SELECT width_bucket(CAST ("{x_attribute}" AS FLOAT), '{x_upper_edges_string}') as width_bucket1, width_bucket(CAST ("{y_attribute}" AS FLOAT), '{y_upper_edges_string}') as width_bucket2 FROM {table_name} {where_clause}) as foo GROUP BY width_bucket1, width_bucket2
table_attributes_query:SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'
min_max_query:SELECT MIN("{attribute}") as min, MAX("{attribute}") as max FROM {table_name}
cardinality_query:SELECT Count(Distinct("{attribute}")) FROM {table_name} WHERE "{attribute}" IS NOT NULL
unique_query:SELECT Distinct("{attribute}") FROM {table_name} WHERE "{attribute}" IS NOT NULL
datatype_query:SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attribute}'
23 changes: 23 additions & 0 deletions examples/query_template.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
##############################################################################
#########################Example Query Template #########################
# For details on each query function, see: https://readthedocs... ##
##############################################################################
preview_query:
length_query:
sample_query:
scatter_query:
colored_barchart_counts:SELECT {groupby_attr}, {color_attr}, COUNT({groupby_attr}) as count FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
colored_barchart_average:SELECT {groupby_attr}, {color_attr}, AVG({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
colored_barchart_sum:SELECT {groupby_attr}, {color_attr}, SUM({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
colored_barchart_max:SELECT {groupby_attr}, {color_attr}, MAX({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
barchart_counts:SELECT {groupby_attr}, COUNT({groupby_attr}) as count FROM {table_name} {where_clause} GROUP BY {groupby_attr}
barchart_average:SELECT {groupby_attr}, AVG({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}
barchart_sum:SELECT {groupby_attr}, SUM({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}
barchart_max:SELECT {groupby_attr}, MAX({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}
histogram_counts:SELECT width_bucket, count(width_bucket) as count from (SELECT ({bucket_cases}) as width_bucket from {table_name} {where_clause}) as buckets GROUP BY width_bucket order by width_bucket
heatmap_counts:SELECT width_bucket1, width_bucket2, count(*) as count FROM (SELECT ({bucket_cases1}) as width_bucket1, ({bucket_cases2}) as width_bucket2 FROM {table_name} {where_clause}) as labeled_data GROUP BY width_bucket1, width_bucket2
table_attributes_query:SELECT COLUMN_NAME as column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'
min_max_query:SELECT MIN({attribute}) as min, MAX({attribute}) as max FROM {table_name}
cardinality_query:SELECT COUNT(Distinct({attribute})) as count FROM {table_name} WHERE {attribute} IS NOT NULL
unique_query:SELECT Distinct({attribute}) FROM {table_name} WHERE {attribute} IS NOT NULL
datatype_query:SELECT DATA_TYPE as data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attribute}'
2 changes: 2 additions & 0 deletions lux/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from lux.vis.Clause import Clause
from lux.core.frame import LuxDataFrame
from lux.core.sqltable import LuxSQLTable
from lux.core.joinedsqltable import JoinedSQLTable
from lux.utils.tracing_utils import LuxTracer
from ._version import __version__, version_info
from lux._config import config
from lux._config.config import warning_format
Expand Down
25 changes: 25 additions & 0 deletions lux/_config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
import lux
import warnings
from lux.utils.tracing_utils import LuxTracer
import os
from lux._config.template import postgres_template, mysql_template

RegisteredOption = namedtuple("RegisteredOption", "name action display_condition args")

Expand Down Expand Up @@ -34,13 +37,18 @@ def __init__(self):
self._pandas_fallback = True
self._interestingness_fallback = True
self.heatmap_bin_size = 40
self.tracer_relevant_lines = []
self.tracer = LuxTracer()
self.query_templates = {}
self.handle_quotes = True
#####################################
#### Optimization Configurations ####
#####################################
self._sampling_start = 100000
self._sampling_cap = 1000000
self._sampling_flag = True
self._heatmap_flag = True
self._heatmap_start = 5000
self.lazy_maintain = True
self.early_pruning = True
self.early_pruning_sample_cap = 30000
Expand Down Expand Up @@ -419,11 +427,28 @@ def set_SQL_connection(self, connection):
self.set_executor_type("SQL")
self.SQLconnection = connection

def read_query_template(self, query_template):
    """
    Load the SQL query templates and install a fresh SQL executor.

    Every meaningful template line has the form ``name:query``. Only the
    first ``:`` separates the name from the query, so the query text may
    itself contain colons. Blank lines and lines starting with ``#`` are
    ignored.

    Parameters
    ----------
    query_template : str, path-like, or file-like object
        - A single-line string naming an existing file: the file is read.
        - Any other string: treated as the template text itself.
        - An object with a ``read()`` method: its contents are parsed.
        - Anything else: passed to ``open()`` as a path.

    Raises
    ------
    ValueError
        If a non-blank, non-comment line has no ``:`` separator.
    """
    import os
    from lux.executor.SQLExecutor import SQLExecutor

    if isinstance(query_template, str):
        # Template text always spans several lines in practice, so a
        # single-line string that names a file on disk is taken as a path.
        if "\n" not in query_template and os.path.isfile(query_template):
            with open(query_template) as f:
                lines = f.read().split("\n")
        else:
            lines = query_template.split("\n")
    elif hasattr(query_template, "read"):
        lines = query_template.read().split("\n")
    else:
        # The original opened an undefined name (`query_file`) here.
        with open(query_template) as f:
            lines = f.read().split("\n")

    query_dict = {}
    for line in lines:
        entry = line.strip()
        if not entry or entry.startswith("#"):
            continue
        # partition splits on the first colon only, keeping any later
        # colons (e.g. Postgres `::` casts) inside the query text.
        key, sep, val = entry.partition(":")
        if not sep:
            raise ValueError(
                "Malformed query template line (expected 'name:query'): %r" % line
            )
        query_dict[key.strip()] = val.strip()
    self.query_templates = query_dict
    self.executor = SQLExecutor()

def set_executor_type(self, exe):
if exe == "SQL":
from lux.executor.SQLExecutor import SQLExecutor

self.executor = SQLExecutor()
self.read_query_template(postgres_template)
elif exe == "Pandas":
from lux.executor.PandasExecutor import PandasExecutor

Expand Down
19 changes: 19 additions & 0 deletions lux/_config/mysql_query_template.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
preview_query:SELECT * from {table_name} LIMIT {num_rows}
length_query:SELECT COUNT(*) as length FROM {table_name} {where_clause}
sample_query:SELECT * FROM {table_name} {where_clause} LIMIT {num_rows}
scatter_query:SELECT {columns} FROM {table_name} {where_clause}
colored_barchart_counts:SELECT {groupby_attr}, {color_attr}, COUNT({groupby_attr}) as count FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
colored_barchart_average:SELECT {groupby_attr}, {color_attr}, AVG({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
colored_barchart_sum:SELECT {groupby_attr}, {color_attr}, SUM({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
colored_barchart_max:SELECT {groupby_attr}, {color_attr}, MAX({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}, {color_attr}
barchart_counts:SELECT {groupby_attr}, COUNT({groupby_attr}) as count FROM {table_name} {where_clause} GROUP BY {groupby_attr}
barchart_average:SELECT {groupby_attr}, AVG({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}
barchart_sum:SELECT {groupby_attr}, SUM({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}
barchart_max:SELECT {groupby_attr}, MAX({measure_attr}) as {measure_attr} FROM {table_name} {where_clause} GROUP BY {groupby_attr}
histogram_counts:SELECT width_bucket, count(width_bucket) as count from (SELECT ({bucket_cases}) as width_bucket from {table_name} {where_clause}) as buckets GROUP BY width_bucket order by width_bucket
heatmap_counts:SELECT width_bucket1, width_bucket2, count(*) as count FROM (SELECT ({bucket_cases1}) as width_bucket1, ({bucket_cases2}) as width_bucket2 FROM {table_name} {where_clause}) as labeled_data GROUP BY width_bucket1, width_bucket2
table_attributes_query:SELECT COLUMN_NAME as column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'
min_max_query:SELECT MIN({attribute}) as min, MAX({attribute}) as max FROM {table_name}
cardinality_query:SELECT COUNT(Distinct({attribute})) as count FROM {table_name} WHERE {attribute} IS NOT NULL
unique_query:SELECT Distinct({attribute}) FROM {table_name} WHERE {attribute} IS NOT NULL
datatype_query:SELECT DATA_TYPE as data_type FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attribute}'
19 changes: 19 additions & 0 deletions lux/_config/postgres_query_template.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
preview_query:SELECT * from {table_name} LIMIT {num_rows}
length_query:SELECT COUNT(1) as length FROM {table_name} {where_clause}
sample_query:SELECT * FROM {table_name} {where_clause} ORDER BY random() LIMIT {num_rows}
scatter_query:SELECT {columns} FROM {table_name} {where_clause}
colored_barchart_counts:SELECT "{groupby_attr}", "{color_attr}", COUNT("{groupby_attr}") FROM {table_name} {where_clause} GROUP BY "{groupby_attr}", "{color_attr}"
colored_barchart_average:SELECT "{groupby_attr}", "{color_attr}", AVG("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}", "{color_attr}"
colored_barchart_sum:SELECT "{groupby_attr}", "{color_attr}", SUM("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}", "{color_attr}"
colored_barchart_max:SELECT "{groupby_attr}", "{color_attr}", MAX("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}", "{color_attr}"
barchart_counts:SELECT "{groupby_attr}", COUNT("{groupby_attr}") FROM {table_name} {where_clause} GROUP BY "{groupby_attr}"
barchart_average:SELECT "{groupby_attr}", AVG("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}"
barchart_sum:SELECT "{groupby_attr}", SUM("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}"
barchart_max:SELECT "{groupby_attr}", MAX("{measure_attr}") as "{measure_attr}" FROM {table_name} {where_clause} GROUP BY "{groupby_attr}"
histogram_counts:SELECT width_bucket, COUNT(width_bucket) FROM (SELECT width_bucket(CAST ("{bin_attribute}" AS FLOAT), '{upper_edges}') FROM {table_name} {where_clause}) as Buckets GROUP BY width_bucket ORDER BY width_bucket
heatmap_counts:SELECT width_bucket1, width_bucket2, count(*) FROM (SELECT width_bucket(CAST ("{x_attribute}" AS FLOAT), '{x_upper_edges_string}') as width_bucket1, width_bucket(CAST ("{y_attribute}" AS FLOAT), '{y_upper_edges_string}') as width_bucket2 FROM {table_name} {where_clause}) as foo GROUP BY width_bucket1, width_bucket2
table_attributes_query:SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = '{table_name}'
min_max_query:SELECT MIN("{attribute}") as min, MAX("{attribute}") as max FROM {table_name}
cardinality_query:SELECT Count(Distinct("{attribute}")) FROM {table_name} WHERE "{attribute}" IS NOT NULL
unique_query:SELECT Distinct("{attribute}") FROM {table_name} WHERE "{attribute}" IS NOT NULL
datatype_query:SELECT DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' AND COLUMN_NAME = '{attribute}'
Loading

0 comments on commit 6d1f809

Please sign in to comment.