diff --git a/docs/installation.rst b/docs/installation.rst index a06da876dc72..0f372bdbed1d 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -816,6 +816,84 @@ in this dictionary are made available for users to use in their SQL. 'my_crazy_macro': lambda x: x*2, } +**Scheduling queries** + +You can optionally allow your users to schedule queries directly in SQL Lab. +This is done by adding extra metadata to saved queries, which are then picked +up by an external scheduler (like `Apache Airflow <https://airflow.apache.org/>`_). + +To allow scheduled queries, add the following to your `config.py`: + +.. code-block:: python + + FEATURE_FLAGS = { + # Configuration for scheduling queries from SQL Lab. This information is + # collected when the user clicks "Schedule query", and saved into the `extra` + # field of saved queries. + # See: https://github.com/mozilla-services/react-jsonschema-form + 'SCHEDULED_QUERIES': { + 'JSONSCHEMA': { + 'title': 'Schedule', + 'description': ( + 'In order to schedule a query, you need to specify when it ' + 'should start running, when it should stop running, and how ' + 'often it should run. You can also optionally specify ' + 'dependencies that should be met before the query is ' + 'executed. Please read the documentation for best practices ' + 'and more information on how to specify dependencies.' 
+ ), + 'type': 'object', + 'properties': { + 'output_table': { + 'type': 'string', + 'title': 'Output table name', + }, + 'start_date': { + 'type': 'string', + 'format': 'date-time', + 'title': 'Start date', + }, + 'end_date': { + 'type': 'string', + 'format': 'date-time', + 'title': 'End date', + }, + 'schedule_interval': { + 'type': 'string', + 'title': 'Schedule interval', + }, + 'dependencies': { + 'type': 'array', + 'title': 'Dependencies', + 'items': { + 'type': 'string', + }, + }, + }, + }, + 'UISCHEMA': { + 'schedule_interval': { + 'ui:placeholder': '@daily, @weekly, etc.', + }, + 'dependencies': { + 'ui:help': ( + 'Check the documentation for the correct format when ' + 'defining dependencies.' + ), + }, + }, + }, + } + +This feature flag is based on `react-jsonschema-form <https://github.com/mozilla-services/react-jsonschema-form>`_, +and will add a button called "Schedule Query" to SQL Lab. When the button is +clicked, a modal will show up where the user can add the metadata required for +scheduling the query. + +This information can then be retrieved from the endpoint `/savedqueryviewapi/api/read` +and used to schedule the queries that have `schedule_info` in their JSON +metadata. For schedulers other than Airflow, additional fields can be easily +added to the configuration file above. 
Celery Flower ------------- diff --git a/superset/assets/package-lock.json b/superset/assets/package-lock.json index 704e6f3afd8a..e5075d969966 100644 --- a/superset/assets/package-lock.json +++ b/superset/assets/package-lock.json @@ -5791,8 +5791,7 @@ "co": { "version": "4.6.0", "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", - "integrity": "sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ=", - "dev": true + "integrity": "sha1-bqa989hTrlTMuOR7+gvz+QMfsYQ=" }, "coa": { "version": "2.0.2", @@ -6071,8 +6070,7 @@ "core-js": { "version": "2.6.0", "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.0.tgz", - "integrity": "sha512-kLRC6ncVpuEW/1kwrOXYX6KQASCVtrh1gQr/UiaVgFlf9WE5Vp+lNe5+h3LuMr5PAucWnnEXwH0nQHRH/gpGtw==", - "dev": true + "integrity": "sha512-kLRC6ncVpuEW/1kwrOXYX6KQASCVtrh1gQr/UiaVgFlf9WE5Vp+lNe5+h3LuMr5PAucWnnEXwH0nQHRH/gpGtw==" }, "core-util-is": { "version": "1.0.2", @@ -13233,6 +13231,11 @@ "integrity": "sha1-7dFMgk4sycHgsKG0K7UhBRakJDg=", "dev": true }, + "lodash.topath": { + "version": "4.5.2", + "resolved": "https://registry.npmjs.org/lodash.topath/-/lodash.topath-4.5.2.tgz", + "integrity": "sha1-NhY1Hzu6YZlKCTGYlmC9AyVP0Ak=" + }, "lodash.uniq": { "version": "4.5.0", "resolved": "https://registry.npmjs.org/lodash.uniq/-/lodash.uniq-4.5.0.tgz", @@ -17458,6 +17461,41 @@ "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.6.3.tgz", "integrity": "sha512-u7FDWtthB4rWibG/+mFbVd5FvdI20yde86qKGx4lVUTWmPlSWQ4QxbBIrrs+HnXGbxOUlUzTAP/VDmvCwaP2yA==" }, + "react-jsonschema-form": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/react-jsonschema-form/-/react-jsonschema-form-1.2.0.tgz", + "integrity": "sha512-rR77qoFiQ5TxDYwsJz8UWmDner4jQ4xMnDqeV6Nvg7GtoEyOUoTVkI/SBMEzfXuF/piWZXYjquP96Hy/2L7C+Q==", + "requires": { + "ajv": "^5.2.3", + "babel-runtime": "^6.26.0", + "core-js": "^2.5.7", + "lodash.topath": "^4.5.2", + "prop-types": "^15.5.8" + }, + "dependencies": { + "ajv": { + "version": "5.5.2", + "resolved": 
"https://registry.npmjs.org/ajv/-/ajv-5.5.2.tgz", + "integrity": "sha1-c7Xuyj+rZT49P5Qis0GtQiBdyWU=", + "requires": { + "co": "^4.6.0", + "fast-deep-equal": "^1.0.0", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.3.0" + } + }, + "fast-deep-equal": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-1.1.0.tgz", + "integrity": "sha1-wFNHeBfIa1HaqFPIHgWbcz0CNhQ=" + }, + "json-schema-traverse": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.3.1.tgz", + "integrity": "sha1-NJptRMU6Ud6JtAgFxdXlm0F9M0A=" + } + } + }, "react-lifecycles-compat": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/react-lifecycles-compat/-/react-lifecycles-compat-3.0.4.tgz", diff --git a/superset/assets/package.json b/superset/assets/package.json index 6914c736490b..6edb971331dc 100644 --- a/superset/assets/package.json +++ b/superset/assets/package.json @@ -117,6 +117,7 @@ "react-dom": "^16.4.1", "react-gravatar": "^2.6.1", "react-hot-loader": "^4.3.6", + "react-jsonschema-form": "^1.2.0", "react-map-gl": "^4.0.10", "react-markdown": "^3.3.0", "react-redux": "^5.0.2", diff --git a/superset/assets/src/SqlLab/components/QueryAutoRefresh.jsx b/superset/assets/src/SqlLab/components/QueryAutoRefresh.jsx index 541ce2d65d6b..13834cb9d919 100644 --- a/superset/assets/src/SqlLab/components/QueryAutoRefresh.jsx +++ b/superset/assets/src/SqlLab/components/QueryAutoRefresh.jsx @@ -41,10 +41,18 @@ class QueryAutoRefresh extends React.PureComponent { const { queries, queriesLastUpdate } = this.props; const now = new Date().getTime(); + // due to a race condition, queries can be marked as successful before the + // results key is set; this is a workaround until we fix the underlying + // problem + const isQueryRunning = q => ( + ['running', 'started', 'pending', 'fetching'].indexOf(q.state) >= 0 || + (q.state === 'success' && q.resultsKey === null) + ); + 
return ( queriesLastUpdate > 0 && Object.values(queries).some( - q => ['running', 'started', 'pending', 'fetching'].indexOf(q.state) >= 0 && + q => isQueryRunning(q) && now - q.startDttm < MAX_QUERY_AGE_TO_POLL, ) ); diff --git a/superset/assets/src/SqlLab/components/ScheduleQueryButton.jsx b/superset/assets/src/SqlLab/components/ScheduleQueryButton.jsx new file mode 100644 index 000000000000..2e7e16e3167a --- /dev/null +++ b/superset/assets/src/SqlLab/components/ScheduleQueryButton.jsx @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +import React from 'react'; +import PropTypes from 'prop-types'; +import Form from 'react-jsonschema-form'; +import { t } from '@superset-ui/translation'; + +import Button from '../../components/Button'; +import ModalTrigger from '../../components/ModalTrigger'; + +const propTypes = { + defaultLabel: PropTypes.string, + sql: PropTypes.string.isRequired, + schema: PropTypes.string.isRequired, + dbId: PropTypes.number.isRequired, + animation: PropTypes.bool, + onSchedule: PropTypes.func, +}; +const defaultProps = { + defaultLabel: t('Undefined'), + animation: true, + onSchedule: () => {}, +}; + +class ScheduleQueryButton extends React.PureComponent { + constructor(props) { + super(props); + this.state = { + description: '', + label: props.defaultLabel, + showSchedule: false, + }; + this.toggleSchedule = this.toggleSchedule.bind(this); + this.onSchedule = this.onSchedule.bind(this); + this.onCancel = this.onCancel.bind(this); + this.onLabelChange = this.onLabelChange.bind(this); + this.onDescriptionChange = this.onDescriptionChange.bind(this); + } + onSchedule({ formData }) { + const query = { + label: this.state.label, + description: this.state.description, + db_id: this.props.dbId, + schema: this.props.schema, + sql: this.props.sql, + extra_json: JSON.stringify({ schedule_info: formData }), + }; + this.props.onSchedule(query); + this.saveModal.close(); + } + onCancel() { + this.saveModal.close(); + } + onLabelChange(e) { + this.setState({ label: e.target.value }); + } + onDescriptionChange(e) { + this.setState({ description: e.target.value }); + } + toggleSchedule(e) { + this.setState({ target: e.target, showSchedule: !this.state.showSchedule }); + } + renderModalBody() { + return ( +
+ ); + } + render() { + return ( + + { this.saveModal = ref; }} + modalTitle={t('Schedule Query')} + modalBody={this.renderModalBody()} + triggerNode={ + + } + bsSize="medium" + /> + + ); + } +} +ScheduleQueryButton.propTypes = propTypes; +ScheduleQueryButton.defaultProps = defaultProps; + +export default ScheduleQueryButton; diff --git a/superset/assets/src/SqlLab/components/SqlEditor.jsx b/superset/assets/src/SqlLab/components/SqlEditor.jsx index 960a4af15ef5..f4495af2a4dc 100644 --- a/superset/assets/src/SqlLab/components/SqlEditor.jsx +++ b/superset/assets/src/SqlLab/components/SqlEditor.jsx @@ -36,6 +36,7 @@ import LimitControl from './LimitControl'; import TemplateParamsEditor from './TemplateParamsEditor'; import SouthPane from './SouthPane'; import SaveQuery from './SaveQuery'; +import ScheduleQueryButton from './ScheduleQueryButton'; import ShareSqlLabQuery from './ShareSqlLabQuery'; import Timer from '../../components/Timer'; import Hotkeys from '../../components/Hotkeys'; @@ -43,6 +44,7 @@ import SqlEditorLeftBar from './SqlEditorLeftBar'; import AceEditorWrapper from './AceEditorWrapper'; import { STATE_BSSTYLE_MAP } from '../constants'; import RunQueryActionButton from './RunQueryActionButton'; +import { FeatureFlag, isFeatureEnabled } from '../../featureFlags'; const SQL_EDITOR_PADDING = 10; const SQL_TOOLBAR_HEIGHT = 51; @@ -313,6 +315,18 @@ class SqlEditor extends React.PureComponent { sql={this.state.sql} /> + {isFeatureEnabled(FeatureFlag.SCHEDULED_QUERIES) && + + + + } +
{label} {sortBy === dataKey && diff --git a/superset/assets/src/components/FilterableTable/FilterableTableStyles.css b/superset/assets/src/components/FilterableTable/FilterableTableStyles.css index 5be4a369499d..7a0d3ba0ea7d 100644 --- a/superset/assets/src/components/FilterableTable/FilterableTableStyles.css +++ b/superset/assets/src/components/FilterableTable/FilterableTableStyles.css @@ -72,3 +72,8 @@ } .even-row { background: #f2f2f2; } .odd-row { background: #ffffff; } +.header-style { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} \ No newline at end of file diff --git a/superset/assets/src/featureFlags.ts b/superset/assets/src/featureFlags.ts index 8638a5471f7c..450ad2cd4f89 100644 --- a/superset/assets/src/featureFlags.ts +++ b/superset/assets/src/featureFlags.ts @@ -22,6 +22,7 @@ export enum FeatureFlag { SCOPED_FILTER = 'SCOPED_FILTER', OMNIBAR = 'OMNIBAR', CLIENT_CACHE = 'CLIENT_CACHE', + SCHEDULED_QUERIES = 'SCHEDULED_QUERIES', } export type FeatureFlagMap = { @@ -39,5 +40,5 @@ export function initFeatureFlags(featureFlags: FeatureFlagMap) { } export function isFeatureEnabled(feature: FeatureFlag) { - return !!window.featureFlags[feature]; + return window && window.featureFlags && !!window.featureFlags[feature]; } diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index 32fc1aa79c6b..5fed48049107 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -49,6 +49,7 @@ from sqlalchemy.engine.result import RowProxy from sqlalchemy.engine.url import make_url from sqlalchemy.sql import quoted_name, text +from sqlalchemy.sql.expression import ColumnClause from sqlalchemy.sql.expression import TextAsFrom from sqlalchemy.types import String, UnicodeText import sqlparse @@ -980,19 +981,98 @@ def get_columns( result.append(cls._create_column_info(column, column.Column, column_type)) return result + @classmethod + def _is_column_name_quoted(cls, column_name: str) -> bool: + """ + Check if column 
name is in quotes + :param column_name: column name + :return: boolean + """ + return column_name.startswith('"') and column_name.endswith('"') + + @classmethod + def _get_fields(cls, cols: List[dict]) -> List[ColumnClause]: + """ + Format column clauses where names are in quotes and labels are specified + :param cols: columns + :return: column clauses + """ + column_clauses = [] + # Column names are separated by periods. This regex will find periods in a string + # if they are not enclosed in quotes because if a period is enclosed in quotes, + # then that period is part of a column name. + dot_pattern = r"""\. # split on period + (?= # look ahead + (?: # create non-capture group + [^\"]*\"[^\"]*\" # two quotes + )*[^\"]*$) # end regex""" + dot_regex = re.compile(dot_pattern, re.VERBOSE) + for col in cols: + # get individual column names + col_names = re.split(dot_regex, col['name']) + # quote each column name if it is not already quoted + for index, col_name in enumerate(col_names): + if not cls._is_column_name_quoted(col_name): + col_names[index] = '"{}"'.format(col_name) + quoted_col_name = '.'.join( + col_name if cls._is_column_name_quoted(col_name) else f'"{col_name}"' + for col_name in col_names) + # create column clause in the format "name"."name" AS "name.name" + column_clause = sqla.literal_column(quoted_col_name).label(col['name']) + column_clauses.append(column_clause) + return column_clauses + + @classmethod + def _filter_presto_cols(cls, cols: List[dict]) -> List[dict]: + """ + We want to filter out columns that correspond to array content because expanding + arrays would require us to use unnest and join. This can lead to a large, + complicated, and slow query. + + Example: select array_content + from TABLE + cross join UNNEST(array_column) as t(array_content); + + We know which columns to skip because cols is a list provided to us in a specific + order where a structural column is positioned right before its content. 
+ + Example: Column Name: ColA, Column Data Type: array(row(nest_obj int)) + cols = [ ..., ColA, ColA.nest_obj, ... ] + + When we run across an array, check if subsequent column names start with the + array name and skip them. + :param cols: columns + :return: filtered list of columns + """ + filtered_cols = [] + curr_array_col_name = '' + for col in cols: + # col corresponds to an array's content and should be skipped + if curr_array_col_name and col['name'].startswith(curr_array_col_name): + continue + # col is an array so we need to check if subsequent + # columns correspond to the array's contents + elif str(col['type']) == 'ARRAY': + curr_array_col_name = col['name'] + filtered_cols.append(col) + else: + curr_array_col_name = '' + filtered_cols.append(col) + return filtered_cols + @classmethod def select_star(cls, my_db, table_name: str, engine: Engine, schema: str = None, limit: int = 100, show_cols: bool = False, indent: bool = True, latest_partition: bool = True, cols: List[dict] = []) -> str: """ - Temporary method until we have a function that can handle row and array columns + Include selecting properties of row objects. We cannot easily break arrays into + rows, so render the whole array in its own row and skip columns that correspond + to an array's contents. 
""" presto_cols = cols if show_cols: - dot_regex = r'\.(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)' - presto_cols = [ - col for col in presto_cols if re.search(dot_regex, col['name']) is None] - return BaseEngineSpec.select_star( + presto_cols = cls._filter_presto_cols(cols) + return super(PrestoEngineSpec, cls).select_star( my_db, table_name, engine, schema, limit, show_cols, indent, latest_partition, presto_cols, ) @@ -1526,6 +1606,10 @@ def where_latest_partition( return qry.where(Column(col_name) == value) return False + @classmethod + def _get_fields(cls, cols: List[dict]) -> List[ColumnClause]: + return BaseEngineSpec._get_fields(cols) + @classmethod def latest_sub_partition(cls, table_name, schema, database, **kwargs): # TODO(bogdan): implement` diff --git a/superset/views/sql_lab.py b/superset/views/sql_lab.py index adbdd46c6dfd..b9d1f2eb3509 100644 --- a/superset/views/sql_lab.py +++ b/superset/views/sql_lab.py @@ -116,9 +116,10 @@ def pre_update(self, obj): class SavedQueryViewApi(SavedQueryView): list_columns = [ - 'label', 'sqlalchemy_uri', 'user_email', 'schema', 'description', - 'sql'] - show_columns = ['label', 'db_id', 'schema', 'description', 'sql'] + 'id', 'label', 'sqlalchemy_uri', 'user_email', 'schema', 'description', + 'sql', 'extra_json'] + show_columns = [ + 'label', 'db_id', 'schema', 'description', 'sql', 'extra_json'] add_columns = show_columns edit_columns = add_columns diff --git a/tests/db_engine_specs_test.py b/tests/db_engine_specs_test.py index ef9d6bc17da1..2a22c590ed69 100644 --- a/tests/db_engine_specs_test.py +++ b/tests/db_engine_specs_test.py @@ -383,6 +383,29 @@ def test_presto_get_array_within_row_within_array_column(self): ('column_name.nested_obj', 'FLOAT')] self.verify_presto_column(presto_column, expected_results) + def test_presto_get_fields(self): + cols = [ + {'name': 'column'}, + {'name': 'column.nested_obj'}, + {'name': 'column."quoted.nested obj"'}] + actual_results = PrestoEngineSpec._get_fields(cols) + expected_results = [ 
+ {'name': '"column"', 'label': 'column'}, + {'name': '"column"."nested_obj"', 'label': 'column.nested_obj'}, + {'name': '"column"."quoted.nested obj"', + 'label': 'column."quoted.nested obj"'}] + for actual_result, expected_result in zip(actual_results, expected_results): + self.assertEqual(actual_result.element.name, expected_result['name']) + self.assertEqual(actual_result.name, expected_result['label']) + + def test_presto_filter_presto_cols(self): + cols = [ + {'name': 'column', 'type': 'ARRAY'}, + {'name': 'column.nested_obj', 'type': 'FLOAT'}] + actual_results = PrestoEngineSpec._filter_presto_cols(cols) + expected_results = [cols[0]] + self.assertEqual(actual_results, expected_results) + def test_hive_get_view_names_return_empty_list(self): self.assertEquals([], HiveEngineSpec.get_view_names(mock.ANY, mock.ANY))