diff --git a/fireant/dataset/data_blending.py b/fireant/dataset/data_blending.py
index 49f1890e..685b9f2d 100644
--- a/fireant/dataset/data_blending.py
+++ b/fireant/dataset/data_blending.py
@@ -79,6 +79,10 @@ def __deepcopy__(self, memodict={}):
     def table(self):
         return None
 
+    @property
+    def database(self):
+        return self.primary_dataset.database
+
     @immutable
     def extra_fields(self, *fields):
         for field in fields:
diff --git a/fireant/queries/builder/dataset_blender_query_builder.py b/fireant/queries/builder/dataset_blender_query_builder.py
index 8cfd31ef..3825e539 100644
--- a/fireant/queries/builder/dataset_blender_query_builder.py
+++ b/fireant/queries/builder/dataset_blender_query_builder.py
@@ -126,9 +126,6 @@ def _join_criteria_for_blender_subqueries(primary, secondary, dimensions, field_
 def _blender(dimensions, metrics, orders, field_maps) -> Callable:
     raw_dataset_metrics = set(find_dataset_metrics(metrics))
 
-    if orders is None:
-        orders = [(dimension, None) for dimension in dimensions]
-
     def _field_subquery_map(dataset_sql):
         """
         This nasty little function returns a dictionary that tells how to select dimensions and metrics in the
@@ -227,6 +224,7 @@ def sql(self):
         datasets, field_maps = _datasets_and_field_maps(self.dataset)
         metrics = find_metrics_for_widgets(self._widgets)
         raw_dataset_metrics = find_dataset_metrics(metrics)
+        orders = self.orders
         dataset_queries = [
             _build_dataset_query(
                 dataset,
@@ -264,8 +262,5 @@ def sql(self):
             zip(*[dataset_query.sql for i, dataset_query in enumerate(dataset_queries)])
         )
 
-        blend_query = _blender(self._dimensions, metrics, self._orders, field_maps)
+        blend_query = _blender(self._dimensions, metrics, orders, field_maps)
         return [blend_query(*cp) for cp in tx_query_matrix]
-
-    def __str__(self):
-        return str(self.sql)
diff --git a/fireant/queries/builder/dataset_query_builder.py b/fireant/queries/builder/dataset_query_builder.py
index e6d19c39..ae4fd57c 100644
--- a/fireant/queries/builder/dataset_query_builder.py
+++ b/fireant/queries/builder/dataset_query_builder.py
@@ -9,6 +9,7 @@
     alias_selector,
     immutable,
 )
+
 from .query_builder import (
     QueryBuilder,
     QueryException,
@@ -18,7 +19,6 @@
 )
 from .. import special_cases
 from ..execution import fetch_data
-from ..field_helper import make_term_for_dimension
 from ..finders import (
     find_and_group_references_for_dimensions,
     find_metrics_for_widgets,
@@ -68,28 +68,6 @@ def reference_groups(self):
             ).values()
         )
 
-    @property
-    def orders(self):
-        """
-        Initialize the DataSetQueryBuilder values for orders so that the SQL queries can be built. This also initializes
-        the default values for orders, which is all of the dimensions, if no order is specified.
-        """
-        if self._orders is not None:
-            return [
-                (field.definition.as_(alias_selector(field.alias)), orientation)
-                for (field, orientation) in self._orders
-            ]
-
-        # Initialize ordering to be by all dimensions
-
-        # Use the same function to make the definition terms to force it to be consistent.
-        # Always take the last element in order to prefer the display definition.
-        definitions = [
-            make_term_for_dimension(dimension) for dimension in self._dimensions
-        ]
-
-        return [(definition, None) for definition in definitions]
-
     @property
     def sql(self):
         """
@@ -110,7 +88,6 @@ def sql(self):
         share_dimensions = find_share_dimensions(self._dimensions, operations)
 
         return make_slicer_query_with_totals_and_references(
-            self.dataset,
             self.dataset.database,
             self.table,
             self.dataset.joins,
@@ -188,27 +165,32 @@ def __str__(self):
 
     def __repr__(self):
         return ".".join(
-            ["dataset", "query"]
-            + ["widget({})".format(repr(widget)) for widget in self._widgets]
-            + [
-                "dimension({})".format(repr(dimension))
-                for dimension in self._dimensions
-            ]
-            + [
-                "filter({}{})".format(
-                    repr(f),
-                    ", apply_filter_to_totals=True" if apply_filter_to_totals else "",
-                )
-                for f, apply_filter_to_totals in zip(
-                    self._filters, self._apply_filter_to_totals
-                )
-            ]
-            + [
-                "reference({})".format(repr(reference))
-                for reference in self._references
-            ]
-            + [
-                "orderby({}, {})".format(definition.alias, orientation)
-                for (definition, orientation) in self.orders
-            ]
+            [
+                "dataset",
+                "query",
+                *["widget({})".format(repr(widget)) for widget in self._widgets],
+                *[
+                    "dimension({})".format(repr(dimension))
+                    for dimension in self._dimensions
+                ],
+                *[
+                    "filter({}{})".format(
+                        repr(f),
+                        ", apply_filter_to_totals=True"
+                        if apply_filter_to_totals
+                        else "",
+                    )
+                    for f, apply_filter_to_totals in zip(
+                        self._filters, self._apply_filter_to_totals
+                    )
+                ],
+                *[
+                    "reference({})".format(repr(reference))
+                    for reference in self._references
+                ],
+                *[
+                    "orderby({}, {})".format(definition.alias, orientation)
+                    for (definition, orientation) in self.orders
+                ],
+            ],
         )
diff --git a/fireant/queries/builder/dimension_choices_query_builder.py b/fireant/queries/builder/dimension_choices_query_builder.py
index d7dc7076..09f830e1 100644
--- a/fireant/queries/builder/dimension_choices_query_builder.py
+++ b/fireant/queries/builder/dimension_choices_query_builder.py
@@ -8,7 +8,7 @@
     get_column_names,
 )
 from ..execution import fetch_data
-from ..field_helper import make_term_for_dimension
+from ..field_helper import make_term_for_field
 from ..finders import find_joins_for_tables
 from ..sql_transformer import make_slicer_query
 from ...formats import display_value
@@ -82,7 +82,7 @@ def _make_terms_for_hint_dimensions(self):
         """
         dimension_terms = []
         for dimension in self._dimensions:
-            dimension_term = make_term_for_dimension(
+            dimension_term = make_term_for_field(
                 dimension, self.dataset.database.trunc_date
             )
             dimension_term = dimension_term.replace_table(
diff --git a/fireant/queries/builder/query_builder.py b/fireant/queries/builder/query_builder.py
index 8eff0e6d..c8d058b1 100644
--- a/fireant/queries/builder/query_builder.py
+++ b/fireant/queries/builder/query_builder.py
@@ -135,6 +135,12 @@ def offset(self, offset):
         """
         self._offset = offset
 
+    @property
+    def orders(self):
+        if self._orders is None:
+            return [(dimension, None) for dimension in self._dimensions]
+        return self._orders
+
     @property
     def sql(self):
         """
diff --git a/fireant/queries/field_helper.py b/fireant/queries/field_helper.py
index bf622a4a..c468c0dc 100644
--- a/fireant/queries/field_helper.py
+++ b/fireant/queries/field_helper.py
@@ -2,17 +2,12 @@
 from fireant.utils import alias_selector
 
 
-def make_term_for_metrics(metric):
-    f_alias = alias_selector(metric.alias)
-    return metric.definition.as_(f_alias)
-
-
-def make_term_for_dimension(dimension, window=None):
+def make_term_for_field(field, window=None):
     """
-    Makes a list of pypika terms for a given dataset definition.
+    Makes a list of pypika terms for a given dataset field.
 
-    :param dimension:
-        A dataset dimension.
+    :param field:
+        A field from a dataset.
     :param window:
         A window function to apply to the dimension definition if it is a continuous dimension.
     :return:
@@ -20,9 +15,9 @@ def make_term_for_dimension(dimension, window=None):
         either one or two elements. A second element will be included if the dimension has a definition for its
         display field.
     """
-    f_alias = alias_selector(dimension.alias)
+    f_alias = alias_selector(field.alias)
 
-    if window and isinstance(dimension, DatetimeInterval):
-        return window(dimension.definition, dimension.interval_key).as_(f_alias)
+    if window and isinstance(field, DatetimeInterval):
+        return window(field.definition, field.interval_key).as_(f_alias)
 
-    return dimension.definition.as_(f_alias)
+    return field.definition.as_(f_alias)
diff --git a/fireant/queries/pagination.py b/fireant/queries/pagination.py
index 3ad93689..e08d125f 100644
--- a/fireant/queries/pagination.py
+++ b/fireant/queries/pagination.py
@@ -1,19 +1,22 @@
 import pandas as pd
+from fireant.utils import alias_selector
 from pypika import Order
 
 
 def _get_window(limit, offset):
     start = offset
-    end = offset + limit \
-        if None not in (offset, limit) \
-        else limit
+    end = offset + limit if None not in (offset, limit) else limit
 
     return start, end
 
 
 def _apply_sorting(orders):
-    sort_values, ascending = zip(*[(order[0].alias, order[1] is Order.asc)
-                                   for order in orders])
+    sort_values, ascending = zip(
+        *[
+            (alias_selector(field.alias), orientation is Order.asc)
+            for field, orientation in orders
+        ]
+    )
     return list(sort_values), ascending
 
 
@@ -37,9 +40,9 @@ def paginate(data_frame, widgets, orders=(), limit=None, offset=None):
         return data_frame
 
     start, end = _get_window(limit, offset)
-    group_pagination = isinstance(data_frame.index, pd.MultiIndex) \
-                       and any([getattr(widget, 'group_pagination', False)
-                                for widget in widgets])
+    group_pagination = isinstance(data_frame.index, pd.MultiIndex) and any(
+        [getattr(widget, "group_pagination", False) for widget in widgets]
+    )
 
     if group_pagination:
         return _group_paginate(data_frame, start, end, orders)
@@ -62,17 +65,16 @@ def _simple_paginate(data_frame, start=None, end=None, orders=()):
     """
    if orders:
         sort, ascending = _apply_sorting(orders)
-        data_frame = data_frame.sort_values(by=sort,
-                                            ascending=ascending)
+        data_frame = data_frame.sort_values(by=sort, ascending=ascending)
 
     return data_frame[start:end]
 
 
 def _index_isnull(data_frame):
     if isinstance(data_frame.index, pd.MultiIndex):
-        return [any(pd.isnull(value)
-                    for value in level)
-                for level in list(data_frame.index)]
+        return [
+            any(pd.isnull(value) for value in level) for level in list(data_frame.index)
+        ]
 
     return pd.isnull(data_frame.index)
 
@@ -97,9 +99,7 @@ def _group_paginate(data_frame, start=None, end=None, orders=()):
 
     # Do not apply ordering on the 0th dimension !!!
     # This would not have any result since the X-Axis on a chart is ordered sequentially
-    orders = [order
-              for order in orders
-              if order[0].alias != data_frame.index.names[0]]
+    orders = [order for order in orders if order[0].alias != data_frame.index.names[0]]
 
     if orders:
         # FIXME this should aggregate according to field definition, instead of sum
@@ -107,16 +107,19 @@
         aggregated_df = dimension_groups.sum()
 
         sort, ascending = _apply_sorting(orders)
-        sorted_df = aggregated_df.sort_values(by=sort,
-                                              ascending=ascending)
+        sorted_df = aggregated_df.sort_values(by=sort, ascending=ascending)
         sorted_dimension_values = tuple(sorted_df.index)[start:end]
     else:
-        sorted_dimension_values = tuple(dimension_groups.apply(lambda g: g.name))[start:end]
+        sorted_dimension_values = tuple(dimension_groups.apply(lambda g: g.name))[
+            start:end
+        ]
 
-    sorted_dimension_values = pd.Index(sorted_dimension_values, name=dimension_levels[0]) \
-        if len(dimension_levels) == 1 \
+    sorted_dimension_values = (
+        pd.Index(sorted_dimension_values, name=dimension_levels[0])
+        if len(dimension_levels) == 1
         else pd.MultiIndex.from_tuples(sorted_dimension_values, names=dimension_levels)
+    )
 
     def _apply_pagination(df):
         # This function applies sorting by using the sorted dimension values as an index to select values in the right
@@ -138,7 +141,8 @@ def _apply_pagination(df):
         return dfx.loc[index_slice, :].append(dfx[isnull])
 
-    return data_frame \
-        .sort_values(data_frame.index.names[0], ascending=True) \
-        .groupby(level=0) \
+    return (
+        data_frame.sort_values(data_frame.index.names[0], ascending=True)
+        .groupby(level=0)
         .apply(_apply_pagination)
+    )
 
diff --git a/fireant/queries/references.py b/fireant/queries/references.py
index d9d95a88..5f416566 100644
--- a/fireant/queries/references.py
+++ b/fireant/queries/references.py
@@ -2,7 +2,7 @@
 
 from fireant.dataset.fields import Field
 
-from .field_helper import make_term_for_dimension
+from .field_helper import make_term_for_field
 from .finders import find_field_in_modified_field
 
 
@@ -23,7 +23,7 @@ def adapt_for_reference_query(
 
 
 def _replace_reference_dimension(dimension, offset_func, trunc_date=None):
-    ref_definition = offset_func(make_term_for_dimension(dimension, trunc_date))
+    ref_definition = offset_func(make_term_for_field(dimension, trunc_date))
     field = Field(
         alias=dimension.alias,
         definition=ref_definition,
diff --git a/fireant/queries/special_cases.py b/fireant/queries/special_cases.py
index 39179595..a2546be2 100644
--- a/fireant/queries/special_cases.py
+++ b/fireant/queries/special_cases.py
@@ -2,7 +2,6 @@
 import pandas as pd
 
 from dateutil.relativedelta import relativedelta
-
 from fireant.dataset.fields import DataType
 from fireant.dataset.filters import RangeFilter
 from fireant.dataset.intervals import DatetimeInterval
@@ -25,35 +24,42 @@ def adjust_daterange_filter_for_rolling_window(dimensions, operations, filters):
         The filters applied to a slicer query
     :return:
     """
-    has_datetime_dimension_in_first_dimension_pos = not len(dimensions) \
-        or not dimensions[0].data_type == DataType.date
+    has_datetime_dimension_in_first_dimension_pos = (
+        not len(dimensions) or not dimensions[0].data_type == DataType.date
+    )
     if has_datetime_dimension_in_first_dimension_pos:
         return filters
 
-    has_rolling = any([isinstance(operation, RollingOperation)
-                       for operation in operations])
+    has_rolling = any(
+        [isinstance(operation, RollingOperation) for operation in operations]
+    )
     if not has_rolling:
         return filters
 
     dim0 = dimensions[0]
-    filters_on_dim0 = [filter_
-                       for filter_ in filters
-                       if isinstance(filter_, RangeFilter)
-                       and str(filter_.definition.term) == str(dim0.definition)]
+    filters_on_dim0 = [
+        filter_
+        for filter_ in filters
+        if isinstance(filter_, RangeFilter)
+        and str(filter_.definition.term) == str(dim0.definition)
+    ]
     if not 0 < len(filters_on_dim0):
         return filters
 
-    max_rolling_period = max(operation.window
-                             for operation in operations
-                             if isinstance(operation, RollingOperation))
+    max_rolling_period = max(
+        operation.window
+        for operation in operations
+        if isinstance(operation, RollingOperation)
+    )
 
     for filter_ in filters_on_dim0:
         # Monkey patch the update start date on the date filter
-        print('stop')
-        args = {dim0.interval_key + 's': max_rolling_period} \
-            if isinstance(dim0, DatetimeInterval) \
-            and 'quarter' != dim0.interval_key \
-            else {'months': max_rolling_period * 3}
+        print("stop")
+        args = (
+            {dim0.interval_key + "s": max_rolling_period}
+            if isinstance(dim0, DatetimeInterval) and "quarter" != dim0.interval_key
+            else {"months": max_rolling_period * 3}
+        )
         filter_.definition.start.value -= relativedelta(**args)
 
     return filters
@@ -71,32 +77,52 @@ def adjust_dataframe_for_rolling_window(operations, data_frame):
     :param data_frame:
     :return:
     """
-    has_rolling = any([isinstance(operation, RollingOperation)
-                       for operation in operations])
+    has_rolling = any(
+        [isinstance(operation, RollingOperation) for operation in operations]
+    )
     if not has_rolling:
         return data_frame
 
-    max_rolling_period = max(operation.window
-                             for operation in operations
-                             if isinstance(operation, RollingOperation))
+    max_rolling_period = max(
+        operation.window
+        for operation in operations
+        if isinstance(operation, RollingOperation)
+    )
 
     if isinstance(data_frame.index, pd.DatetimeIndex):
-        return data_frame.iloc[max_rolling_period - 1:]
+        return data_frame.iloc[max_rolling_period - 1 :]
 
-    if isinstance(data_frame.index, pd.MultiIndex) \
-            and isinstance(data_frame.index.levels[0], pd.DatetimeIndex):
+    if isinstance(data_frame.index, pd.MultiIndex) and isinstance(
+        data_frame.index.levels[0], pd.DatetimeIndex
+    ):
         num_levels = len(data_frame.index.levels)
 
-        return data_frame.groupby(level=list(range(1, num_levels))) \
-            .apply(lambda df: df.iloc[max_rolling_period - 1:]) \
+        return (
+            data_frame.groupby(level=list(range(1, num_levels)))
+            .apply(lambda df: df.iloc[max_rolling_period - 1 :])
             .reset_index(level=list(range(num_levels - 1)), drop=True)
+        )
 
     return data_frame
 
 
-def apply_to_query_args(dataset, database, table, joins, dimensions, metrics, operations, filters, references, orders):
-    filters = adjust_daterange_filter_for_rolling_window(dimensions, operations, filters)
-    return (dataset, database, table, joins, dimensions, metrics, operations, filters, references, orders)
+def apply_to_query_args(
+    database, table, joins, dimensions, metrics, operations, filters, references, orders
+):
+    filters = adjust_daterange_filter_for_rolling_window(
+        dimensions, operations, filters
+    )
+    return (
+        database,
+        table,
+        joins,
+        dimensions,
+        metrics,
+        operations,
+        filters,
+        references,
+        orders,
+    )
 
 
 def apply_special_cases(f):
diff --git a/fireant/queries/sql_transformer.py b/fireant/queries/sql_transformer.py
index a20f82e2..1d8c24e0 100644
--- a/fireant/queries/sql_transformer.py
+++ b/fireant/queries/sql_transformer.py
@@ -15,8 +15,7 @@
     flatten,
 )
 from .field_helper import (
-    make_term_for_dimension,
-    make_term_for_metrics,
+    make_term_for_field,
 )
 from .finders import (
     find_and_group_references_for_dimensions,
@@ -31,7 +31,6 @@
 
 @apply_special_cases
 def make_slicer_query_with_totals_and_references(
-    dataset,
     database,
     table,
     joins,
@@ -167,7 +166,7 @@ def make_slicer_query(
     # Add dimensions
     for dimension in dimensions:
-        dimension_term = make_term_for_dimension(dimension, database.trunc_date)
+        dimension_term = make_term_for_field(dimension, database.trunc_date)
         query = query.select(dimension_term)
 
         if not isinstance(dimension, Rollup):
             query = query.groupby(dimension_term)
@@ -181,14 +180,15 @@ def make_slicer_query(
         )
 
     # Add metrics
-    metric_terms = [make_term_for_metrics(metric) for metric in metrics]
+    metric_terms = [make_term_for_field(metric) for metric in metrics]
     if metric_terms:
         query = query.select(*metric_terms)
 
     # In the case that the orders are determined by a field that is not selected as a metric or dimension, then it needs
     # to be added to the query.
     select_aliases = {el.alias for el in query._selects}
-    for (orderby_term, orientation) in orders:
+    for (orderby_field, orientation) in orders:
+        orderby_term = make_term_for_field(orderby_field)
         query = query.orderby(orderby_term, order=orientation)
 
         if orderby_term.alias not in select_aliases:
diff --git a/fireant/tests/queries/test_build_data_blending.py b/fireant/tests/queries/test_build_data_blending.py
index a0700396..965e3345 100644
--- a/fireant/tests/queries/test_build_data_blending.py
+++ b/fireant/tests/queries/test_build_data_blending.py
@@ -9,6 +9,9 @@
 
 
 # noinspection SqlDialectInspection,SqlNoDataSourceInspection
+from pypika import Order
+
+
 class DataSetBlenderQueryBuilderTests(TestCase):
     maxDiff = None
 
@@ -220,7 +223,6 @@ def test_apply_dimension_filter_on_UNmapped_dimension_field_filters_in_dataset_n
     def test_multiple_metrics_with_an_order_by_in_query_applies_order_to_wrapping_query(
         self,
     ):
-        # TODO test order by metric not selected
         queries = (
             mock_dataset_blender.query()
             .widget(
@@ -312,37 +314,36 @@ def test_apply_reference_to_blended_query(self):
             )
             .dimension(f.day(mock_dataset_blender.fields.timestamp))
             .reference(f.WeekOverWeek(mock_dataset_blender.fields.timestamp))
-            # TODO enforce reference only on mapped dimensions
         )
         sql = query.sql
 
         self.assertEqual(len(sql), 2)
         (base_query, ref_query) = sql
-        # with self.subTest("base query"):
-        self.assertEqual(
-            "SELECT "
-            '"sq0"."$timestamp" "$timestamp",'
-            '"sq1"."$candidate-spend"/"sq0"."$wins" "$candidate-spend-per-wins" '
-            "FROM ("
-            "SELECT "
-            'TRUNC("timestamp",\'DD\') "$timestamp",'
-            'SUM("is_winner") "$wins" '
-            'FROM "politics"."politician" '
-            'GROUP BY "$timestamp" ORDER BY "$timestamp"'
-            ') "sq0" '
-            "LEFT JOIN ("
-            "SELECT "
-            'TRUNC("timestamp",\'DD\') "$timestamp",'
-            'SUM("candidate_spend") "$candidate-spend" '
-            'FROM "politics"."politician_spend" '
-            'GROUP BY "$timestamp" ORDER BY "$timestamp"'
-            ') "sq1" '
-            "ON "
-            '"sq0"."$timestamp"="sq1"."$timestamp" '
-            'ORDER BY "$timestamp"',
-            str(base_query),
-        )
-
+        with self.subTest("base query"):
+            self.assertEqual(
+                "SELECT "
+                '"sq0"."$timestamp" "$timestamp",'
+                '"sq1"."$candidate-spend"/"sq0"."$wins" "$candidate-spend-per-wins" '
+                "FROM ("
+                "SELECT "
+                'TRUNC("timestamp",\'DD\') "$timestamp",'
+                'SUM("is_winner") "$wins" '
+                'FROM "politics"."politician" '
+                'GROUP BY "$timestamp" ORDER BY "$timestamp"'
+                ') "sq0" '
+                "LEFT JOIN ("
+                "SELECT "
+                'TRUNC("timestamp",\'DD\') "$timestamp",'
+                'SUM("candidate_spend") "$candidate-spend" '
+                'FROM "politics"."politician_spend" '
+                'GROUP BY "$timestamp" ORDER BY "$timestamp"'
+                ') "sq1" '
+                "ON "
+                '"sq0"."$timestamp"="sq1"."$timestamp" '
+                'ORDER BY "$timestamp"',
+                str(base_query),
+            )
         with self.subTest("ref query"):
             self.assertEqual(
                 "SELECT "
@@ -412,7 +413,52 @@ def test_apply_totals_to_blended_query(self):
             'ORDER BY "$timestamp","$candidate-id"',
             str(base_query),
         )
-        # with self.subTest("ref query"):
+        with self.subTest("totals query"):
+            self.assertEqual(
+                "SELECT "
+                '"sq0"."$timestamp" "$timestamp",'
+                '"sq0"."$candidate-id" "$candidate-id",'
+                '"sq1"."$candidate-spend"/"sq0"."$wins" "$candidate-spend-per-wins" '
+                "FROM ("
+                "SELECT "
+                'TRUNC("timestamp",\'DD\') "$timestamp",'
+                'NULL "$candidate-id",'
+                'SUM("is_winner") "$wins" '
+                'FROM "politics"."politician" '
+                'GROUP BY "$timestamp" '
+                'ORDER BY "$timestamp","$candidate-id"'
+                ') "sq0" '
+                "LEFT JOIN ("
+                "SELECT "
+                'TRUNC("timestamp",\'DD\') "$timestamp",'
+                'NULL "$candidate-id",'
+                'SUM("candidate_spend") "$candidate-spend" '
+                'FROM "politics"."politician_spend" '
+                'GROUP BY "$timestamp" '
+                'ORDER BY "$timestamp","$candidate-id"'
+                ') "sq1" '
+                "ON "
+                '"sq0"."$timestamp"="sq1"."$timestamp" '
+                'AND "sq0"."$candidate-id"="sq1"."$candidate-id" '
+                'ORDER BY "$timestamp","$candidate-id"',
+                str(totals_query),
+            )
+
+    def test_blended_query_with_orderby_mapped_dimension(self):
+        queries = (
+            mock_dataset_blender.query()
+            .widget(
+                f.ReactTable(mock_dataset_blender.fields["candidate-spend-per-wins"])
+            )
+            .dimension(
+                f.day(mock_dataset_blender.fields.timestamp),
+                mock_dataset_blender.fields["candidate-id"],
+            )
+            .orderby(mock_dataset_blender.fields["candidate-id"], Order.desc)
+        ).sql
+
+        self.assertEqual(len(queries), 1)
+        (query,) = queries
         self.assertEqual(
             "SELECT "
             '"sq0"."$timestamp" "$timestamp",'
             '"sq0"."$candidate-id" "$candidate-id",'
             '"sq1"."$candidate-spend"/"sq0"."$wins" "$candidate-spend-per-wins" '
             "FROM ("
             "SELECT "
             'TRUNC("timestamp",\'DD\') "$timestamp",'
-            'NULL "$candidate-id",'
+            '"candidate_id" "$candidate-id",'
             'SUM("is_winner") "$wins" '
             'FROM "politics"."politician" '
-            'GROUP BY "$timestamp" '
+            'GROUP BY "$timestamp","$candidate-id" '
             'ORDER BY "$timestamp","$candidate-id"'
             ') "sq0" '
             "LEFT JOIN ("
             "SELECT "
             'TRUNC("timestamp",\'DD\') "$timestamp",'
-            'NULL "$candidate-id",'
+            '"candidate_id" "$candidate-id",'
             'SUM("candidate_spend") "$candidate-spend" '
             'FROM "politics"."politician_spend" '
-            'GROUP BY "$timestamp" '
+            'GROUP BY "$timestamp","$candidate-id" '
             'ORDER BY "$timestamp","$candidate-id"'
             ') "sq1" '
             "ON "
             '"sq0"."$timestamp"="sq1"."$timestamp" '
             'AND "sq0"."$candidate-id"="sq1"."$candidate-id" '
-            'ORDER BY "$timestamp","$candidate-id"',
-            str(totals_query),
+            'ORDER BY "$candidate-id" DESC',
+            str(query),
         )
 
     def test_does_not_raise_SlicerException_when_a_dimension_is_not_mapped_for_unnecessary_secondary_datasets(