Special case for dimension filter that's not selected.
gl3nn committed Sep 14, 2020
1 parent 4994f8b commit 74da3df
Showing 2 changed files with 117 additions and 47 deletions.
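
In user-facing terms, the special case is a query that filters on a blended dimension which is mapped in only one dataset and is not itself selected. A minimal sketch of such a query, reusing the names from the new integration test at the bottom of this commit ("account" is mapped only in the primary dataset):

    # "account" is filtered but never selected as a dimension; the blender must
    # still include the primary dataset's query so the filter can be applied.
    sql = (
        blend_ds.query()
        .dimension(blend_ds.fields.timestamp)
        .widget(ReactTable(blend_ds.fields.metric1))
        .filter(blend_ds.fields.account.isin(["123"]))
    ).sql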
96 changes: 49 additions & 47 deletions fireant/queries/builder/dataset_blender_query_builder.py
@@ -208,10 +208,6 @@ def _build_dataset_query(
     operations,
     share_dimensions,
 ):
-
-    if not dataset_metrics and not dataset_dimensions:
-        return []
-
     dataset_references = map_blender_fields_to_dataset_fields(
         references, field_map, dataset
     )
@@ -223,7 +219,7 @@
     # TODO: It's possible that we have to adapt/map the operations for @apply_special_cases
     dataset_operations = operations

-    result = make_slicer_query_with_totals_and_references(
+    return make_slicer_query_with_totals_and_references(
         database=dataset.database,
         table=dataset.table,
         joins=dataset.joins,
@@ -235,9 +231,6 @@
         orders=[],
         share_dimensions=dataset_share_dimensions,
     )
-    if result[0].get_sql() == "":
-        return []
-    return result


 def _blender_join_criteria(
@@ -381,8 +374,8 @@ def sql(self):
         self._validate()

         datasets, field_maps = _datasets_and_field_maps(self.dataset, self._filters)
-        selected_blender_dimensions = self.dimensions

+        selected_blender_dimensions = self.dimensions
         selected_blender_dimensions_aliases = {
             dimension.alias for dimension in selected_blender_dimensions
         }
@@ -391,22 +384,29 @@ def sql(self):
             metric.alias for metric in selected_blender_metrics
         }

-        operations = find_operations_for_widgets(self._widgets)
-        share_dimensions = find_share_dimensions(
-            selected_blender_dimensions, operations
-        )
-        non_set_filters = omit_set_filters(self._filters)
-
         # Add fields to be ordered on, to metrics if they aren't yet selected in metrics or dimensions
-        for field, orientation in self.orders:
+        # To think about: if the selected order_by field is a dimension, should we add it to dimensions?
+        for field, _ in self.orders:
             if (
                 field.alias not in selected_blender_metrics_aliases
                 and field.alias not in selected_blender_dimensions_aliases
             ):
                 selected_blender_metrics.append(field)

-        selected_metrics_as_dataset_fields = find_dataset_fields(
-            selected_blender_metrics
-        )
+        operations = find_operations_for_widgets(self._widgets)
+        share_dimensions = find_share_dimensions(
+            selected_blender_dimensions, operations
+        )
+        non_set_filters = omit_set_filters(self._filters)
+        # Needed dimensions in final query as tuples of (dimension, is_selected_dimension)
+        needed_blender_dimensions = [(dimension_field, True) for dimension_field in selected_blender_dimensions]
+        # Add dimension filters which are not selected to the pool of needed dimensions
+        for filter_ in non_set_filters:
+            if not is_metric_field(filter_.field) and (filter_.field.alias not in selected_blender_dimensions_aliases):
+                needed_blender_dimensions.append((filter_.field, False))
+
+        selected_metrics_as_dataset_fields = find_dataset_fields(selected_blender_metrics)

         # Determine for each dataset which metrics and dimensions need to be selected
         dataset_dimensions = [[] for _ in range(len(datasets))]
@@ -446,28 +446,25 @@ def sql(self):
         # Second map the dimensions and find the dimensions which are unique to a dataset. Include those.
         # Also save for each dimension of which datasets it is part of.
         dimensions_dataset_info = []
-        print("HERE")
-        for dimension in selected_blender_dimensions:
-            print(dimension)
+        for blender_dimension_field, is_selected_dimension in needed_blender_dimensions:
             dimension_dataset_info = []

             for dataset_index, dataset in enumerate(datasets):
                 mapped_dimension = map_blender_field_to_dataset_field(
-                    dimension, field_maps[dataset_index], dataset
+                    blender_dimension_field, field_maps[dataset_index], dataset
                 )

                 if mapped_dimension is not None:
-                    dimension_dataset_info.append((dataset_index, mapped_dimension))
+                    dimension_dataset_info.append((dataset_index, mapped_dimension, is_selected_dimension))

             if len(dimension_dataset_info) == 0:
                 # This case should only happen when using sets, otherwise I would have raised the following exception:
                 # raise Exception("Dimension requested that was not part of any dataset.")
                 pass
             elif len(dimension_dataset_info) == 1:
                 # This is the only dataset that has this dimension, assign it
-                dataset_index, _ = dimension_dataset_info[0]
+                dataset_index, _, _ = dimension_dataset_info[0]
                 dataset_included_in_final_query[dataset_index] = True
-                # dataset_dimensions[dataset_index].add(mapped_dimension)

             if dimension_dataset_info:
                 dimensions_dataset_info.append(dimension_dataset_info)
@@ -477,11 +474,12 @@ def sql(self):
         for dimension_dataset_info in dimensions_dataset_info:
             dimension_accounted_for = False
             first_dataset_that_has_the_dimension = None
-            for (dataset_index, mapped_dimension) in dimension_dataset_info:
+            for (dataset_index, mapped_dimension, is_selected_dimension) in dimension_dataset_info:
                 # If the dataset is already part of the final query, add this dimension
                 if dataset_included_in_final_query[dataset_index]:
                     dimension_accounted_for = True
-                    dataset_dimensions[dataset_index].append(mapped_dimension)
+                    if is_selected_dimension:
+                        dataset_dimensions[dataset_index].append(mapped_dimension)

                 # Update first_dataset_that_has_the_dimension if needed
                 if (
@@ -491,27 +489,36 @@ def sql(self):
                     first_dataset_that_has_the_dimension = (
                         dataset_index,
                         mapped_dimension,
+                        is_selected_dimension,
                     )

             if not dimension_accounted_for:
                 # Dimension not yet accounted for! Take first dataset that has the dimension.
-                dataset_index, mapped_dimension = first_dataset_that_has_the_dimension
-                dataset_dimensions[dataset_index].append(mapped_dimension)
+                dataset_index, mapped_dimension, is_selected_dimension = first_dataset_that_has_the_dimension
                 dataset_included_in_final_query[dataset_index] = True
+                if is_selected_dimension:
+                    dataset_dimensions[dataset_index].append(mapped_dimension)

         datasets_queries = []
+        filtered_field_maps = []
         for dataset_index, dataset in enumerate(datasets):
-            datasets_queries.append(
-                _build_dataset_query(
-                    dataset,
-                    field_maps[dataset_index],
-                    dataset_metrics[dataset_index],
-                    dataset_dimensions[dataset_index],
-                    dataset_filters[dataset_index],
-                    self._references,
-                    operations,
-                    share_dimensions,
-                )
-            )
+            if dataset_included_in_final_query[dataset_index]:
+                datasets_queries.append(
+                    _build_dataset_query(
+                        dataset,
+                        field_maps[dataset_index],
+                        dataset_metrics[dataset_index],
+                        dataset_dimensions[dataset_index],
+                        dataset_filters[dataset_index],
+                        self._references,
+                        operations,
+                        share_dimensions,
+                    )
+                )
+                # Filter the field maps of which the dataset is not going to be in the final query.
+                filtered_field_maps.append(field_maps[dataset_index])

         """
         A dataset query can yield one or more sql queries, depending on how many types of references or dimensions
@@ -535,15 +542,10 @@ def sql(self):
         # There will be the same amount of query sets as the longest length of queries for a single dataset
         query_sets = [[] for _ in range(per_dataset_queries_count)]

-        filtered_field_maps = []
         # Add the queries returned for each dataset to the correct queryset
         for dataset_index, dataset_queries in enumerate(datasets_queries):
-            if len(dataset_queries) != 0:
-                # Only include datasets which resulted in queries, filter field maps the same way.
-                filtered_field_maps.append(field_maps[dataset_index])
-
-                for i, query_set in enumerate(query_sets):
-                    query_set.append(dataset_queries[i])
+            for i, query in enumerate(dataset_queries):
+                query_sets[i].append(query)

         blended_queries = []
         for queryset in query_sets:
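Condensed, the rule the middle hunks implement is: a filter-only dimension (is_selected_dimension == False) still forces its dataset into the blended query, but never enters that dataset query's SELECT/GROUP BY list. A simplified sketch of the flow, where mappings_for is a hypothetical stand-in for the map_blender_field_to_dataset_field loop and the multi-dataset tie-breaking is omitted:

    # Simplified sketch only; the real code picks one dataset per dimension
    # instead of marking every dataset that can map it (mappings_for is
    # a hypothetical helper, not part of fireant).
    for dimension_field, is_selected in needed_blender_dimensions:
        for dataset_index, mapped_dimension in mappings_for(dimension_field, datasets):
            dataset_included_in_final_query[dataset_index] = True
            if is_selected:
                dataset_dimensions[dataset_index].append(mapped_dimension)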
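The rewritten loop at the end of the file zips per-dataset queries into query sets: set i blends the i-th query of every included dataset (per the truncated docstring, one query per kind of reference). A toy example of that zipping, with placeholder strings standing in for real dataset queries:

    datasets_queries = [["ds0_base", "ds0_ref"], ["ds1_base", "ds1_ref"]]
    query_sets = [[] for _ in range(2)]  # longest per-dataset query count
    for dataset_queries in datasets_queries:
        for i, query in enumerate(dataset_queries):
            query_sets[i].append(query)
    # query_sets == [["ds0_base", "ds1_base"], ["ds0_ref", "ds1_ref"]]
    # Each query set is then blended into a single SQL statement.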
68 changes: 68 additions & 0 deletions fireant/tests/queries/test_data_blending_integration.py
@@ -382,6 +382,74 @@ def test_select_unmapped_dimension_from_primary_but_only_metric_from_secondary(
             str(query),
         )

+    def test_filter_unmapped_dimension_from_primary_with_only_metric_selected_from_secondary(
+        self,
+    ):
+        db = Database()
+        t0, t1 = Tables("test0", "test1")
+        primary_ds = DataSet(
+            table=t0,
+            database=db,
+            fields=[
+                Field(
+                    "timestamp",
+                    label="Timestamp",
+                    definition=t0.timestamp,
+                    data_type=DataType.date,
+                ),
+                Field(
+                    "account",
+                    label="Account",
+                    definition=t0.account,
+                    data_type=DataType.number,
+                ),
+            ],
+        )
+        secondary_ds = DataSet(
+            table=t1,
+            database=db,
+            fields=[
+                Field(
+                    "timestamp",
+                    label="Timestamp",
+                    definition=t1.timestamp,
+                    data_type=DataType.date,
+                ),
+                Field(
+                    "metric1",
+                    label="Metric1",
+                    definition=fn.Sum(t1.metric),
+                    data_type=DataType.number,
+                ),
+            ],
+        )
+        blend_ds = primary_ds.blend(secondary_ds).on(
+            {primary_ds.fields.timestamp: secondary_ds.fields.timestamp}
+        )
+
+        sql = (
+            blend_ds.query()
+            .dimension(blend_ds.fields.timestamp)
+            .widget(ReactTable(blend_ds.fields.metric1))
+            .filter(blend_ds.fields.account.isin(["123"]))
+        ).sql
+
+        (query,) = sql
+        self.assertEqual(
+            'SELECT "sq0"."$timestamp" "$timestamp","sq1"."$metric1" "$metric1" '
+            'FROM ('
+            'SELECT "timestamp" "$timestamp" FROM "test0" '
+            'WHERE "account" IN (\'123\') '
+            'GROUP BY "$timestamp") "sq0" '
+            'LEFT JOIN ('
+            'SELECT "timestamp" "$timestamp",SUM("metric") "$metric1" FROM "test1" '
+            'GROUP BY "$timestamp"'
+            ') "sq1" ON "sq0"."$timestamp"="sq1"."$timestamp" '
+            'ORDER BY "$timestamp" '
+            'LIMIT 200000',
+            str(query),
+        )
+
     def test_select_unmapped_dimension_from_primary_and_metrics_from_both_datasets(
         self,
     ):
