Skip to content

Commit

Permalink
Merge pull request #277 from kayak/data_blending_no_dimension_restriction
Browse files Browse the repository at this point in the history

Data blending dimensions
  • Loading branch information
twheys committed Jan 21, 2020
2 parents ef9be15 + aaf04ed commit 8cc95b6
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 29 deletions.
36 changes: 18 additions & 18 deletions fireant/queries/builder/dataset_blender_query_builder.py
Expand Up @@ -5,7 +5,6 @@
List,
)

from fireant.exceptions import DataSetException
from fireant.queries.builder.dataset_query_builder import DataSetQueryBuilder
from fireant.queries.finders import (
find_dataset_metrics,
Expand Down Expand Up @@ -37,7 +36,7 @@ def _flatten_blend_datasets(dataset) -> List:
return zip(*_flatten_blend_datasets(blender))


def _replace_field(dimension, field_map=None, dataset=None, omit_umapped=False):
def _replace_field(dimension, field_map=None, dataset=None):
root_dimension = find_field_in_modified_field(dimension)
if root_dimension is not dimension:
# Handle modified dimensions
Expand All @@ -47,18 +46,12 @@ def _replace_field(dimension, field_map=None, dataset=None, omit_umapped=False):
if field_map is not None and dimension in field_map:
return field_map.get(dimension, None)

if dataset is not None and dimension.alias in dataset.fields:
return dataset.fields[dimension.alias]
if dataset is not None:
if dimension.definition is not None and dimension.definition in dataset.fields:
return dimension.definition

if dimension.definition is not None:
return dimension.definition

if not omit_umapped and dimension not in field_map:
raise DataSetException(
"Invalid Dimension {}. Dimensions must be mapped in order to be used in a blender query.".format(
dimension.alias
)
)
if dimension.alias in dataset.fields:
return dataset.fields[dimension.alias]

return field_map.get(dimension, None)

Expand All @@ -80,12 +73,16 @@ def _build_dataset_query(dataset, field_map, metrics, dimensions, filters, refer
return None

blended_dimensions = [
_replace_field(dimension, field_map, dataset) for dimension in dimensions
dimension
for dimension in [
_replace_field(dimension, field_map, dataset) for dimension in dimensions
]
if dimension is not None
]

blended_filters = []
for fltr in filters:
filter_field = _replace_field(fltr.field, field_map, dataset, omit_umapped=True)
filter_field = _replace_field(fltr.field, field_map, dataset)

if filter_field not in dataset.fields:
continue
Expand Down Expand Up @@ -114,9 +111,12 @@ def _join_criteria_for_blender_subqueries(primary, secondary, dimensions, field_
join_criteria = []

for dimension in dimensions:
mapped_dimension = _replace_field(dimension, field_map)
p_alias = alias_selector(dimension.alias)
s_alias = alias_selector(mapped_dimension.alias)
primary_dimension = find_field_in_modified_field(dimension).definition
if primary_dimension not in field_map:
continue
secondary_dimension = field_map[primary_dimension]
p_alias = alias_selector(primary_dimension.alias)
s_alias = alias_selector(secondary_dimension.alias)
join_criteria.append(primary[p_alias] == secondary[s_alias])

return reduce(lambda a, b: a & b, join_criteria)
Expand Down
50 changes: 39 additions & 11 deletions fireant/tests/queries/test_build_data_blending.py
Expand Up @@ -103,6 +103,45 @@ def test_using_datablender_metric_builds_query(self):
str(queries[0]),
)

def test_using_datablender_builds_query_with_mapped_and_unmapped_dimensions(self):
    """Check the SQL built for a blended metric with two dimensions.

    The query selects ``candidate-spend-per-wins`` grouped by day of
    ``timestamp`` and by ``political_party``. Per the expected SQL below,
    ``political_party`` is selected and grouped only in the first sub-query
    (``sq0``), while the second sub-query (``sq1``) and the join criteria
    use ``timestamp`` alone.
    """
    builder = mock_dataset_blender.query()
    builder = builder.widget(
        f.ReactTable(mock_dataset_blender.fields["candidate-spend-per-wins"])
    )
    builder = builder.dimension(
        f.day(mock_dataset_blender.fields.timestamp),
        mock_dataset_blender.fields.political_party,
    )
    generated = builder.sql

    # Outer select: both dimensions come from sq0, the metric spans sq0 and sq1.
    expected_sql = (
        "SELECT "
        '"sq0"."$timestamp" "$timestamp",'
        '"sq0"."$political_party" "$political_party",'
        '"sq1"."$candidate-spend"/"sq0"."$wins" "$candidate-spend-per-wins" '
        "FROM ("
        "SELECT "
        'TRUNC("timestamp",\'DD\') "$timestamp",'
        '"political_party" "$political_party",'
        'SUM("is_winner") "$wins" '
        'FROM "politics"."politician" '
        'GROUP BY "$timestamp","$political_party" ORDER BY "$timestamp","$political_party"'
        ') "sq0" '
        "LEFT JOIN ("
        "SELECT "
        'TRUNC("timestamp",\'DD\') "$timestamp",'
        'SUM("candidate_spend") "$candidate-spend" '
        'FROM "politics"."politician_spend" '
        'GROUP BY "$timestamp" ORDER BY "$timestamp"'
        ') "sq1" '
        "ON "
        '"sq0"."$timestamp"="sq1"."$timestamp" '
        'ORDER BY "$timestamp","$political_party"'
    )

    self.assertEqual(len(generated), 1)
    self.assertEqual(expected_sql, str(generated[0]))

def test_apply_metric_filter_to_dataset_field_filters_in_nested_dataset_query(self):
queries = (
mock_dataset_blender.query()
Expand Down Expand Up @@ -494,14 +533,3 @@ def test_does_not_raise_SlicerException_when_a_dimension_is_not_mapped_for_unnec
.widget(f.ReactTable(mock_dataset_blender.fields["votes"]))
.dimension(mock_dataset_blender.fields["district-id"])
).sql

def test_raises_SlicerException_when_a_dimension_from_a_necessary_secondary_dataset_is_not_mapped(
    self,
):
    """Expect ``DataSetException`` while building SQL for this blender query.

    The query selects the ``candidate-spend`` metric with a day-truncated
    ``timestamp`` dimension plus the ``district-id`` dimension. Every step,
    including the ``.sql`` access, stays inside the ``assertRaises`` context
    so the exception may originate from any of them.
    """
    with self.assertRaises(DataSetException):
        builder = mock_dataset_blender.query()
        builder = builder.widget(
            f.ReactTable(mock_dataset_blender.fields["candidate-spend"])
        )
        builder = builder.dimension(f.day(mock_dataset_blender.fields.timestamp))
        builder = builder.dimension(mock_dataset_blender.fields["district-id"])
        # The result is unused; the property is read only to trigger query building.
        _ = builder.sql

0 comments on commit 8cc95b6

Please sign in to comment.