Skip to content

Commit

Permalink
Merge pull request #227 from kayak/add_support_for_reference_weekday_…
Browse files Browse the repository at this point in the history
…alignment

Fixed references to align to weekday
  • Loading branch information
twheys committed Apr 3, 2019
2 parents 849a025 + 017f451 commit f6f9fa2
Show file tree
Hide file tree
Showing 10 changed files with 283 additions and 267 deletions.
5 changes: 4 additions & 1 deletion fireant/dataset/intervals.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ def __hash__(self):
return hash(repr(self))


DATETIME_INTERVALS = ('hour', 'day', 'week', 'month', 'quarter', 'year')


class DatetimeInterval(DimensionModifier):
def __init__(self, dimension, interval_key):
super().__init__(dimension)
Expand All @@ -37,4 +40,4 @@ def __hash__(self):


hour, day, week, month, quarter, year = [partial(DatetimeInterval, interval_key=key)
for key in ('hour', 'day', 'week', 'month', 'quarter', 'year')]
for key in DATETIME_INTERVALS]
4 changes: 2 additions & 2 deletions fireant/dataset/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ def __repr__(self):


class ReferenceType(object):
def __init__(self, key, label, time_unit: str, interval: int):
self.alias = key
def __init__(self, alias, label, time_unit: str, interval: int):
self.alias = alias
self.label = label

self.time_unit = time_unit
Expand Down
4 changes: 2 additions & 2 deletions fireant/queries/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pypika import Order
from . import special_cases
from .execution import fetch_data
from .field_helper import make_orders_for_dimensions
from .finders import (
find_and_group_references_for_dimensions,
find_and_replace_reference_dimensions,
Expand All @@ -27,7 +28,6 @@
from .pagination import paginate
from .sql_transformer import (
make_latest_query,
make_orders_for_dimensions,
make_slicer_query,
make_slicer_query_with_totals_and_references,
)
Expand Down Expand Up @@ -178,7 +178,7 @@ def _validate(self):

@property
def reference_groups(self):
return list(find_and_group_references_for_dimensions(self._references).values())
return list(find_and_group_references_for_dimensions(self._dimensions, self._references).values())

@property
def sql(self):
Expand Down
48 changes: 48 additions & 0 deletions fireant/queries/field_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from fireant.dataset.intervals import DatetimeInterval
from fireant.utils import alias_selector


def make_term_for_metrics(metric):
    """
    Builds the aliased select term for a metric.

    :param metric:
        A slicer metric. Its ``definition`` and ``alias`` attributes are read.
    :return:
        The metric's definition, aliased (via ``as_``) with the result of ``alias_selector`` applied to the
        metric's alias.
    """
    selector_alias = alias_selector(metric.alias)
    return metric.definition.as_(selector_alias)


def make_term_for_dimension(dimension, window=None):
    """
    Makes the aliased term for a given slicer dimension.

    :param dimension:
        A slicer dimension. Its ``definition`` and ``alias`` attributes are read, plus ``interval_key`` when it is
        a ``DatetimeInterval``.
    :param window:
        Optional callable invoked as ``window(definition, interval_key)``. It is only applied when the dimension
        is a ``DatetimeInterval``; for any other dimension it is ignored.
    :return:
        A single term (not a list): the dimension's definition, wrapped by ``window`` where applicable, aliased
        with ``alias_selector(dimension.alias)``.
    """
    f_alias = alias_selector(dimension.alias)

    term = dimension.definition
    if window and isinstance(dimension, DatetimeInterval):
        # Only datetime interval dimensions carry an interval key for the window function to use.
        term = window(term, dimension.interval_key)

    return term.as_(f_alias)


def make_orders_for_dimensions(dimensions):
    """
    Creates a list of orderings for a slicer query based on a list of dimensions. Each dimension is ordered by the
    same aliased term that ``make_term_for_dimension`` produces for it (called without a window function).

    :param dimensions:
        An iterable of slicer dimensions.
    :return:
        a list of tuple pairs like (term, orientation) for ordering a SQL query where the first element is the term
        to order by and the second is the orientation of the ordering. The orientation is always None here, so no
        explicit ASC or DESC is requested by this function.
    """
    # Use the same function that builds the select terms so the ordering terms stay consistent with them.
    return [(make_term_for_dimension(dimension), None)
            for dimension in dimensions]
24 changes: 22 additions & 2 deletions fireant/queries/finders.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
toposort_flatten,
)

from fireant.dataset.intervals import (
DATETIME_INTERVALS,
DatetimeInterval,
)
from fireant.dataset.modifiers import (
OmitFromRollup,
Rollup,
Expand Down Expand Up @@ -186,13 +190,21 @@ def find_and_replace_reference_dimensions(references, dimensions):
return reference_copies


def find_and_group_references_for_dimensions(references):
interval_weekdays = {
'month': ('week', 4),
'quarter': ('week', 4 * 3),
'year': ('week', 4 * 13),
}


def find_and_group_references_for_dimensions(dimensions, references):
"""
Finds all of the references for dimensions and groups them by dimension, interval unit, number of intervals.
This structure reflects how the references need to be joined to the slicer query. References of the same
type (WoW, WoW.delta, WoW.delta_percent) can share a join query.
:param dimensions:
:param references:
:return:
Expand All @@ -207,9 +219,17 @@ def find_and_group_references_for_dimensions(references):
(Dimension(date_7), 'days', 1): [DoD, DoD.delta_percent],
}
"""
align_weekdays = dimensions \
and isinstance(dimensions[0], DatetimeInterval) \
and -1 < DATETIME_INTERVALS.index(dimensions[0].interval_key) < 3

def get_dimension_time_unit_and_interval(reference):
return reference.field, reference.time_unit, reference.interval
defaults = (reference.time_unit, 1)
time_unit, interval_muliplier = interval_weekdays.get(reference.time_unit, defaults) \
if align_weekdays \
else defaults

return reference.field, time_unit, interval_muliplier * reference.interval

distinct_references = ordered_distinct_list(references)
return groupby(distinct_references, get_dimension_time_unit_and_interval)
60 changes: 14 additions & 46 deletions fireant/queries/reference_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,63 +2,44 @@
from functools import partial

from fireant.dataset.fields import Field
from fireant.dataset.intervals import (
DatetimeInterval,
week,
)
from fireant.dataset.references import YearOverYear
from pypika.terms import (
ComplexCriterion,
Criterion,
Term,
)
from .field_helper import make_term_for_dimension


def adapt_for_reference_query(reference_parts, database, dimensions, metrics, filters, references):
if reference_parts is None:
return database, dimensions, metrics, filters
return dimensions, metrics, filters

ref_dimension, time_unit, interval = reference_parts
# Each group is guaranteed to have one reference and they will all be grouped together by reference type
ref_database = _make_reference_database(database,
ref_dimension,
time_unit,
interval)

ref_metrics = _make_reference_metrics(metrics,
references[0].reference_type.alias)
offset_func = partial(database.date_add,
date_part=time_unit,
interval=interval)
ref_dimensions = _make_reference_dimensions(dimensions,
ref_dimensions = _make_reference_dimensions(database,
dimensions,
ref_dimension,
offset_func)
ref_filters = _make_reference_filters(filters,
ref_dimension,
offset_func)
return ref_database, ref_dimensions, ref_metrics, ref_filters


def _make_reference_database(database, ref_dimension, time_unit, interval):
# NOTE: In the case of weekly intervals with YoY references, the trunc date function needs to adjust for weekday
# to keep things aligned. To do this, the date is first shifted forward a year before being truncated by week
# and then shifted back.
offset_for_weekday = isinstance(ref_dimension, DatetimeInterval) \
and ref_dimension.interval_key == week.keywords['interval_key'] \
and YearOverYear.time_unit == time_unit

if offset_for_weekday:
return _monkey_patch_align_weekdays(database, time_unit, interval)

return database
return ref_dimensions, ref_metrics, ref_filters


def _make_reference_dimensions(dimensions, ref_dimension, offset_func):
def _make_reference_dimensions(database, dimensions, ref_dimension, offset_func):
def replace_reference_dimension(dimension):
ref_dimension = copy.copy(dimension)
if hasattr(ref_dimension, 'dimension'):
ref_dimension.dimension = copy.copy(dimension.dimension)
ref_dimension.definition = offset_func(ref_dimension.definition)
return ref_dimension
ref_dimension_copy = copy.copy(dimension)
if hasattr(ref_dimension_copy, 'dimension'):
ref_dimension_copy.dimension = copy.copy(dimension.dimension)

ref_definition = make_term_for_dimension(ref_dimension_copy, database.trunc_date)
ref_dimension_copy.definition = offset_func(ref_definition)
return ref_dimension_copy

return [replace_reference_dimension(dimension)
if dimension is ref_dimension
Expand Down Expand Up @@ -99,19 +80,6 @@ def _make_reference_filters(filters, ref_dimension, offset_func):
return reference_filters


def _monkey_patch_align_weekdays(database, time_unit, interval):
original_trunc_date = database.__class__.trunc_date

def trunc_date(definition, _):
offset = original_trunc_date(database, definition, week.keywords['interval_key'])
return database.date_add(offset, time_unit, -interval)

# Copy the database to avoid side effects then monkey patch the trunc date function with the correction for weekday
database = copy.deepcopy(database)
database.trunc_date = trunc_date
return database


def _apply_to_term_in_criterion(target: Term,
replacement: Term,
criterion: Criterion):
Expand Down
Loading

0 comments on commit f6f9fa2

Please sign in to comment.