From be69572cac61aa13408e21ea0567e25a0254656a Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Wed, 3 Dec 2025 08:06:08 -0800 Subject: [PATCH] Move default_time_dimension from Metric to Model Breaking change: default_time_dimension and default_grain are now model-level settings, not per-metric. This aligns with MetricFlow's approach where all metrics in a semantic model share the same default time dimension. When a model has default_time_dimension set and no time dimension from that model is in the query, auto-include it. Also applies default_grain if specified. User-provided time dimensions override the default. --- docs/advanced-features.qmd | 29 ++- docs/models.qmd | 2 + docs/python-api.qmd | 4 - sidemantic/adapters/metricflow.py | 24 ++- sidemantic/adapters/omni.py | 5 +- sidemantic/adapters/sidemantic.py | 14 +- sidemantic/core/metric.py | 6 - sidemantic/core/model.py | 10 + sidemantic/sql/generator.py | 55 ++++- .../test_metadata_adapter_roundtrip.py | 15 +- tests/core/test_sql_definitions.py | 6 +- tests/metrics/test_default_time_dimension.py | 201 ++++++++++++++++++ tests/test_metadata_fields.py | 77 ++++--- 13 files changed, 375 insertions(+), 73 deletions(-) create mode 100644 tests/metrics/test_default_time_dimension.py diff --git a/docs/advanced-features.qmd b/docs/advanced-features.qmd index 51919bd8..d4b642bb 100644 --- a/docs/advanced-features.qmd +++ b/docs/advanced-features.qmd @@ -254,15 +254,30 @@ metrics: non_additive_dimension: "date" # Don't sum averages across time! ``` -### Default Dimensions +### Default Time Dimension -Specify default time dimension and granularity: +Specify default time dimension and granularity at the model level. When querying metrics from this model without explicitly selecting a time dimension, the default will be auto-included: ```yaml -metrics: - - name: daily_revenue - agg: sum - sql: amount +models: + - name: orders + table: orders_table default_time_dimension: "order_date" - default_grain: "day" + default_grain: "month" + dimensions: + - name: order_date + type: time + granularity: day + metrics: + - name: revenue + agg: sum + sql: amount +``` + +```python +# Auto-includes orders.order_date__month +layer.compile(metrics=["orders.revenue"]) + +# Override with different granularity +layer.compile(metrics=["orders.revenue"], dimensions=["orders.order_date__week"]) ``` diff --git a/docs/models.qmd b/docs/models.qmd index dd77de9f..5e47bfa9 100644 --- a/docs/models.qmd +++ b/docs/models.qmd @@ -58,6 +58,8 @@ orders = Model( - **relationships**: Relationships to other models (see [Relationships](relationships.qmd)) - **dimensions**: Attributes for grouping and filtering - **metrics**: Model-level aggregations +- **default_time_dimension**: Default time dimension to auto-include in queries +- **default_grain**: Default granularity for the time dimension (`hour`, `day`, `week`, `month`, `quarter`, `year`) ## Dimensions diff --git a/docs/python-api.qmd b/docs/python-api.qmd index eab5f2ac..3eb12828 100644 --- a/docs/python-api.qmd +++ b/docs/python-api.qmd @@ -331,8 +331,6 @@ formatted_revenue = Metric( value_format_name="usd", drill_fields=["order_id", "customer_id", "order_date"], non_additive_dimension="customer_id", - default_time_dimension="order_date", - default_grain="day" ) # With inheritance @@ -386,8 +384,6 @@ extended_revenue = Metric( - **value_format_name**: Named format (e.g., `"usd"`, `"percent"`) - **drill_fields**: List of field names for drill-down - **non_additive_dimension**: Dimension this metric cannot be summed across -- **default_time_dimension**: Default time dimension for this metric -- **default_grain**: Default time granularity (`hour`, `day`, `week`, `month`, `quarter`, `year`) #### Inheritance diff --git a/sidemantic/adapters/metricflow.py b/sidemantic/adapters/metricflow.py index 5861c6f4..f6adbc1e 100644 --- a/sidemantic/adapters/metricflow.py +++ b/sidemantic/adapters/metricflow.py @@ -159,6 +159,11 @@ def _parse_semantic_model(self, model_def: dict) -> Model | None: # Parse inheritance extends = meta.get("extends") + # Parse default time dimension (MetricFlow uses defaults.agg_time_dimension) + defaults = model_def.get("defaults", {}) + default_time_dimension = defaults.get("agg_time_dimension") + default_grain = meta.get("default_grain") + return Model( name=name, table=table, @@ -169,6 +174,8 @@ def _parse_semantic_model(self, model_def: dict) -> Model | None: metrics=measures, segments=segments, extends=extends, + default_time_dimension=default_time_dimension, + default_grain=default_grain, ) def _parse_dimension(self, dim_def: dict) -> Dimension | None: @@ -258,8 +265,6 @@ def _parse_measure(self, measure_def: dict) -> Metric | None: format_str = meta.get("format") value_format_name = meta.get("value_format_name") drill_fields = meta.get("drill_fields") - default_time_dimension = meta.get("default_time_dimension") - default_grain = meta.get("default_grain") # Parse non_additive_dimension non_additive = measure_def.get("non_additive_dimension") @@ -282,8 +287,6 @@ def _parse_measure(self, measure_def: dict) -> Metric | None: value_format_name=value_format_name, drill_fields=drill_fields, non_additive_dimension=non_additive_dimension, - default_time_dimension=default_time_dimension, - default_grain=default_grain, ) def _parse_metric(self, metric_def: dict) -> Metric | None: @@ -543,15 +546,16 @@ def _export_semantic_model(self, model: Model) -> dict: measure_def["meta"]["drill_fields"] = measure.drill_fields if measure.non_additive_dimension: measure_def["non_additive_dimension"] = {"name": measure.non_additive_dimension} - if measure.default_time_dimension: - measure_def["meta"] = measure_def.get("meta", {}) - measure_def["meta"]["default_time_dimension"] = measure.default_time_dimension - if measure.default_grain: - measure_def["meta"] = measure_def.get("meta", {}) - measure_def["meta"]["default_grain"] = measure.default_grain result["measures"].append(measure_def) + # Export model-level default_time_dimension + if model.default_time_dimension: + result["defaults"] = {"agg_time_dimension": model.default_time_dimension} + if model.default_grain: + result["meta"] = result.get("meta", {}) + result["meta"]["default_grain"] = model.default_grain + # Export segments (as meta since MetricFlow doesn't have native segment support) if model.segments: result["meta"] = result.get("meta", {}) diff --git a/sidemantic/adapters/omni.py b/sidemantic/adapters/omni.py index 3ff2a7d8..b5cbdc8f 100644 --- a/sidemantic/adapters/omni.py +++ b/sidemantic/adapters/omni.py @@ -533,9 +533,8 @@ def _export_view(self, model: Model) -> dict[str, Any]: time_offset = metric.time_offset or self._comparison_type_to_offset(metric.comparison_type) # Find the time dimension to apply the offset to - # Typically this would be the model's default time dimension - # For now, use a generic name - user may need to adjust - time_field = metric.default_time_dimension or "created_at" + # Use the model's default time dimension + time_field = model.default_time_dimension or "created_at" measure_def["filters"] = { time_field: { diff --git a/sidemantic/adapters/sidemantic.py b/sidemantic/adapters/sidemantic.py index cfdc8d9e..fe535983 100644 --- a/sidemantic/adapters/sidemantic.py +++ b/sidemantic/adapters/sidemantic.py @@ -249,8 +249,6 @@ def _parse_model(self, model_def: dict) -> Model | None: value_format_name=measure_def.get("value_format_name"), drill_fields=measure_def.get("drill_fields"), non_additive_dimension=measure_def.get("non_additive_dimension"), - default_time_dimension=measure_def.get("default_time_dimension"), - default_grain=measure_def.get("default_grain"), ) measures.append(measure) @@ -313,6 +311,8 @@ def _parse_model(self, model_def: dict) -> Model | None: metrics=measures, segments=segments, pre_aggregations=pre_aggregations, + default_time_dimension=model_def.get("default_time_dimension"), + default_grain=model_def.get("default_grain"), ) def _parse_metric(self, metric_def: dict) -> Metric | None: @@ -424,12 +424,14 @@ def _export_model(self, model: Model) -> dict: measure_def["drill_fields"] = measure.drill_fields if measure.non_additive_dimension: measure_def["non_additive_dimension"] = measure.non_additive_dimension - if measure.default_time_dimension: - measure_def["default_time_dimension"] = measure.default_time_dimension - if measure.default_grain: - measure_def["default_grain"] = measure.default_grain result["metrics"].append(measure_def) + # Export model-level default_time_dimension + if model.default_time_dimension: + result["default_time_dimension"] = model.default_time_dimension + if model.default_grain: + result["default_grain"] = model.default_grain + # Export segments if model.segments: result["segments"] = [] diff --git a/sidemantic/core/metric.py b/sidemantic/core/metric.py index 82451635..9c4c0a67 100644 --- a/sidemantic/core/metric.py +++ b/sidemantic/core/metric.py @@ -144,12 +144,6 @@ def handle_expr_and_parse_agg(cls, data): description="Dimension across which this metric cannot be summed (e.g., time for averages)", ) - # Defaults - default_time_dimension: str | None = Field(None, description="Default time dimension for this metric") - default_grain: Literal["hour", "day", "week", "month", "quarter", "year"] | None = Field( - None, description="Default time granularity for this metric" - ) - def __hash__(self) -> int: return hash((self.name, self.agg, self.sql)) diff --git a/sidemantic/core/model.py b/sidemantic/core/model.py index 772cacba..0ac0ea75 100644 --- a/sidemantic/core/model.py +++ b/sidemantic/core/model.py @@ -1,5 +1,7 @@ """Model definitions.""" +from typing import Literal + from pydantic import BaseModel, Field from sidemantic.core.dimension import Dimension @@ -35,6 +37,14 @@ class Model(BaseModel): default_factory=list, description="Pre-aggregation definitions for query optimization" ) + # Default time dimension for all metrics in this model + default_time_dimension: str | None = Field( + None, description="Default time dimension for metrics (auto-included in queries)" + ) + default_grain: Literal["hour", "day", "week", "month", "quarter", "year"] | None = Field( + None, description="Default time granularity when using default_time_dimension" + ) + def __init__(self, **data): super().__init__(**data) diff --git a/sidemantic/sql/generator.py b/sidemantic/sql/generator.py index a6e13339..d7695583 100644 --- a/sidemantic/sql/generator.py +++ b/sidemantic/sql/generator.py @@ -54,6 +54,56 @@ def _date_trunc(self, granularity: str, column_expr: str) -> str: date_trunc = exp.DateTrunc(this=col, unit=exp.Literal.string(granularity)) return date_trunc.sql(dialect=self.dialect) + def _apply_default_time_dimensions(self, metrics: list[str], dimensions: list[str]) -> list[str]: + """Auto-include default_time_dimension from models if not already present. + + If a model has default_time_dimension set and no time dimension from that + model is already in the dimensions list, add it with the default_grain. + + Args: + metrics: List of metric references + dimensions: List of dimension references + + Returns: + Updated dimensions list with default time dimensions added + """ + # Extract which models already have time dimensions in the query + models_with_time_dims = set() + for dim_ref in dimensions: + if "." in dim_ref: + model_name, dim_part = dim_ref.split(".", 1) + # Strip granularity suffix if present + dim_name = dim_part.split("__")[0] + model = self.graph.get_model(model_name) + if model: + dim = model.get_dimension(dim_name) + if dim and dim.type == "time": + models_with_time_dims.add(model_name) + + # Check each model referenced by metrics for default_time_dimension + added_dims = [] + models_checked = set() + for metric_ref in metrics: + if "." in metric_ref: + model_name, _ = metric_ref.split(".") + if model_name in models_checked: + continue + models_checked.add(model_name) + + model = self.graph.get_model(model_name) + if model and model.default_time_dimension: + # Only add if this model doesn't already have a time dimension + if model_name not in models_with_time_dims: + time_dim_ref = f"{model_name}.{model.default_time_dimension}" + # Apply default_grain if specified + if model.default_grain: + time_dim_ref = f"{time_dim_ref}__{model.default_grain}" + if time_dim_ref not in dimensions and time_dim_ref not in added_dims: + added_dims.append(time_dim_ref) + models_with_time_dims.add(model_name) + + return dimensions + added_dims + def generate_view( self, view_name: str, @@ -114,12 +164,15 @@ def generate( SQL query string """ metrics = metrics or [] - dimensions = dimensions or [] + dimensions = list(dimensions) if dimensions else [] filters = filters or [] segments = segments or [] parameters = parameters or {} aliases = aliases or {} + # Auto-include default_time_dimension from metrics if not already present + dimensions = self._apply_default_time_dimensions(metrics, dimensions) + # Resolve segments to SQL filters segment_filters = self._resolve_segments(segments) filters = filters + segment_filters diff --git a/tests/adapters/test_metadata_adapter_roundtrip.py b/tests/adapters/test_metadata_adapter_roundtrip.py index 2238ada7..6b65d155 100644 --- a/tests/adapters/test_metadata_adapter_roundtrip.py +++ b/tests/adapters/test_metadata_adapter_roundtrip.py @@ -16,6 +16,8 @@ def test_metadata_roundtrip_sidemantic_adapter(): table="orders_table", primary_key="order_id", description="Order data", + default_time_dimension="created_at", + default_grain="day", dimensions=[ Dimension( name="status", @@ -44,8 +46,6 @@ def test_metadata_roundtrip_sidemantic_adapter(): format="$#,##0.00", value_format_name="usd", drill_fields=["status", "customer_id"], - default_time_dimension="created_at", - default_grain="day", ), Metric( name="avg_order_value", @@ -119,8 +119,10 @@ def test_metadata_roundtrip_sidemantic_adapter(): assert revenue.format == "$#,##0.00" assert revenue.value_format_name == "usd" assert revenue.drill_fields == ["status", "customer_id"] - assert revenue.default_time_dimension == "created_at" - assert revenue.default_grain == "day" + + # Verify model-level default_time_dimension + assert imported.default_time_dimension == "created_at" + assert imported.default_grain == "day" avg_value = imported.get_metric("avg_order_value") assert avg_value is not None @@ -219,7 +221,10 @@ def test_empty_metadata_roundtrip(): user_count = imported.get_metric("user_count") assert user_count.format is None assert user_count.drill_fields is None - assert user_count.default_grain is None + + # default_time_dimension is now on model, not metric + assert imported.default_time_dimension is None + assert imported.default_grain is None status = imported.get_dimension("status") assert status.format is None diff --git a/tests/core/test_sql_definitions.py b/tests/core/test_sql_definitions.py index 239968cb..fad1f1bb 100644 --- a/tests/core/test_sql_definitions.py +++ b/tests/core/test_sql_definitions.py @@ -61,9 +61,7 @@ def test_parse_metric_all_fields(): format '$#,##0.00', filters status = 'completed', fill_nulls_with 0, - non_additive_dimension time, - default_time_dimension order_date, - default_grain day + non_additive_dimension time ); """ @@ -79,8 +77,6 @@ def test_parse_metric_all_fields(): assert metric.filters == ["status = 'completed'"] assert metric.fill_nulls_with == 0 assert metric.non_additive_dimension == "time" - assert metric.default_time_dimension == "order_date" - assert metric.default_grain == "day" def test_parse_ratio_metric(): diff --git a/tests/metrics/test_default_time_dimension.py b/tests/metrics/test_default_time_dimension.py new file mode 100644 index 00000000..2dbf8d14 --- /dev/null +++ b/tests/metrics/test_default_time_dimension.py @@ -0,0 +1,201 @@ +"""Test default_time_dimension auto-inclusion from Model.""" + +from sidemantic import Dimension, Metric, Model + + +def test_default_time_dimension_auto_included(layer): + """Test that model's default_time_dimension is auto-included when not specified.""" + orders = Model( + name="orders", + table="orders_table", + primary_key="order_id", + default_time_dimension="order_date", + dimensions=[ + Dimension(name="order_date", type="time", granularity="day"), + Dimension(name="status", type="categorical"), + ], + metrics=[ + Metric(name="revenue", agg="sum", sql="amount"), + ], + ) + + layer.add_model(orders) + + # Query without specifying time dimension + sql = layer.compile(metrics=["orders.revenue"]) + + print("SQL with auto-included time dimension:") + print(sql) + + # Should include order_date in the query + assert "order_date" in sql + # Should be in GROUP BY + assert "GROUP BY" in sql + + +def test_default_time_dimension_with_grain(layer): + """Test that default_grain is applied to auto-included time dimension.""" + orders = Model( + name="orders", + table="orders_table", + primary_key="order_id", + default_time_dimension="order_date", + default_grain="month", + dimensions=[ + Dimension(name="order_date", type="time", granularity="day"), + ], + metrics=[ + Metric(name="revenue", agg="sum", sql="amount"), + ], + ) + + layer.add_model(orders) + + sql = layer.compile(metrics=["orders.revenue"]) + + print("SQL with default grain:") + print(sql) + + # Should include order_date__month + assert "order_date__month" in sql + + +def test_default_time_dimension_override(layer): + """Test that user-specified time dimension overrides default.""" + orders = Model( + name="orders", + table="orders_table", + primary_key="order_id", + default_time_dimension="order_date", + default_grain="month", + dimensions=[ + Dimension(name="order_date", type="time", granularity="day"), + Dimension(name="created_at", type="time", granularity="day"), + ], + metrics=[ + Metric(name="revenue", agg="sum", sql="amount"), + ], + ) + + layer.add_model(orders) + + # User explicitly specifies a different time dimension + sql = layer.compile( + metrics=["orders.revenue"], + dimensions=["orders.created_at__week"], + ) + + print("SQL with user override:") + print(sql) + + # Should use user's choice, not default + assert "created_at__week" in sql + # Should NOT auto-add order_date__month since user provided a time dim + assert "order_date__month" not in sql + + +def test_default_time_dimension_same_override(layer): + """Test that user can override default with different granularity.""" + orders = Model( + name="orders", + table="orders_table", + primary_key="order_id", + default_time_dimension="order_date", + default_grain="month", + dimensions=[ + Dimension(name="order_date", type="time", granularity="day"), + ], + metrics=[ + Metric(name="revenue", agg="sum", sql="amount"), + ], + ) + + layer.add_model(orders) + + # User specifies same dimension but different grain + sql = layer.compile( + metrics=["orders.revenue"], + dimensions=["orders.order_date__week"], + ) + + print("SQL with same dim different grain:") + print(sql) + + # Should use user's week granularity + assert "order_date__week" in sql + # Should NOT add month since user already specified order_date + assert "order_date__month" not in sql + + +def test_no_default_time_dimension(layer): + """Test models without default_time_dimension work normally.""" + orders = Model( + name="orders", + table="orders_table", + primary_key="order_id", + dimensions=[ + Dimension(name="order_date", type="time", granularity="day"), + Dimension(name="status", type="categorical"), + ], + metrics=[ + Metric(name="revenue", agg="sum", sql="amount"), + ], + ) + + layer.add_model(orders) + + # Query without time dimension - should work, just aggregate all + sql = layer.compile(metrics=["orders.revenue"]) + + print("SQL without default time dimension:") + print(sql) + + # Should not have GROUP BY since no dimensions + assert "revenue" in sql + + +def test_multiple_models_different_defaults(layer): + """Test multiple models with different default_time_dimensions.""" + orders = Model( + name="orders", + table="orders_table", + primary_key="order_id", + default_time_dimension="order_date", + default_grain="month", + dimensions=[ + Dimension(name="order_date", type="time", granularity="day"), + Dimension(name="customer_id", type="categorical"), + ], + metrics=[ + Metric(name="revenue", agg="sum", sql="amount"), + ], + ) + + customers = Model( + name="customers", + table="customers_table", + primary_key="customer_id", + default_time_dimension="created_at", + default_grain="week", + dimensions=[ + Dimension(name="created_at", type="time", granularity="day"), + ], + metrics=[ + Metric(name="customer_count", agg="count"), + ], + ) + + layer.add_model(orders) + layer.add_model(customers) + + # Query orders metric - should use orders' default + sql = layer.compile(metrics=["orders.revenue"]) + print("Orders SQL:") + print(sql) + assert "order_date__month" in sql + + # Query customers metric - should use customers' default + sql2 = layer.compile(metrics=["customers.customer_count"]) + print("Customers SQL:") + print(sql2) + assert "created_at__week" in sql2 diff --git a/tests/test_metadata_fields.py b/tests/test_metadata_fields.py index eaf518f5..8bd09ce1 100644 --- a/tests/test_metadata_fields.py +++ b/tests/test_metadata_fields.py @@ -50,29 +50,41 @@ def test_metric_non_additive_dimension(): assert distinct_metric.non_additive_dimension == "order_date" -def test_metric_default_time_dimension_and_grain(): - """Test default_time_dimension and default_grain on Metric.""" - metric = Metric( - name="daily_revenue", - agg="sum", - sql="amount", +def test_model_default_time_dimension_and_grain(): + """Test default_time_dimension and default_grain on Model.""" + model = Model( + name="orders", + table="orders_table", + primary_key="order_id", default_time_dimension="order_date", default_grain="day", + dimensions=[ + Dimension(name="order_date", type="time", granularity="day"), + ], + metrics=[ + Metric(name="revenue", agg="sum", sql="amount"), + ], ) - assert metric.default_time_dimension == "order_date" - assert metric.default_grain == "day" + assert model.default_time_dimension == "order_date" + assert model.default_grain == "day" - # Monthly metric - monthly_metric = Metric( - name="monthly_revenue", - agg="sum", - sql="amount", + # Monthly model + monthly_model = Model( + name="monthly_orders", + table="orders_table", + primary_key="order_id", default_time_dimension="order_date", default_grain="month", + dimensions=[ + Dimension(name="order_date", type="time", granularity="day"), + ], + metrics=[ + Metric(name="revenue", agg="sum", sql="amount"), + ], ) - assert monthly_metric.default_grain == "month" + assert monthly_model.default_grain == "month" def test_all_metadata_fields_together(): @@ -84,8 +96,6 @@ def test_all_metadata_fields_together(): format="$#,##0.00", value_format_name="usd", drill_fields=["customer.name", "product.name"], - default_time_dimension="order_date", - default_grain="day", description="Total revenue from completed orders", label="Revenue (USD)", ) @@ -94,8 +104,6 @@ def test_all_metadata_fields_together(): assert metric.format == "$#,##0.00" assert metric.value_format_name == "usd" assert metric.drill_fields == ["customer.name", "product.name"] - assert metric.default_time_dimension == "order_date" - assert metric.default_grain == "day" assert metric.description == "Total revenue from completed orders" assert metric.label == "Revenue (USD)" @@ -106,9 +114,12 @@ def test_metadata_fields_in_model(layer): name="orders", table="orders_table", primary_key="order_id", + default_time_dimension="order_date", + default_grain="day", dimensions=[ Dimension(name="status", type="categorical", label="Order Status"), Dimension(name="discount_pct", type="numeric", format="0.0%", value_format_name="percent"), + Dimension(name="order_date", type="time", granularity="day"), ], metrics=[ Metric( @@ -118,8 +129,6 @@ def test_metadata_fields_in_model(layer): format="$#,##0.00", value_format_name="usd", drill_fields=["status", "customer_id"], - default_time_dimension="order_date", - default_grain="day", ), Metric( name="avg_order_value", @@ -139,7 +148,10 @@ def test_metadata_fields_in_model(layer): assert revenue is not None assert revenue.format == "$#,##0.00" assert revenue.drill_fields == ["status", "customer_id"] - assert revenue.default_grain == "day" + + # Verify model-level defaults + assert orders.default_time_dimension == "order_date" + assert orders.default_grain == "day" avg_value = orders.get_metric("avg_order_value") assert avg_value is not None @@ -160,22 +172,35 @@ def test_metadata_fields_optional(): assert metric.value_format_name is None assert metric.drill_fields is None assert metric.non_additive_dimension is None - assert metric.default_time_dimension is None - assert metric.default_grain is None dimension = Dimension(name="status", type="categorical") assert dimension.format is None assert dimension.value_format_name is None + # Model-level defaults are also optional + model = Model( + name="orders", + table="orders_table", + primary_key="order_id", + metrics=[metric], + ) + assert model.default_time_dimension is None + assert model.default_grain is None + def test_default_grain_validation(): - """Test default_grain accepts valid time granularities.""" + """Test default_grain accepts valid time granularities on Model.""" valid_grains = ["hour", "day", "week", "month", "quarter", "year"] for grain in valid_grains: - metric = Metric(name="revenue", agg="sum", sql="amount", default_grain=grain) - assert metric.default_grain == grain + model = Model( + name="orders", + table="orders_table", + primary_key="order_id", + default_grain=grain, + ) + assert model.default_grain == grain def test_metadata_survives_query_compilation(layer):