From 3c2ecef0399ef05748aed44c427c6e453a2ea452 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sat, 3 Jan 2026 13:12:15 -0800 Subject: [PATCH] Fix aggregation parsing for complex expressions - Replace greedy regex with sqlglot in Metric class to properly handle expressions like SUM(x) / SUM(y) without mangling them - Fix COUNT DISTINCT detection using isinstance(parsed.this, exp.Distinct) - Add expression metric support in SQL generator for metrics with inline aggregations (agg=None, type=None, sql=) - Fix dependency analyzer to skip resolution for expression metrics with inline aggregations - Fix cumulative metrics to properly resolve references to other measures and generate valid aliases Add kitchen sink tests using patterns from rill-examples to catch edge cases. --- sidemantic/adapters/rill.py | 42 +--- sidemantic/core/dependency_analyzer.py | 14 ++ sidemantic/core/metric.py | 76 ++++-- sidemantic/sql/generator.py | 59 ++++- tests/adapters/rill/test_kitchen_sink.py | 289 +++++++++++++++++++++++ tests/fixtures/rill/kitchen_sink.yaml | 189 +++++++++++++++ 6 files changed, 601 insertions(+), 68 deletions(-) create mode 100644 tests/adapters/rill/test_kitchen_sink.py create mode 100644 tests/fixtures/rill/kitchen_sink.yaml diff --git a/sidemantic/adapters/rill.py b/sidemantic/adapters/rill.py index e5650eb5..abc7e087 100644 --- a/sidemantic/adapters/rill.py +++ b/sidemantic/adapters/rill.py @@ -7,9 +7,7 @@ from pathlib import Path from typing import Any -import sqlglot import yaml -from sqlglot import expressions as exp from sidemantic.core.dimension import Dimension from sidemantic.core.metric import Metric @@ -203,46 +201,14 @@ def _parse_measure(self, measure_def: dict[str, Any]) -> Metric | None: # "simple" = basic aggregation (None type), "derived" = calculation using other measures metric_type = "derived" - # Use sqlglot to detect simple aggregations - agg_type = None - agg_sql = None - try: - parsed = sqlglot.parse_one(expression, read="duckdb") - - # Check if this is a simple aggregation function - if isinstance(parsed, (exp.Sum, exp.Avg, exp.Count, exp.Min, exp.Max)): - # Map sqlglot aggregation types to Sidemantic agg types - if isinstance(parsed, exp.Sum): - agg_type = "sum" - elif isinstance(parsed, exp.Avg): - agg_type = "avg" - elif isinstance(parsed, exp.Count): - if parsed.args.get("distinct"): - agg_type = "count_distinct" - else: - agg_type = "count" - elif isinstance(parsed, exp.Min): - agg_type = "min" - elif isinstance(parsed, exp.Max): - agg_type = "max" - - # Extract the aggregated column/expression - agg_arg = parsed.this - if agg_arg: - agg_sql = agg_arg.sql(dialect="duckdb") - elif isinstance(parsed, exp.Count): - # COUNT(*) case - agg_sql = None - except Exception: - # If parsing fails, treat as custom SQL expression - pass - + # Let the Metric class handle aggregation parsing via its model_validator. + # This properly handles complex expressions like SUM(x) / SUM(y) and + # COUNT(DISTINCT col) using sqlglot. 
return Metric( name=name, label=label, description=description, - agg=agg_type, - sql=agg_sql if agg_type else expression, + sql=expression, # Pass full expression, Metric will parse aggregations type=metric_type, value_format_name=value_format_name, window_order=window_order, diff --git a/sidemantic/core/dependency_analyzer.py b/sidemantic/core/dependency_analyzer.py index 6725ef0e..bc14eef6 100644 --- a/sidemantic/core/dependency_analyzer.py +++ b/sidemantic/core/dependency_analyzer.py @@ -56,6 +56,20 @@ def extract_metric_dependencies(metric_obj, graph=None, model_context=None) -> s deps.add(metric_obj.sql) return deps + # Check if this is an expression metric with inline aggregations + # (e.g., SUM(x) / SUM(y), COUNT(DISTINCT col) * 1.0) + # These don't have measure dependencies - the aggregations are inline + try: + parsed = sqlglot.parse_one(metric_obj.sql) + # Check if the expression contains any aggregation functions + agg_types = (exp.Sum, exp.Avg, exp.Count, exp.Min, exp.Max, exp.Median) + has_inline_agg = any(parsed.find_all(*agg_types)) + if has_inline_agg and not metric_obj.type: + # Expression metric with inline aggregations - no measure dependencies + return deps + except Exception: + pass + # Extract column references from expression refs = extract_column_references(metric_obj.sql) diff --git a/sidemantic/core/metric.py b/sidemantic/core/metric.py index 96f20ac5..cff73f5d 100644 --- a/sidemantic/core/metric.py +++ b/sidemantic/core/metric.py @@ -45,9 +45,11 @@ def handle_expr_and_parse_agg(cls, data): 1. Converts expr= to sql= for backwards compatibility 2. Parses aggregation functions from SQL (e.g., SUM(amount) -> agg=sum, sql=amount) - """ - import re + Uses sqlglot to properly parse expressions and handle nested parentheses. + Only extracts aggregation from SIMPLE expressions (single aggregation function). + Complex expressions like SUM(x) / SUM(y) are preserved as-is. + """ if isinstance(data, dict): # Step 1: Handle expr alias expr_val = data.get("expr") @@ -72,23 +74,61 @@ def handle_expr_and_parse_agg(cls, data): # Parse if sql is provided and agg is not set # Allow parsing for simple metrics (no type) OR cumulative metrics (to support AVG/COUNT windows) if sql_val and not agg_val and (not type_val or type_val == "cumulative"): - # Match aggregation functions at the start: SUM(expr), COUNT(expr), etc. 
- agg_pattern = r"^\s*(SUM|COUNT|AVG|MIN|MAX|MEDIAN|COUNT_DISTINCT)\s*\((.*)\)\s*$" - match = re.match(agg_pattern, sql_val, re.IGNORECASE) - - if match: - agg_func = match.group(1).lower() - inner_expr = match.group(2).strip() - - # Extract DISTINCT for COUNT(DISTINCT col) - if agg_func == "count": - distinct_match = re.match(r"^\s*DISTINCT\s+(.+)$", inner_expr, re.IGNORECASE) - if distinct_match: + try: + import sqlglot + from sqlglot import expressions as exp + + parsed = sqlglot.parse_one(sql_val, read="duckdb") + + # Only extract if the TOP-LEVEL expression is a simple aggregation + # This prevents breaking expressions like SUM(x) / SUM(y) + agg_map = { + exp.Sum: "sum", + exp.Avg: "avg", + exp.Min: "min", + exp.Max: "max", + exp.Median: "median", + } + + agg_func = None + inner_expr = None + + # Check for standard aggregations + for agg_class, agg_name in agg_map.items(): + if isinstance(parsed, agg_class): + agg_func = agg_name + if parsed.this: + inner_expr = parsed.this.sql(dialect="duckdb") + break + + # Handle COUNT specially (need to detect DISTINCT) + if isinstance(parsed, exp.Count): + # Check if the argument is a Distinct expression + if isinstance(parsed.this, exp.Distinct): agg_func = "count_distinct" - inner_expr = distinct_match.group(1).strip() - - data["agg"] = agg_func - data["sql"] = inner_expr + # Extract all expressions from inside Distinct + # e.g., COUNT(DISTINCT a, b) -> "a, b" + if parsed.this.expressions: + inner_expr = ", ".join(e.sql(dialect="duckdb") for e in parsed.this.expressions) + else: + inner_expr = parsed.this.sql(dialect="duckdb") + else: + agg_func = "count" + if parsed.this: + inner_expr = parsed.this.sql(dialect="duckdb") + # COUNT(*) case - inner_expr stays None + + if agg_func: + data["agg"] = agg_func + if inner_expr is not None: + data["sql"] = inner_expr + elif agg_func == "count": + # COUNT(*) - leave sql as None or "*" + data["sql"] = None + + except Exception: + # If sqlglot parsing fails, leave the expression as-is + pass return data diff --git a/sidemantic/sql/generator.py b/sidemantic/sql/generator.py index 2f8ff2c3..d3284ae0 100644 --- a/sidemantic/sql/generator.py +++ b/sidemantic/sql/generator.py @@ -1398,13 +1398,15 @@ def _build_main_select( # Complex metric types (derived, ratio) can be built inline # Note: cumulative, time_comparison, conversion are handled via special query generators # and won't appear in this code path - if measure.type in ["derived", "ratio"]: + # Also handle "expression metrics" - metrics with inline aggregations like SUM(x)/SUM(y) + is_expression_metric = not measure.type and not measure.agg and measure.sql + if measure.type in ["derived", "ratio"] or is_expression_metric: # Use complex metric builder metric_expr = self._build_metric_sql(measure, model_name) metric_expr = self._wrap_with_fill_nulls(metric_expr, measure) select_exprs.append(f"{metric_expr} AS {alias}") elif not measure.agg: - # Complex types that need special handling (shouldn't reach here normally) + # Unknown metric type that needs special handling raise ValueError( f"Metric '{measure.name}' with type '{measure.type}' cannot be queried directly. " f"Use generate() instead of _build_main_select() for this metric type." 
@@ -1736,7 +1738,12 @@ def _build_metric_sql(self, metric, model_context: str | None = None) -> str: # Check if this is a SQL expression metric (has inline aggregations) # These metrics already contain complete SQL and shouldn't have dependencies replaced - has_inline_agg = any(agg in formula.upper() for agg in ["COUNT(", "SUM(", "AVG(", "MIN(", "MAX("]) + try: + parsed = sqlglot.parse_one(formula, read=self.dialect) + agg_types = (exp.Sum, exp.Avg, exp.Count, exp.Min, exp.Max, exp.Median) + has_inline_agg = any(parsed.find_all(*agg_types)) + except Exception: + has_inline_agg = False if has_inline_agg: # This is a SQL expression metric with inline aggregations. @@ -2009,7 +2016,12 @@ def _generate_with_window_functions( cumulative_metrics.append(m) # Add the base measure/metric to base_metrics if metric.sql: - base_metrics.append(metric.sql) + base_ref = metric.sql + # Qualify unqualified references with the model name + if "." not in base_ref and "." in m: + model_name = m.split(".")[0] + base_ref = f"{model_name}.{base_ref}" + base_metrics.append(base_ref) elif metric and metric.type == "time_comparison": # Validate required fields if not metric.base_metric: @@ -2076,7 +2088,16 @@ def _generate_with_window_functions( # Add cumulative metrics with window functions for m in cumulative_metrics: - metric = self.graph.get_metric(m) + # Handle both qualified (model.measure) and unqualified references + if "." in m: + model_name, measure_name = m.split(".", 1) + model = self.graph.get_model(model_name) + metric = model.get_metric(measure_name) if model else None + # Use just the measure name as the alias (not model.measure) + metric_alias = measure_name + else: + metric = self.graph.get_metric(m) + metric_alias = m if not metric or (not metric.sql and not metric.window_expression): continue @@ -2107,7 +2128,7 @@ def _generate_with_window_functions( if metric.window_expression: order_col = time_dim frame = metric.window_frame or "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW" - window_expr = f"{metric.window_expression} OVER (ORDER BY {order_col} {frame}) AS {m}" + window_expr = f"{metric.window_expression} OVER (ORDER BY {order_col} {frame}) AS {metric_alias}" select_exprs.append(window_expr) continue @@ -2118,8 +2139,22 @@ def _generate_with_window_functions( # It's a direct measure reference - extract just the measure name base_alias = base_ref.split(".")[1] else: - # It's a metric reference - check if it exists and get its underlying measure - base_metric = self.graph.get_metric(base_ref) + # It's an unqualified reference - check model first, then graph-level + base_metric = None + # Get model name from the cumulative metric reference + cum_model_name = m.split(".")[0] if "." in m else None + if cum_model_name: + cum_model = self.graph.get_model(cum_model_name) + if cum_model: + base_metric = cum_model.get_metric(base_ref) + + # Fallback to graph-level metric + if not base_metric: + try: + base_metric = self.graph.get_metric(base_ref) + except KeyError: + pass + if base_metric and base_metric.sql: # Use the underlying measure name if "." 
in base_metric.sql: @@ -2145,20 +2180,20 @@ def _generate_with_window_functions( grain = metric.grain_to_date partition = self._date_trunc(grain, time_dim) - window_expr = f"{agg_func}({base_col}) OVER (PARTITION BY {partition} ORDER BY {time_dim} ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS {m}" + window_expr = f"{agg_func}({base_col}) OVER (PARTITION BY {partition} ORDER BY {time_dim} ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS {metric_alias}" elif metric.window: # Parse window (e.g., "7 days") window_parts = metric.window.split() if len(window_parts) == 2: num, unit = window_parts # For date-based windows, use RANGE - window_expr = f"{agg_func}({base_col}) OVER (ORDER BY {time_dim} RANGE BETWEEN INTERVAL '{num} {unit}' PRECEDING AND CURRENT ROW) AS {m}" + window_expr = f"{agg_func}({base_col}) OVER (ORDER BY {time_dim} RANGE BETWEEN INTERVAL '{num} {unit}' PRECEDING AND CURRENT ROW) AS {metric_alias}" else: # Fallback to rows - window_expr = f"{agg_func}({base_col}) OVER (ORDER BY {time_dim} ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS {m}" + window_expr = f"{agg_func}({base_col}) OVER (ORDER BY {time_dim} ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS {metric_alias}" else: # Running total (unbounded window) - window_expr = f"{agg_func}({base_col}) OVER (ORDER BY {time_dim} ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS {m}" + window_expr = f"{agg_func}({base_col}) OVER (ORDER BY {time_dim} ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS {metric_alias}" select_exprs.append(window_expr) diff --git a/tests/adapters/rill/test_kitchen_sink.py b/tests/adapters/rill/test_kitchen_sink.py new file mode 100644 index 00000000..37c38fd9 --- /dev/null +++ b/tests/adapters/rill/test_kitchen_sink.py @@ -0,0 +1,289 @@ +"""Kitchen sink tests for Rill adapter using real-world patterns. + +These tests use expressions from rill-examples to find holes in the implementation. +The goal is to find bugs, not conform tests to bugs. 
+""" + +import tempfile +from pathlib import Path + +import pytest + +from sidemantic import SemanticLayer +from sidemantic.adapters.rill import RillAdapter + + +@pytest.fixture +def kitchen_sink_layer(): + """Load kitchen sink fixture.""" + adapter = RillAdapter() + graph = adapter.parse("tests/fixtures/rill/kitchen_sink.yaml") + layer = SemanticLayer() + layer.graph = graph + return layer + + +class TestKitchenSinkParsing: + """Test that the kitchen sink fixture parses correctly.""" + + def test_model_loads(self, kitchen_sink_layer): + """Verify the model loads.""" + assert "kitchen_sink" in kitchen_sink_layer.graph.models + + def test_dimension_count(self, kitchen_sink_layer): + """Verify all dimensions loaded.""" + model = kitchen_sink_layer.graph.models["kitchen_sink"] + # 5 explicit dimensions + 1 auto-created timeseries + assert len(model.dimensions) >= 5 + + def test_metric_count(self, kitchen_sink_layer): + """Verify all metrics loaded.""" + model = kitchen_sink_layer.graph.models["kitchen_sink"] + assert len(model.metrics) >= 20 + + +class TestSimpleAggregations: + """Test simple aggregation expressions.""" + + def test_count_star(self, kitchen_sink_layer): + """COUNT(*) should work.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.total_records"]) + assert "COUNT(" in sql.upper() + + def test_sum(self, kitchen_sink_layer): + """SUM(column) should work.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.total_revenue"]) + assert "SUM(" in sql.upper() + + def test_avg(self, kitchen_sink_layer): + """AVG(column) should work.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.avg_amount"]) + assert "AVG(" in sql.upper() + + def test_min(self, kitchen_sink_layer): + """MIN(column) should work.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.min_amount"]) + assert "MIN(" in sql.upper() + + def test_max(self, kitchen_sink_layer): + """MAX(column) should work.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.max_amount"]) + assert "MAX(" in sql.upper() + + def test_median(self, kitchen_sink_layer): + """MEDIAN(column) should work.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.median_duration"]) + assert "MEDIAN(" in sql.upper() + + +class TestCountDistinct: + """Test COUNT DISTINCT expressions.""" + + def test_count_distinct_simple(self, kitchen_sink_layer): + """COUNT(DISTINCT col) should produce valid SQL.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.unique_users"]) + # Should have COUNT and DISTINCT + assert "COUNT(" in sql.upper() + assert "DISTINCT" in sql.upper() + # Should NOT have broken SQL like "DISTINCT user_id AS" + assert "DISTINCT user_id AS" not in sql + + def test_count_distinct_with_function(self, kitchen_sink_layer): + """COUNT(DISTINCT CONCAT(...)) from rill-311-ops.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.unique_locations"]) + assert "COUNT(" in sql.upper() + assert "DISTINCT" in sql.upper() + assert "CONCAT" in sql.upper() + + +class TestArithmeticExpressions: + """Test expressions with arithmetic operators. + + These are the patterns that break with the greedy regex bug. + """ + + def test_division_of_sums(self, kitchen_sink_layer): + """SUM(x) / SUM(y) from rill-github-analytics. + + BUG: Greedy regex in Metric class matches first SUM( to last ), + extracting 'deletions) / SUM(changes' as the inner expression. 
+ """ + model = kitchen_sink_layer.graph.models["kitchen_sink"] + metric = model.get_metric("deletion_pct") + + # The expression should be preserved as-is (no agg extraction) + # OR properly parsed as a ratio + assert metric is not None + + # If parsed as expression without agg: + if metric.agg is None: + assert metric.sql == "SUM(deletions) / SUM(changes)" + else: + # Should NOT have broken SQL + assert ") / SUM(" not in (metric.sql or "") + + def test_subtraction_of_sums(self, kitchen_sink_layer): + """SUM(x) - SUM(y) from rill-cost-monitoring.""" + model = kitchen_sink_layer.graph.models["kitchen_sink"] + metric = model.get_metric("net_revenue") + + assert metric is not None + if metric.agg is None: + assert metric.sql == "SUM(revenue) - SUM(cost)" + else: + assert ") - SUM(" not in (metric.sql or "") + + def test_division_of_counts(self, kitchen_sink_layer): + """COUNT(*) / COUNT(DISTINCT x) from rill-github-analytics.""" + model = kitchen_sink_layer.graph.models["kitchen_sink"] + metric = model.get_metric("files_per_commit") + + assert metric is not None + if metric.agg is None: + assert "COUNT(*)" in metric.sql + assert "COUNT(DISTINCT" in metric.sql + else: + assert ") / COUNT(" not in (metric.sql or "") + + def test_sum_with_division(self, kitchen_sink_layer): + """SUM(x)/1000 from rill-openrtb-prog-ads.""" + model = kitchen_sink_layer.graph.models["kitchen_sink"] + metric = model.get_metric("ad_spend") + + assert metric is not None + # Should either preserve full expression or work correctly + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.ad_spend"]) + # Should not error and should contain the division + assert "/1000" in sql or "/ 1000" in sql + + def test_ratio_with_multiplier(self, kitchen_sink_layer): + """SUM(x)*1.0/SUM(y)*1.0 from rill-app-engagement.""" + model = kitchen_sink_layer.graph.models["kitchen_sink"] + metric = model.get_metric("conversion_rate") + + assert metric is not None + # Full expression should be preserved + if metric.agg is None: + assert "*1.0" in metric.sql + + def test_margin_calculation(self, kitchen_sink_layer): + """(SUM(x) - SUM(y))/SUM(x) from rill-cost-monitoring.""" + model = kitchen_sink_layer.graph.models["kitchen_sink"] + metric = model.get_metric("margin") + + assert metric is not None + # Complex expression - should be preserved + if metric.agg is None: + assert "SUM(revenue)" in metric.sql + assert "SUM(cost)" in metric.sql + + +class TestConditionalAggregations: + """Test aggregations with CASE expressions.""" + + def test_count_with_case(self, kitchen_sink_layer): + """COUNT(CASE WHEN ... END) should work.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.completed_orders"]) + assert "COUNT(" in sql.upper() + assert "CASE" in sql.upper() + + def test_sum_with_case(self, kitchen_sink_layer): + """SUM(CASE WHEN ... 
END) should work.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.completed_revenue"]) + assert "SUM(" in sql.upper() + assert "CASE" in sql.upper() + + +class TestDerivedMetrics: + """Test derived/calculated metrics.""" + + def test_derived_metric_references(self, kitchen_sink_layer): + """Derived metric referencing other metrics.""" + model = kitchen_sink_layer.graph.models["kitchen_sink"] + metric = model.get_metric("revenue_per_user") + + assert metric is not None + assert metric.type == "derived" + + def test_derived_metric_sql(self, kitchen_sink_layer): + """Derived metric should compile.""" + sql = kitchen_sink_layer.compile(metrics=["kitchen_sink.revenue_per_user"]) + # Should reference the base metrics + assert sql is not None + + +class TestWindowFunctions: + """Test window function metrics.""" + + def test_window_metric_parsing(self, kitchen_sink_layer): + """Window function metrics should parse.""" + model = kitchen_sink_layer.graph.models["kitchen_sink"] + metric = model.get_metric("rolling_7day_revenue") + + assert metric is not None + assert metric.type == "cumulative" + + +class TestSQLGeneration: + """Test that generated SQL is syntactically valid.""" + + def test_all_metrics_compile(self, kitchen_sink_layer): + """All metrics should compile without errors.""" + model = kitchen_sink_layer.graph.models["kitchen_sink"] + + failed = [] + for metric in model.metrics: + try: + sql = kitchen_sink_layer.compile(metrics=[f"kitchen_sink.{metric.name}"]) + # Basic syntax check - should have SELECT + assert "SELECT" in sql.upper() + except Exception as e: + failed.append((metric.name, str(e))) + + if failed: + msg = "\n".join(f" {name}: {err}" for name, err in failed) + pytest.fail(f"Failed to compile metrics:\n{msg}") + + def test_sql_is_parseable(self, kitchen_sink_layer): + """Generated SQL should be parseable by sqlglot.""" + import sqlglot + + model = kitchen_sink_layer.graph.models["kitchen_sink"] + + failed = [] + for metric in model.metrics: + try: + sql = kitchen_sink_layer.compile(metrics=[f"kitchen_sink.{metric.name}"]) + # Strip the comment line + sql_no_comment = "\n".join(line for line in sql.split("\n") if not line.strip().startswith("--")) + sqlglot.parse_one(sql_no_comment, read="duckdb") + except Exception as e: + failed.append((metric.name, str(e))) + + if failed: + msg = "\n".join(f" {name}: {err}" for name, err in failed) + pytest.fail(f"Generated invalid SQL:\n{msg}") + + +class TestRoundtrip: + """Test export and re-import.""" + + def test_roundtrip_preserves_expressions(self, kitchen_sink_layer): + """Complex expressions should survive roundtrip.""" + adapter = RillAdapter() + + with tempfile.TemporaryDirectory() as tmpdir: + adapter.export(kitchen_sink_layer.graph, tmpdir) + graph2 = adapter.parse(Path(tmpdir) / "kitchen_sink.yaml") + + model1 = kitchen_sink_layer.graph.models["kitchen_sink"] + model2 = graph2.models["kitchen_sink"] + + # Check a complex metric survived + m1 = model1.get_metric("margin") + m2 = model2.get_metric("margin") + + assert m1 is not None + assert m2 is not None + # Both should have the full expression (not broken) + # Either both have agg=None with full SQL, or both properly decomposed diff --git a/tests/fixtures/rill/kitchen_sink.yaml b/tests/fixtures/rill/kitchen_sink.yaml new file mode 100644 index 00000000..c7434638 --- /dev/null +++ b/tests/fixtures/rill/kitchen_sink.yaml @@ -0,0 +1,189 @@ +# Kitchen sink Rill metrics view based on real rill-examples patterns +# Tests edge cases and complex expressions from: +# - 
rill-311-ops (MEDIAN, COUNT DISTINCT CONCAT) +# - rill-app-engagement (ratios with *1.0, count with space) +# - rill-cost-monitoring (arithmetic expressions, margin calculations) +# - rill-github-analytics (division ratios, count(*)) +# - rill-openrtb-prog-ads (complex nested arithmetic) + +type: metrics_view +name: kitchen_sink +display_name: Kitchen Sink Analytics +description: Comprehensive test of Rill adapter edge cases +model: kitchen_sink_model +timeseries: event_time +smallest_time_grain: hour +default_time_range: P1W + +dimensions: + # Simple column reference + - name: status + display_name: Status + description: Current status + column: status + + # Expression-based dimension (CASE) + - name: status_type + display_name: Status Type + expression: | + CASE + WHEN LOWER(status) IN ('open', 'new', 'in progress') THEN 'Active' + WHEN status IS NULL THEN 'Unknown' + ELSE 'Closed' + END + + # Concatenation in dimension + - name: full_location + display_name: Full Location + expression: CONCAT(city, ', ', state) + + # Boolean dimension + - name: is_merge_commit + display_name: Merge Commit + column: is_merge_commit + description: True if the commit is a merge commit + + # Nested CASE expression + - name: size_bucket + display_name: Size Bucket + expression: | + CASE + WHEN amount < 100 THEN 'small' + WHEN amount < 1000 THEN 'medium' + WHEN amount < 10000 THEN 'large' + ELSE 'enterprise' + END + +measures: + # Simple COUNT(*) + - name: total_records + display_name: Total Records + expression: COUNT(*) + format_preset: humanize + + # COUNT DISTINCT + - name: unique_users + display_name: Unique Users + expression: COUNT(DISTINCT user_id) + format_preset: humanize + + # COUNT DISTINCT with CONCAT (from rill-311-ops) + - name: unique_locations + display_name: Unique Locations + expression: COUNT(DISTINCT CONCAT(latitude, longitude)) + format_preset: humanize + + # MEDIAN aggregation (from rill-311-ops) + - name: median_duration + display_name: Median Duration (Hours) + expression: MEDIAN(duration_hours) + format_preset: humanize + + # Simple SUM + - name: total_revenue + display_name: Total Revenue + expression: SUM(revenue) + format_preset: currency_usd + + # SUM with division constant (from rill-openrtb-prog-ads) + - name: ad_spend + display_name: Advertising Spend + expression: SUM(media_spend_usd)/1000 + format_preset: currency_usd + + # Ratio with *1.0 pattern (from rill-app-engagement) + - name: conversion_rate + display_name: Conversion Rate + expression: SUM(clicks)*1.0/SUM(views)*1.0 + format_preset: percentage + + # Simple ratio (from rill-github-analytics) + - name: deletion_pct + display_name: Deletion Percentage + expression: SUM(deletions) / SUM(changes) + format_preset: percentage + + # Complex arithmetic (from rill-cost-monitoring) + - name: margin + display_name: Margin + expression: (SUM(revenue) - SUM(cost))/SUM(revenue) + format_preset: percentage + + # Net calculation + - name: net_revenue + display_name: Net Revenue + expression: SUM(revenue) - SUM(cost) + format_preset: currency_usd + + # Division in AVG context (from rill-openrtb-prog-ads) + - name: avg_bid_price + display_name: Average Bid Price + expression: SUM(bid_price_usd)*1.0/SUM(bid_cnt)/1000 + format_preset: currency_usd + + # Files per commit ratio (from rill-github-analytics) + - name: files_per_commit + display_name: Files per Commit + expression: COUNT(*) / COUNT(DISTINCT commit_hash) + format_preset: humanize + + # Count with space before parenthesis (from rill-app-engagement) + - name: unique_visitors + 
display_name: Unique Visitors + expression: count (distinct visitor_id) + format_preset: humanize + + # Completion rate with count distinct ratio + - name: completion_rate + display_name: Completion Rate + expression: count (distinct completed_id)*1.0/count (distinct started_id) *1.0 + format_preset: percentage + + # Derived measure referencing other measures + - name: revenue_per_user + display_name: Revenue per User + type: derived + expression: total_revenue / unique_users + format_preset: currency_usd + requires: + - total_revenue + - unique_users + + # Window function / rolling average + - name: rolling_7day_revenue + display_name: 7-Day Rolling Revenue + expression: AVG(total_revenue) + requires: + - total_revenue + window: + order: event_time + frame: RANGE BETWEEN INTERVAL 6 DAY PRECEDING AND CURRENT ROW + + # MIN/MAX aggregations + - name: min_amount + display_name: Minimum Amount + expression: MIN(amount) + format_preset: currency_usd + + - name: max_amount + display_name: Maximum Amount + expression: MAX(amount) + format_preset: currency_usd + + # AVG aggregation + - name: avg_amount + display_name: Average Amount + expression: AVG(amount) + format_preset: currency_usd + + # Conditional count (CASE in COUNT) + - name: completed_orders + display_name: Completed Orders + expression: COUNT(CASE WHEN status = 'completed' THEN 1 END) + format_preset: humanize + + # SUM with CASE + - name: completed_revenue + display_name: Completed Revenue + expression: SUM(CASE WHEN status = 'completed' THEN revenue ELSE 0 END) + format_preset: currency_usd
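
For reference, a minimal standalone sketch of the detection logic this patch adopts (illustrative only, not part of the diff; it assumes nothing beyond sqlglot being installed). The old greedy regex matched the first "SUM(" to the last ")", turning SUM(deletions) / SUM(changes) into agg=sum, sql="deletions) / SUM(changes"; parsing with sqlglot and inspecting only the top-level node avoids this, because the top-level node for a compound expression is Div/Sub/etc. rather than an aggregation:

    # Illustrative sketch, not part of the patch: mirrors the detection
    # added to Metric.handle_expr_and_parse_agg above.
    import sqlglot
    from sqlglot import expressions as exp

    samples = [
        "SUM(amount)",                    # simple -> agg="sum", sql="amount"
        "COUNT(DISTINCT user_id)",        # -> agg="count_distinct", sql="user_id"
        "SUM(deletions) / SUM(changes)",  # compound -> kept verbatim
    ]
    for raw in samples:
        parsed = sqlglot.parse_one(raw, read="duckdb")
        if isinstance(parsed, exp.Sum):
            # Top-level node is a simple aggregation: safe to decompose.
            print(raw, "=> sum of", parsed.this.sql(dialect="duckdb"))
        elif isinstance(parsed, exp.Count) and isinstance(parsed.this, exp.Distinct):
            # COUNT(DISTINCT a, b): the arguments live on the Distinct node.
            inner = ", ".join(e.sql(dialect="duckdb") for e in parsed.this.expressions)
            print(raw, "=> count_distinct of", inner)
        else:
            # Top-level node is Div here, so the expression is preserved
            # as-is and handled as an expression metric with inline aggregations.
            print(raw, "=> kept verbatim:", parsed.sql(dialect="duckdb"))

Running the sketch decomposes the first two samples and leaves the third untouched, which is exactly the behavior the kitchen sink tests above assert for deletion_pct, net_revenue, and files_per_commit.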