diff --git a/sidemantic/adapters/lookml.py b/sidemantic/adapters/lookml.py index c2306308..bffcf1aa 100644 --- a/sidemantic/adapters/lookml.py +++ b/sidemantic/adapters/lookml.py @@ -75,6 +75,10 @@ def _parse_views_from_file(self, file_path: Path, graph: SemanticGraph) -> None: for view_def in parsed.get("views") or []: model = self._parse_view(view_def) if model: + # For refinements (+view_name), skip if already exists + # In real Looker, multiple refinements would be merged + if model.name.startswith("+") and model.name in graph.models: + continue graph.add_model(model) def _parse_explores_from_file(self, file_path: Path, graph: SemanticGraph) -> None: @@ -139,6 +143,108 @@ def replace_ref(match: re.Match) -> str: return resolved + def _convert_lookml_filter_to_sql(self, field: str, value: str) -> str: + """Convert a LookML filter value to SQL condition. + + Handles LookML filter syntax: + - "value" -> field = 'value' + - "val1,val2,val3" -> field IN ('val1', 'val2', 'val3') + - "-value" -> field != 'value' (negation) + - "-val1,-val2" -> field NOT IN ('val1', 'val2') + - "yes"/"no" -> field = true/false (for yesno dimensions) + - ">100", ">=50", "<10", "<=5", "!=0" -> numeric comparisons + - "%pattern%" -> field LIKE '%pattern%' (wildcards) + - "NULL" -> field IS NULL + - "-NULL" -> field IS NOT NULL + - "EMPTY" -> field = '' + - "-EMPTY" -> field != '' + + Args: + field: The field name + value: The LookML filter value + + Returns: + SQL condition string + """ + # Handle NULL special values + if value.upper() == "NULL": + return f"{{model}}.{field} IS NULL" + if value.upper() == "-NULL": + return f"{{model}}.{field} IS NOT NULL" + + # Handle EMPTY special values + if value.upper() == "EMPTY": + return f"{{model}}.{field} = ''" + if value.upper() == "-EMPTY": + return f"{{model}}.{field} != ''" + + # Handle yes/no boolean values + if value.lower() == "yes": + return f"{{model}}.{field} = true" + if value.lower() == "no": + return f"{{model}}.{field} = false" + + 
# Check if this is a comma-separated list of values (OR condition) + # But be careful: ">100,<200" is two comparison operators, not a list + if "," in value: + parts = [p.strip() for p in value.split(",")] + + # Check if all parts are negations (NOT IN) + if all(p.startswith("-") for p in parts): + # Remove the - prefix from each + clean_parts = [p[1:] for p in parts] + # Check if they're all simple strings (not operators) + if all(not re.match(r"^(>=|<=|!=|<>|>|<)", p) for p in clean_parts): + quoted = ", ".join(f"'{p}'" for p in clean_parts) + return f"{{model}}.{field} NOT IN ({quoted})" + + # Check if all parts are simple values (no operators) -> IN clause + if all(not p.startswith("-") and not re.match(r"^(>=|<=|!=|<>|>|<)", p) for p in parts): + # Check if all parts are numeric + if all(p.replace(".", "").replace("-", "").isdigit() for p in parts): + # Numeric IN clause (no quotes) + return f"{{model}}.{field} IN ({', '.join(parts)})" + else: + # String IN clause (with quotes) + quoted = ", ".join(f"'{p}'" for p in parts) + return f"{{model}}.{field} IN ({quoted})" + + # Mixed operators - this is actually multiple filter conditions + # LookML doesn't really support this in a single filter value + # Fall through to single value handling (will be slightly wrong but safer) + + # Handle negation prefix for single values + if value.startswith("-") and not re.match(r"^-(>=|<=|!=|<>|>|<|\d)", value): + negated_value = value[1:] + if negated_value.replace(".", "").replace("-", "").isdigit(): + return f"{{model}}.{field} != {negated_value}" + else: + return f"{{model}}.{field} != '{negated_value}'" + + # Handle comparison operators: ">1000", "<=100", ">=5", "<10", "!=0" + if match := re.match(r"^(>=|<=|!=|<>|>|<)(.+)$", value): + operator, operand = match.groups() + operand = operand.strip() + # Normalize <> to != + if operator == "<>": + operator = "!=" + # Check if operand is numeric + if operand.replace(".", "").replace("-", "").isdigit(): + return 
f"{{model}}.{field} {operator} {operand}" + else: + return f"{{model}}.{field} {operator} '{operand}'" + + # Handle wildcard patterns (LIKE) + if "%" in value or "_" in value: + return f"{{model}}.{field} LIKE '{value}'" + + # Handle numeric values + if value.replace(".", "").replace("-", "").isdigit(): + return f"{{model}}.{field} = {value}" + + # Default: string equality + return f"{{model}}.{field} = '{value}'" + def _parse_view(self, view_def: dict) -> Model | None: """Parse LookML view into Sidemantic model. @@ -160,6 +266,9 @@ def _parse_view(self, view_def: dict) -> Model | None: derived_table = view_def.get("derived_table") if derived_table: sql = derived_table.get("sql") + # Handle native derived tables with explore_source + if not sql and "explore_source" in derived_table: + sql = self._convert_explore_source_to_sql(derived_table) # First pass: build a lookup dict of dimension SQL expressions # This is used to resolve ${dimension_name} references @@ -309,6 +418,11 @@ def _parse_dimension_group( return [] group_type = dim_group_def.get("type", "time") + + # Handle duration type separately + if group_type == "duration": + return self._parse_duration_group(group_name, dim_group_def) + if group_type != "time": return [] @@ -352,6 +466,113 @@ def _parse_dimension_group( return dimensions + def _convert_explore_source_to_sql(self, derived_table: dict) -> str: + """Convert a native derived table (explore_source) to a SQL representation. + + Native derived tables in LookML use explore_source to define the query + declaratively. We convert this to a SQL comment documenting the source, + since the actual SQL is generated by Looker at runtime. 
+ + Args: + derived_table: The derived_table definition containing explore_source + + Returns: + A SQL comment describing the explore_source + """ + explore_source = derived_table.get("explore_source") + if not explore_source: + return "-- Native derived table (explore_source)" + + # explore_source can be a string (explore name) or a dict with config + if isinstance(explore_source, str): + explore_name = explore_source + columns = [] + filters = [] + else: + # It's a dict with explore name as key + # lkml parses it as: {"explore_name": {...config...}} + if isinstance(explore_source, dict): + explore_name = list(explore_source.keys())[0] if explore_source else "unknown" + config = explore_source.get(explore_name, {}) + if isinstance(config, dict): + columns = config.get("columns") or config.get("column") or [] + filters = config.get("filters") or config.get("filter") or [] + else: + columns = [] + filters = [] + else: + explore_name = str(explore_source) + columns = [] + filters = [] + + # Build a descriptive SQL comment + sql_parts = [f"-- Native Derived Table from explore: {explore_name}"] + + if columns: + col_names = [] + for col in columns if isinstance(columns, list) else [columns]: + if isinstance(col, dict): + col_name = col.get("name") or col.get("column") + if col_name: + col_names.append(col_name) + if col_names: + sql_parts.append(f"-- Columns: {', '.join(col_names)}") + + if filters: + sql_parts.append("-- Has filters applied") + + sql_parts.append(f"SELECT * FROM {explore_name}") + + return "\n".join(sql_parts) + + def _parse_duration_group(self, group_name: str, dim_group_def: dict) -> list[Dimension]: + """Parse LookML dimension_group with type: duration. + + Duration dimension groups calculate the difference between two timestamps + in various intervals (seconds, minutes, hours, days, weeks, months, years). 
+ + Args: + group_name: Name of the dimension group + dim_group_def: Dimension group definition + + Returns: + List of duration dimensions + """ + intervals = dim_group_def.get("intervals", ["day"]) + sql_start = dim_group_def.get("sql_start", "") + sql_end = dim_group_def.get("sql_end", "") + + if sql_start: + sql_start = sql_start.replace("${TABLE}", "{model}") + if sql_end: + sql_end = sql_end.replace("${TABLE}", "{model}") + + # If no sql_start/sql_end, we can't create duration dimensions + if not sql_start or not sql_end: + return [] + + dimensions = [] + for interval in intervals: + # Create a dimension for each interval + # The SQL calculates the difference between start and end + # Note: The exact SQL depends on the database dialect + dim_name = f"{group_name}_{interval}s" if interval != "second" else f"{group_name}_seconds" + + # Generate appropriate SQL for duration calculation + # This uses a generic DATE_DIFF pattern that works in most SQL dialects + duration_sql = f"DATE_DIFF({sql_end}, {sql_start}, {interval.upper()})" + + dimensions.append( + Dimension( + name=dim_name, + type="numeric", + sql=duration_sql, + description=f"Duration in {interval}s between start and end", + ) + ) + + return dimensions + def _parse_measure( self, measure_def: dict, @@ -412,7 +633,9 @@ def _parse_measure( description=measure_def.get("description"), ) - # Map LookML measure types + # Map LookML measure types to sidemantic aggregation types + # Only include types supported by Metric.agg: sum, count, count_distinct, avg, min, max, median + # Unsupported types (percentile, list, date, string, yesno) become derived measures type_mapping = { "count": "count", "count_distinct": "count_distinct", @@ -420,49 +643,45 @@ def _parse_measure( "average": "avg", "min": "min", "max": "max", - "number": None, # Calculated measures + "median": "median", + # Unsupported as agg, will be treated as derived: + "percentile": None, + "list": None, + "date": None, + "number": None, # 
Calculated/derived measures + "string": None, # String measures are derived + "yesno": None, # Boolean measures are derived } agg_type = type_mapping.get(measure_type) # Parse filters - lkml parses these as filters__all - # Convert to SQL-style filters for compatibility with generator + # There are TWO different filter syntaxes in LookML: + # 1. Shorthand: filters: [status: "completed"] + # -> lkml returns [[{'status': 'completed'}]] + # 2. Block syntax: filters: { field: x value: y } + # -> lkml returns [{'field': 'flight_length', 'value': '>120'}] + # We need to handle both formats. filters = [] filters_all = measure_def.get("filters__all") or [] if filters_all: - for filter_list in filters_all: - for filter_dict in filter_list: - if isinstance(filter_dict, dict): - for field, value in filter_dict.items(): - # Convert LookML filter format to SQL condition - # field: "value" -> {model}.field = 'value' - # Handle special LookML filter values: - # - "yes"/"no" for yesno dimensions -> true/false - # - comparison operators: ">1000", "<=100", ">=5", "<10", "!=0" - # - numeric values like "5" -> numeric comparison - if value.lower() == "yes": - filters.append(f"{{model}}.{field} = true") - elif value.lower() == "no": - filters.append(f"{{model}}.{field} = false") - elif match := re.match(r"^(>=|<=|!=|<>|>|<)(.+)$", value): - # Comparison operator prefix: ">1000", "<=100", etc. 
- operator, operand = match.groups() - operand = operand.strip() - # Normalize <> to != - if operator == "<>": - operator = "!=" - # Check if operand is numeric - if operand.replace(".", "").replace("-", "").isdigit(): - filters.append(f"{{model}}.{field} {operator} {operand}") - else: - # String comparison with operator - filters.append(f"{{model}}.{field} {operator} '{operand}'") - elif value.replace(".", "").replace("-", "").isdigit(): - # Numeric value - filters.append(f"{{model}}.{field} = {value}") - else: - # String value - filters.append(f"{{model}}.{field} = '{value}'") + for item in filters_all: + if isinstance(item, list): + # Format 1: Shorthand syntax - list of dicts with field:value pairs + for filter_dict in item: + if isinstance(filter_dict, dict): + for field, value in filter_dict.items(): + filter_sql = self._convert_lookml_filter_to_sql(field, value) + if filter_sql: + filters.append(filter_sql) + elif isinstance(item, dict): + # Format 2: Block syntax - dict with 'field' and 'value' keys + field = item.get("field") + value = item.get("value") + if field and value: + filter_sql = self._convert_lookml_filter_to_sql(field, value) + if filter_sql: + filters.append(filter_sql) # Replace ${TABLE} and resolve ${dimension_ref} placeholders in SQL sql = measure_def.get("sql") @@ -494,7 +713,13 @@ def resolve_reference(match): # Determine if this is a derived/ratio metric metric_type = None if measure_type == "number": - metric_type = "derived" + # type: number is a derived measure, but it requires SQL + # If no SQL, this is likely a placeholder in an abstract/template view + if sql: + metric_type = "derived" + else: + # Skip placeholder measures with no SQL + return None # If there's SQL but no explicit type, treat as derived measure elif sql and not has_explicit_type: metric_type = "derived" diff --git a/tests/adapters/lookml/test_edge_cases.py b/tests/adapters/lookml/test_edge_cases.py new file mode 100644 index 00000000..db07eb23 --- /dev/null +++ 
b/tests/adapters/lookml/test_edge_cases.py @@ -0,0 +1,1564 @@ +"""Tests for LookML adapter edge cases. + +These tests verify handling of complex LookML patterns found in real-world deployments, +inspired by fixtures from joshtemple/lkml, node-lookml-parser, and lookml-tools. +""" + +from pathlib import Path + +import pytest + +from sidemantic.adapters.lookml import LookMLAdapter + +# ============================================================================= +# EXTENDS AND REFINEMENTS TESTS +# ============================================================================= + + +def test_lookml_extends_base_view(): + """Test parsing base views for extension.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_extends.lkml")) + + # Check base_entity was parsed + assert "base_entity" in graph.models + base = graph.get_model("base_entity") + + # Check base dimensions + assert base.get_dimension("id") is not None + assert base.get_dimension("name") is not None + assert base.get_dimension("is_active") is not None + + # Check time dimensions + assert base.get_dimension("created_date") is not None + assert base.get_dimension("created_week") is not None + + # Check measure + count_measure = base.get_metric("count") + assert count_measure is not None + assert count_measure.agg == "count" + + +def test_lookml_extends_extended_view(): + """Test parsing views that extend other views.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_extends.lkml")) + + # Note: sidemantic doesn't currently resolve extends - it just parses each view + # The customers_extended view only has its own dimensions, not inherited ones + assert "customers_extended" in graph.models + extended = graph.get_model("customers_extended") + + # Check customer-specific dimensions + assert extended.get_dimension("email") is not None + assert extended.get_dimension("tier") is not None + assert 
extended.get_dimension("lifetime_value") is not None + + # Check measures + assert extended.get_metric("total_ltv") is not None + assert extended.get_metric("avg_ltv") is not None + + +def test_lookml_refinement_syntax(): + """Test parsing refinement syntax (+view_name).""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_extends.lkml")) + + # Refinement views are parsed as separate models + # The +base_entity becomes a model named "+base_entity" + assert "+base_entity" in graph.models + refinement = graph.get_model("+base_entity") + + # Should have the added dimension + assert refinement.get_dimension("refined_field") is not None + + +def test_lookml_abstract_view(): + """Test parsing abstract views (extension: required).""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_extends.lkml")) + + assert "abstract_metrics" in graph.models + abstract = graph.get_model("abstract_metrics") + + # Abstract view has only measures + assert len(abstract.dimensions) == 0 + assert len(abstract.metrics) == 5 + + # Check all measures exist + assert abstract.get_metric("record_count") is not None + assert abstract.get_metric("sum_amount") is not None + assert abstract.get_metric("avg_amount") is not None + assert abstract.get_metric("min_amount") is not None + assert abstract.get_metric("max_amount") is not None + + +def test_lookml_concrete_extends_abstract(): + """Test parsing concrete view extending abstract.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_extends.lkml")) + + assert "transactions" in graph.models + transactions = graph.get_model("transactions") + + # Check dimensions + assert transactions.get_dimension("id") is not None + assert transactions.get_dimension("amount") is not None + assert transactions.get_dimension("status") is not None + + # Check time dimensions + assert transactions.get_dimension("transaction_time") is not None + 
assert transactions.get_dimension("transaction_date") is not None + + +# ============================================================================= +# LIQUID TEMPLATING TESTS +# ============================================================================= + + +def test_lookml_liquid_case_dimension(): + """Test parsing case dimensions (similar to CASE WHEN).""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_liquid.lkml")) + + assert "dynamic_sales" in graph.models + sales = graph.get_model("dynamic_sales") + + # Check case dimension exists + region_group = sales.get_dimension("region_group") + assert region_group is not None + assert region_group.type == "categorical" + + +def test_lookml_liquid_html_dimension(): + """Test parsing dimensions with HTML formatting.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_liquid.lkml")) + + sales = graph.get_model("dynamic_sales") + status_dim = sales.get_dimension("status") + assert status_dim is not None + + +def test_lookml_liquid_dimension_reference_in_sql(): + """Test parsing dimension references in SQL.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_liquid.lkml")) + + sales = graph.get_model("dynamic_sales") + + # days_since_sale references sale_date dimension + days_dim = sales.get_dimension("days_since_sale") + assert days_dim is not None + # The SQL should contain the reference (may be resolved or not) + assert days_dim.sql is not None + + +def test_lookml_value_formats(): + """Test parsing various value_format patterns.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_liquid.lkml")) + + assert "format_examples" in graph.models + formats = graph.get_model("format_examples") + + # Check dimensions with value formats exist + assert formats.get_dimension("percentage_value") is not None + assert formats.get_dimension("currency_value") is not 
None + + # Check measures + assert formats.get_metric("sum_value") is not None + assert formats.get_metric("sum_currency") is not None + assert formats.get_metric("formatted_total") is not None + + +def test_lookml_derived_table(): + """Test parsing derived tables.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_liquid.lkml")) + + assert "templated_orders" in graph.models + orders = graph.get_model("templated_orders") + + # Derived table should have SQL, no table name + assert orders.table is None + assert orders.sql is not None + assert "SELECT" in orders.sql.upper() + + +# ============================================================================= +# COMPLEX FILTER TESTS +# ============================================================================= + + +def test_lookml_numeric_comparison_filters(): + """Test parsing numeric comparison operators in filters.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_filters.lkml")) + + filters_view = graph.get_model("filter_edge_cases") + + # Greater than filter + high_value = filters_view.get_metric("high_value_count") + assert high_value is not None + assert high_value.filters is not None + assert any("> 1000" in f or ">1000" in f for f in high_value.filters) + + # Less than filter + low_value = filters_view.get_metric("low_value_count") + assert low_value is not None + assert low_value.filters is not None + assert any("< 100" in f or "<100" in f for f in low_value.filters) + + # Not equal filter + non_zero = filters_view.get_metric("non_zero_count") + assert non_zero is not None + assert non_zero.filters is not None + assert any("!= 0" in f or "!=0" in f or "<>" in f for f in non_zero.filters) + + +def test_lookml_string_filters(): + """Test parsing string filters.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_filters.lkml")) + + filters_view = graph.get_model("filter_edge_cases") 
+ + # Simple string match + completed = filters_view.get_metric("completed_count") + assert completed is not None + assert completed.filters is not None + # Filter should contain 'completed' + assert any("completed" in f.lower() for f in completed.filters) + + +def test_lookml_boolean_filters(): + """Test parsing yes/no boolean filters.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_filters.lkml")) + + filters_view = graph.get_model("filter_edge_cases") + + # Yes filter + premium = filters_view.get_metric("premium_count") + assert premium is not None + assert premium.filters is not None + # Should contain true or yes + assert any("true" in f.lower() or "yes" in f.lower() for f in premium.filters) + + # No filter + non_premium = filters_view.get_metric("non_premium_count") + assert non_premium is not None + assert non_premium.filters is not None + assert any("false" in f.lower() or "no" in f.lower() for f in non_premium.filters) + + +def test_lookml_multiple_filters(): + """Test parsing measures with multiple filters.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_filters.lkml")) + + filters_view = graph.get_model("filter_edge_cases") + + # Measure with multiple filters (AND condition) + multi_filter = filters_view.get_metric("high_value_premium") + assert multi_filter is not None + assert multi_filter.filters is not None + assert len(multi_filter.filters) >= 2 + + +def test_lookml_segments(): + """Test parsing filter definitions as segments.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_filters.lkml")) + + filters_view = graph.get_model("filter_edge_cases") + + # Check segments + assert len(filters_view.segments) == 4 + segment_names = [s.name for s in filters_view.segments] + assert "high_value" in segment_names + assert "premium_segment" in segment_names + assert "active_period" in segment_names + assert 
"successful_transactions" in segment_names + + +# ============================================================================= +# COMPLEX SQL TESTS +# ============================================================================= + + +def test_lookml_subquery_dimension(): + """Test parsing dimensions with subqueries.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_sql.lkml")) + + complex_view = graph.get_model("complex_sql_view") + + # Check dimensions with subqueries + rank_dim = complex_view.get_dimension("customer_order_rank") + assert rank_dim is not None + assert "SELECT" in rank_dim.sql.upper() + + ltv_dim = complex_view.get_dimension("customer_lifetime_value") + assert ltv_dim is not None + assert "SELECT" in ltv_dim.sql.upper() + assert "SUM" in ltv_dim.sql.upper() + + +def test_lookml_case_expression_dimension(): + """Test parsing CASE expression dimensions.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_sql.lkml")) + + complex_view = graph.get_model("complex_sql_view") + + # Check CASE dimension + bucket_dim = complex_view.get_dimension("order_size_bucket") + assert bucket_dim is not None + assert "CASE" in bucket_dim.sql.upper() + assert "WHEN" in bucket_dim.sql.upper() + + +def test_lookml_derived_table_cte(): + """Test parsing derived tables with CTEs.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_sql.lkml")) + + assert "customer_cohorts" in graph.models + cohorts = graph.get_model("customer_cohorts") + + # Should have SQL with CTE + assert cohorts.table is None + assert cohorts.sql is not None + assert "WITH" in cohorts.sql.upper() + + +def test_lookml_sql_table_name_reference(): + """Test parsing views referencing other views' SQL_TABLE_NAME.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_sql.lkml")) + + # order_facts references customer_cohorts.SQL_TABLE_NAME + 
assert "order_facts" in graph.models + order_facts = graph.get_model("order_facts") + + assert order_facts.sql is not None + # The SQL should contain the reference + assert "customer_cohorts" in order_facts.sql.lower() + + +# ============================================================================= +# ACTIONS AND DRILL FIELDS TESTS +# ============================================================================= + + +def test_lookml_links(): + """Test parsing dimensions with links.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_actions.lkml")) + + orders = graph.get_model("interactive_orders") + + # Links are parsed but not stored in sidemantic model + # We just verify the dimension exists and is parsed correctly + id_dim = orders.get_dimension("id") + assert id_dim is not None + + customer_id_dim = orders.get_dimension("customer_id") + assert customer_id_dim is not None + + +def test_lookml_html_formatting(): + """Test parsing dimensions with HTML formatting.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_actions.lkml")) + + orders = graph.get_model("interactive_orders") + + # HTML is parsed but stored in dimension + amount_dim = orders.get_dimension("amount") + assert amount_dim is not None + + status_dim = orders.get_dimension("status") + assert status_dim is not None + + +def test_lookml_drill_fields(): + """Test parsing measures with drill_fields.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_actions.lkml")) + + orders = graph.get_model("interactive_orders") + + # Drill fields are parsed but not stored in sidemantic + # We verify measures exist + count_measure = orders.get_metric("count") + assert count_measure is not None + + revenue_measure = orders.get_metric("total_revenue") + assert revenue_measure is not None + + +def test_lookml_sets(): + """Test parsing set definitions.""" + adapter = LookMLAdapter() + graph = 
adapter.parse(Path("tests/fixtures/lookml/edge_cases_actions.lkml")) + + # Sets are parsed by lkml but not currently stored in sidemantic + # We verify the view parses correctly + orders = graph.get_model("interactive_orders") + assert orders is not None + + +def test_lookml_filtered_measures_various(): + """Test various filtered measure patterns.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_actions.lkml")) + + orders = graph.get_model("interactive_orders") + + # Check various filtered measures exist + assert orders.get_metric("completed_orders") is not None + assert orders.get_metric("pending_orders") is not None + assert orders.get_metric("cancelled_orders") is not None + assert orders.get_metric("web_orders") is not None + assert orders.get_metric("mobile_orders") is not None + + +# ============================================================================= +# SPECIAL TYPES TESTS +# ============================================================================= + + +def test_lookml_tier_dimension(): + """Test parsing tier dimensions.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_special_types.lkml")) + + special = graph.get_model("special_types") + + # Tier dimensions should be parsed + age_tier = special.get_dimension("age_tier") + assert age_tier is not None + # Tier maps to categorical + assert age_tier.type == "categorical" + + income_tier = special.get_dimension("income_tier") + assert income_tier is not None + + +def test_lookml_case_dimension(): + """Test parsing case dimensions.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_special_types.lkml")) + + special = graph.get_model("special_types") + + # Case dimensions + segment = special.get_dimension("customer_value_segment") + assert segment is not None + assert segment.type == "categorical" + + +def test_lookml_location_dimension(): + """Test parsing location 
(geo) dimensions.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_special_types.lkml")) + + special = graph.get_model("special_types") + + # Location dimension (type: location combines lat/lng) + location = special.get_dimension("location") + assert location is not None + + # Underlying lat/lng dimensions + lat = special.get_dimension("latitude") + assert lat is not None + assert lat.type == "numeric" + + +def test_lookml_yesno_dimension(): + """Test parsing yesno dimensions.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_special_types.lkml")) + + special = graph.get_model("special_types") + + # yesno dimensions + is_active = special.get_dimension("is_active") + assert is_active is not None + # yesno maps to categorical + assert is_active.type == "categorical" + + is_verified = special.get_dimension("is_verified") + assert is_verified is not None + + has_purchases = special.get_dimension("has_purchases") + assert has_purchases is not None + + +def test_lookml_json_extraction(): + """Test parsing JSON extraction dimensions.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_special_types.lkml")) + + json_view = graph.get_model("json_array_types") + + # JSON extraction dimensions + source = json_view.get_dimension("property_source") + assert source is not None + assert "JSON_EXTRACT" in source.sql.upper() + + browser = json_view.get_dimension("user_agent_browser") + assert browser is not None + + +# ============================================================================= +# EXPLORES TESTS +# ============================================================================= + + +def test_lookml_explore_views(): + """Test parsing views that are part of explores.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_explores.lkml")) + + # All views should be parsed + assert "fact_orders" in 
graph.models + assert "dim_customers" in graph.models + assert "dim_products" in graph.models + assert "dim_stores" in graph.models + assert "dim_regions" in graph.models + + +def test_lookml_explore_derived_table(): + """Test parsing derived table in explore context.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_explores.lkml")) + + # date_spine is a derived table + assert "date_spine" in graph.models + date_spine = graph.get_model("date_spine") + + assert date_spine.sql is not None + assert "GENERATE_DATE_ARRAY" in date_spine.sql.upper() or "SELECT" in date_spine.sql.upper() + + +def test_lookml_explore_persisted_derived_table(): + """Test parsing persisted derived table.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_explores.lkml")) + + assert "order_daily_metrics" in graph.models + metrics = graph.get_model("order_daily_metrics") + + assert metrics.sql is not None + assert "GROUP BY" in metrics.sql.upper() + + +# ============================================================================= +# KITCHEN SINK TESTS +# ============================================================================= + + +def test_lookml_kitchen_sink_comprehensive(): + """Test the comprehensive kitchen_sink fixture covers all patterns.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/kitchen_sink.lkml")) + + # Verify all models exist + assert "regions" in graph.models + assert "categories" in graph.models + assert "customers" in graph.models + assert "products" in graph.models + assert "orders" in graph.models + assert "order_items" in graph.models + assert "shipments" in graph.models + assert "reviews" in graph.models + + +def test_lookml_kitchen_sink_dimension_references(): + """Test dimension reference resolution in kitchen_sink.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/kitchen_sink.lkml")) + + order_items = 
graph.get_model("order_items") + + # line_total references quantity, unit_price, line_discount + line_total = order_items.get_dimension("line_total") + assert line_total is not None + assert line_total.sql is not None + # The dimension references should be resolved to their SQL + # Original: ${quantity} * ${unit_price} - ${line_discount} + # Resolved: should contain {model}.quantity etc. + assert "{model}" in line_total.sql + + +def test_lookml_kitchen_sink_measure_references(): + """Test measure-to-measure reference resolution in kitchen_sink.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/kitchen_sink.lkml")) + + orders = graph.get_model("orders") + + # delivery_rate references delivered_orders and count + delivery_rate = orders.get_metric("delivery_rate") + assert delivery_rate is not None + assert delivery_rate.type == "derived" + assert delivery_rate.sql is not None + # Should reference the measures + assert "delivered_orders" in delivery_rate.sql + assert "count" in delivery_rate.sql + + +def test_lookml_kitchen_sink_segments(): + """Test segment parsing in kitchen_sink.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/kitchen_sink.lkml")) + + orders = graph.get_model("orders") + + # Check segments + segment_names = [s.name for s in orders.segments] + assert "completed" in segment_names + assert "high_value" in segment_names + assert "discounted" in segment_names + + +# ============================================================================= +# FILTER PARSING TESTS +# ============================================================================= + + +def test_lookml_filter_in_clause(): + """Test parsing comma-separated filter values as IN clause.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_filters.lkml")) + + filters_view = graph.get_model("filter_edge_cases") + + # pending_or_processing_count uses filters: [status: "pending,processing"] 
+ pending_processing = filters_view.get_metric("pending_or_processing_count") + assert pending_processing is not None + assert pending_processing.filters is not None + # Should be an IN clause + filter_str = pending_processing.filters[0] + assert "IN" in filter_str.upper() + assert "pending" in filter_str + assert "processing" in filter_str + + +def test_lookml_filter_not_in_clause(): + """Test parsing negated comma-separated values as NOT IN clause.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_filters.lkml")) + + filters_view = graph.get_model("filter_edge_cases") + + # excluding_cancelled_amount uses filters: [status: "-cancelled,-refunded"] + excluding = filters_view.get_metric("excluding_cancelled_amount") + assert excluding is not None + assert excluding.filters is not None + filter_str = excluding.filters[0] + assert "NOT IN" in filter_str.upper() + assert "cancelled" in filter_str + assert "refunded" in filter_str + + +def test_lookml_filter_single_negation(): + """Test parsing single negation filter.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_filters.lkml")) + + filters_view = graph.get_model("filter_edge_cases") + + # not_cancelled_count uses filters: [status: "-cancelled"] + not_cancelled = filters_view.get_metric("not_cancelled_count") + assert not_cancelled is not None + assert not_cancelled.filters is not None + filter_str = not_cancelled.filters[0] + assert "!=" in filter_str or "NOT" in filter_str.upper() + assert "cancelled" in filter_str + + +def test_lookml_filter_null(): + """Test parsing NULL filters.""" + adapter = LookMLAdapter() + + # NULL filters are in special_filter_cases view in edge_cases_filters.lkml + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_filters.lkml")) + special = graph.get_model("special_filter_cases") + + # null_values uses filters: [nullable_field: "NULL"] + null_values = special.get_metric("null_values") + 
assert null_values is not None + assert null_values.filters is not None + assert "IS NULL" in null_values.filters[0].upper() + + # not_null_values uses filters: [nullable_field: "-NULL"] + not_null = special.get_metric("not_null_values") + assert not_null is not None + assert not_null.filters is not None + assert "IS NOT NULL" in not_null.filters[0].upper() + + +def test_lookml_filter_wildcard(): + """Test parsing wildcard/LIKE filters.""" + adapter = LookMLAdapter() + graph = adapter.parse(Path("tests/fixtures/lookml/edge_cases_filters.lkml")) + + filters_view = graph.get_model("filter_edge_cases") + + # a_region_count uses filters: [region: "A%"] + a_region = filters_view.get_metric("a_region_count") + assert a_region is not None + assert a_region.filters is not None + assert "LIKE" in a_region.filters[0].upper() + assert "A%" in a_region.filters[0] + + +def test_lookml_filter_numeric_in_clause(): + """Test parsing numeric comma-separated filter values as IN clause.""" + import tempfile + + lkml_content = """ +view: numeric_filter_test { + sql_table_name: orders ;; + + dimension: order_id { type: number sql: ${TABLE}.order_id ;; } + dimension: price { type: number sql: ${TABLE}.price ;; } + + measure: specific_orders { + type: count + filters: [order_id: "1,2,3"] + } + + measure: specific_prices { + type: sum + sql: ${price} ;; + filters: [price: "10.5,20.0,30.99"] + } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("numeric_filter_test") + + # Check integer IN clause + specific_orders = model.get_metric("specific_orders") + assert specific_orders is not None + assert specific_orders.filters is not None + filter_str = specific_orders.filters[0] + assert "IN" in filter_str.upper() + # Should be unquoted integers + assert "IN (1, 2, 3)" in filter_str + + # Check decimal IN clause + 
specific_prices = model.get_metric("specific_prices") + assert specific_prices is not None + assert specific_prices.filters is not None + filter_str = specific_prices.filters[0] + assert "IN" in filter_str.upper() + # Should be unquoted decimals + assert "10.5" in filter_str + assert "20.0" in filter_str + + +# ============================================================================= +# DURATION DIMENSION GROUP TESTS +# ============================================================================= + + +def test_lookml_duration_dimension_group(): + """Test parsing dimension_group with type: duration.""" + import tempfile + + lkml_content = """ +view: duration_test { + sql_table_name: t ;; + + dimension_group: process_time { + type: duration + intervals: [second, minute, hour, day] + sql_start: ${TABLE}.started_at ;; + sql_end: ${TABLE}.completed_at ;; + } + + measure: count { type: count } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("duration_test") + + # Check that duration dimensions were created + dim_names = [d.name for d in model.dimensions] + assert "process_time_seconds" in dim_names + assert "process_time_minutes" in dim_names + assert "process_time_hours" in dim_names + assert "process_time_days" in dim_names + + # Check SQL contains DATE_DIFF + seconds_dim = model.get_dimension("process_time_seconds") + assert seconds_dim is not None + assert "DATE_DIFF" in seconds_dim.sql.upper() + assert "SECOND" in seconds_dim.sql.upper() + + +# ============================================================================= +# NATIVE DERIVED TABLE (EXPLORE_SOURCE) TESTS +# ============================================================================= + + +def test_lookml_native_derived_table(): + """Test parsing native derived tables with explore_source.""" + import tempfile + + lkml_content = 
""" +view: native_dt_test { + derived_table: { + explore_source: orders { + column: customer_id {} + column: total_revenue { field: orders.revenue } + } + } + + dimension: customer_id { + primary_key: yes + sql: ${TABLE}.customer_id ;; + } + + dimension: total_revenue { + type: number + sql: ${TABLE}.total_revenue ;; + } + + measure: count { type: count } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("native_dt_test") + + # Check model was created and has SQL (even if placeholder) + assert model is not None + assert model.table is None # It's a derived table + assert model.sql is not None + + +# ============================================================================= +# ADDITIONAL MEASURE TYPES TESTS +# ============================================================================= + + +def test_lookml_measure_type_median(): + """Test parsing median measure type.""" + import tempfile + + lkml_content = """ +view: median_test { + sql_table_name: t ;; + + dimension: value { type: number sql: ${TABLE}.value ;; } + + measure: median_value { + type: median + sql: ${value} ;; + } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("median_test") + + median_measure = model.get_metric("median_value") + assert median_measure is not None + assert median_measure.agg == "median" + + +def test_lookml_measure_type_percentile(): + """Test parsing percentile measure type (becomes derived).""" + import tempfile + + lkml_content = """ +view: percentile_test { + sql_table_name: t ;; + + dimension: value { type: number sql: ${TABLE}.value ;; } + + measure: p90_value { + type: percentile + percentile: 90 + sql: ${value} ;; + } +} +""" + adapter 
= LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("percentile_test") + + percentile_measure = model.get_metric("p90_value") + assert percentile_measure is not None + # Percentile is not supported as agg, so it becomes derived or None + # The key is that it parses without error + + +# ============================================================================= +# ADDITIONAL EDGE CASES TESTS +# ============================================================================= + + +def test_lookml_cross_view_reference(): + """Test parsing cross-view field references (${other_view.field}).""" + import tempfile + + lkml_content = """ +view: orders { + sql_table_name: orders ;; + dimension: id { type: number sql: ${TABLE}.id ;; } + dimension: customer_name { + type: string + sql: ${customers.name} ;; + } + measure: count { type: count } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + orders = graph.get_model("orders") + + # Cross-view references should be preserved + customer_name = orders.get_dimension("customer_name") + assert customer_name is not None + assert "customers.name" in customer_name.sql + + +def test_lookml_recursive_dimension_references(): + """Test recursive dimension references (dim_a -> dim_b -> dim_c).""" + import tempfile + + lkml_content = """ +view: recursive_test { + sql_table_name: t ;; + + dimension: base_amount { type: number sql: ${TABLE}.amount ;; } + dimension: doubled { type: number sql: ${base_amount} * 2 ;; } + dimension: quadrupled { type: number sql: ${doubled} * 2 ;; } + + measure: sum_quad { type: sum sql: ${quadrupled} ;; } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + 
f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("recursive_test") + + # All dimensions should resolve back to {model}.amount + assert "{model}.amount" in model.get_dimension("base_amount").sql + assert "{model}.amount" in model.get_dimension("doubled").sql + assert "{model}.amount" in model.get_dimension("quadrupled").sql + assert "{model}.amount" in model.get_metric("sum_quad").sql + + +def test_lookml_special_characters_in_sql(): + """Test SQL with special characters (quotes, brackets, backticks).""" + import tempfile + + lkml_content = """ +view: special_chars { + sql_table_name: "schema"."table" ;; + + dimension: quoted_col { type: string sql: ${TABLE}."column name" ;; } + dimension: escaped_quote { type: string sql: CONCAT(${TABLE}.name, '''s value') ;; } + + measure: count { type: count } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("special_chars") + + assert model.get_dimension("quoted_col") is not None + # Escaped quotes should be preserved + assert "'''" in model.get_dimension("escaped_quote").sql + + +def test_lookml_window_functions(): + """Test parsing window functions in dimensions.""" + import tempfile + + lkml_content = """ +view: window_funcs { + sql_table_name: orders ;; + + dimension: id { type: number sql: ${TABLE}.id ;; primary_key: yes } + dimension: customer_id { type: number sql: ${TABLE}.customer_id ;; } + dimension: amount { type: number sql: ${TABLE}.amount ;; } + + dimension: customer_order_rank { + type: number + sql: ROW_NUMBER() OVER (PARTITION BY ${customer_id} ORDER BY ${TABLE}.created_at) ;; + } + + dimension: running_total { + type: number + sql: SUM(${amount}) OVER (PARTITION BY ${customer_id} ORDER BY ${TABLE}.created_at) ;; + } + + measure: count { type: count } +} +""" + adapter = LookMLAdapter() + + 
with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("window_funcs") + + rank_dim = model.get_dimension("customer_order_rank") + assert rank_dim is not None + assert "ROW_NUMBER" in rank_dim.sql + assert "OVER" in rank_dim.sql + + running_dim = model.get_dimension("running_total") + assert running_dim is not None + assert "SUM" in running_dim.sql + assert "OVER" in running_dim.sql + + +def test_lookml_complex_measure_expressions(): + """Test complex derived measure SQL expressions.""" + import tempfile + + lkml_content = """ +view: complex_measures { + sql_table_name: metrics ;; + + dimension: revenue { type: number sql: ${TABLE}.revenue ;; } + dimension: cost { type: number sql: ${TABLE}.cost ;; } + dimension: units { type: number sql: ${TABLE}.units ;; } + + measure: count { type: count } + measure: total_revenue { type: sum sql: ${revenue} ;; } + measure: total_cost { type: sum sql: ${cost} ;; } + measure: total_units { type: sum sql: ${units} ;; } + + measure: margin_pct { + type: number + sql: 100.0 * (${total_revenue} - ${total_cost}) / NULLIF(${total_revenue}, 0) ;; + } + + measure: revenue_per_unit { + type: number + sql: CASE WHEN ${total_units} > 0 THEN ${total_revenue} / ${total_units} ELSE 0 END ;; + } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("complex_measures") + + margin = model.get_metric("margin_pct") + assert margin is not None + assert margin.type == "derived" + assert "NULLIF" in margin.sql + + rpu = model.get_metric("revenue_per_unit") + assert rpu is not None + assert "CASE" in rpu.sql + + +def test_lookml_nested_ctes(): + """Test parsing derived tables with nested CTEs.""" + import tempfile + + lkml_content = """ +view: nested_ctes { + 
derived_table: { + sql: + WITH daily AS ( + SELECT date, SUM(amount) as daily_total + FROM orders + GROUP BY date + ), + weekly AS ( + SELECT DATE_TRUNC('week', date) as week, SUM(daily_total) as weekly_total + FROM daily + GROUP BY 1 + ) + SELECT * FROM weekly + ;; + } + + dimension: week { type: date sql: ${TABLE}.week ;; } + dimension: weekly_total { type: number sql: ${TABLE}.weekly_total ;; } + + measure: count { type: count } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("nested_ctes") + + assert model.table is None + assert model.sql is not None + assert "WITH" in model.sql + assert "daily" in model.sql + assert "weekly" in model.sql + + +def test_lookml_circular_reference_no_crash(): + """Test that circular dimension references don't crash the parser.""" + import tempfile + + lkml_content = """ +view: circular { + sql_table_name: t ;; + + dimension: dim_a { type: number sql: ${dim_b} + 1 ;; } + dimension: dim_b { type: number sql: ${dim_a} + 1 ;; } + + measure: count { type: count } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + # Should not crash, even with circular references + graph = adapter.parse(Path(f.name)) + model = graph.get_model("circular") + + assert model is not None + assert model.get_dimension("dim_a") is not None + assert model.get_dimension("dim_b") is not None + + +def test_lookml_empty_view(): + """Test parsing empty/minimal views.""" + import tempfile + + lkml_content = """ +view: empty_view { + sql_table_name: empty ;; +} + +view: minimal_view { + sql_table_name: minimal ;; + dimension: id { sql: ${TABLE}.id ;; } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) 
+ f.flush() + + graph = adapter.parse(Path(f.name)) + + empty = graph.get_model("empty_view") + assert empty is not None + assert len(empty.dimensions) == 0 + + minimal = graph.get_model("minimal_view") + assert minimal is not None + assert len(minimal.dimensions) == 1 + + +def test_lookml_many_dimensions(): + """Test parsing views with many dimensions (stress test).""" + import tempfile + + lkml_content = "view: many_dims {\n sql_table_name: big_table ;;\n\n" + for i in range(50): + lkml_content += f" dimension: dim_{i} {{ type: number sql: ${{TABLE}}.col_{i} ;; }}\n" + lkml_content += "\n measure: count { type: count }\n measure: total { type: sum sql: ${dim_0} ;; }\n}\n" + + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("many_dims") + + assert len(model.dimensions) == 50 + assert "{model}.col_0" in model.get_dimension("dim_0").sql + assert "{model}.col_49" in model.get_dimension("dim_49").sql + + +def test_lookml_json_struct_access(): + """Test parsing JSON and struct field access patterns.""" + import tempfile + + lkml_content = """ +view: json_data { + sql_table_name: events ;; + + dimension: id { type: number sql: ${TABLE}.id ;; } + + dimension: bq_json_value { + type: string + sql: JSON_VALUE(${TABLE}.data, '$.user.name') ;; + } + + dimension: pg_json_value { + type: string + sql: ${TABLE}.data->>'user'->>'name' ;; + } + + dimension: struct_field { + type: string + sql: ${TABLE}.nested.field.value ;; + } + + dimension: array_access { + type: string + sql: ${TABLE}.items[OFFSET(0)] ;; + } + + measure: count { type: count } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("json_data") + + assert "JSON_VALUE" in 
model.get_dimension("bq_json_value").sql + assert "->>" in model.get_dimension("pg_json_value").sql + assert "nested.field.value" in model.get_dimension("struct_field").sql + assert "OFFSET" in model.get_dimension("array_access").sql + + +def test_lookml_regex_in_sql(): + """Test parsing SQL with regex functions.""" + import tempfile + + lkml_content = """ +view: regex_view { + sql_table_name: logs ;; + + dimension: id { type: number sql: ${TABLE}.id ;; } + + dimension: extracted_email { + type: string + sql: REGEXP_EXTRACT(${TABLE}.text, r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+') ;; + } + + dimension: has_phone { + type: yesno + sql: REGEXP_CONTAINS(${TABLE}.text, r'\\d{3}-\\d{3}-\\d{4}') ;; + } + + measure: count { type: count } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("regex_view") + + assert model.get_dimension("extracted_email") is not None + assert "REGEXP_EXTRACT" in model.get_dimension("extracted_email").sql + + +def test_lookml_hidden_fields(): + """Test parsing hidden dimensions and measures.""" + import tempfile + + lkml_content = """ +view: hidden_test { + sql_table_name: data ;; + + dimension: visible_id { + type: number + sql: ${TABLE}.id ;; + primary_key: yes + } + + dimension: hidden_internal_id { + type: number + sql: ${TABLE}.internal_id ;; + hidden: yes + } + + measure: visible_count { type: count } + + measure: hidden_sum { + type: sum + sql: ${TABLE}.amount ;; + hidden: yes + } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("hidden_test") + + # All fields should be parsed, hidden or not + assert model.get_dimension("visible_id") is not None + assert model.get_dimension("hidden_internal_id") is not None 
+ assert model.get_metric("visible_count") is not None + assert model.get_metric("hidden_sum") is not None + + +def test_lookml_date_range_filters(): + """Test parsing Looker date range filter syntax.""" + import tempfile + + lkml_content = """ +view: date_filters { + sql_table_name: events ;; + + dimension: id { type: number sql: ${TABLE}.id ;; } + + dimension_group: created { + type: time + timeframes: [date, week, month] + sql: ${TABLE}.created_at ;; + } + + measure: last_30_days { + type: count + filters: [created_date: "last 30 days"] + } + + measure: this_year { + type: count + filters: [created_date: "this year"] + } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("date_filters") + + # Date filters are preserved as string literals (Looker-specific runtime syntax) + last_30 = model.get_metric("last_30_days") + assert last_30 is not None + assert last_30.filters is not None + # The filter value should be preserved + assert any("30" in f for f in last_30.filters) + + +def test_lookml_block_style_filters(): + """Test parsing block-style filter syntax: filters: { field: x value: y }.""" + import tempfile + + lkml_content = """ +view: block_filters { + sql_table_name: flights ;; + + dimension: flight_length { type: number sql: ${TABLE}.flight_length ;; } + dimension: is_delayed { type: yesno sql: ${TABLE}.is_delayed ;; } + + measure: count { type: count } + + measure: long_flights { + type: count + filters: { + field: flight_length + value: ">120" + } + } + + measure: delayed_flights { + type: count + filters: { + field: is_delayed + value: "yes" + } + } + + measure: long_delayed_flights { + type: count + filters: { + field: flight_length + value: ">120" + } + filters: { + field: is_delayed + value: "yes" + } + } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", 
suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("block_filters") + + # Check long_flights filter + long_flights = model.get_metric("long_flights") + assert long_flights is not None + assert long_flights.filters is not None + assert len(long_flights.filters) == 1 + assert "> 120" in long_flights.filters[0] or ">120" in long_flights.filters[0] + + # Check delayed_flights filter + delayed = model.get_metric("delayed_flights") + assert delayed is not None + assert delayed.filters is not None + assert "true" in delayed.filters[0].lower() + + # Check long_delayed_flights has both filters + long_delayed = model.get_metric("long_delayed_flights") + assert long_delayed is not None + assert long_delayed.filters is not None + assert len(long_delayed.filters) == 2 + + +def test_lookml_placeholder_measure_skipped(): + """Test that placeholder measures (type: number with no SQL) are skipped.""" + import tempfile + + lkml_content = """ +view: template_view { + extension: required + + measure: placeholder_measure { + type: number + hidden: yes + } + + measure: real_derived_measure { + type: number + sql: ${some_field} * 2 ;; + } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + graph = adapter.parse(Path(f.name)) + model = graph.get_model("template_view") + + # Placeholder measure should be skipped + assert model.get_metric("placeholder_measure") is None + + # Real derived measure should exist + assert model.get_metric("real_derived_measure") is not None + + +def test_lookml_duplicate_refinement_skipped(): + """Test that duplicate refinements (+view) are skipped.""" + import tempfile + + lkml_content = """ +view: base_view { + sql_table_name: t ;; + dimension: id { type: number sql: ${TABLE}.id ;; } + measure: count { type: count } +} + +view: +base_view { + dimension: new_field_1 { 
type: string sql: ${TABLE}.field1 ;; } +} + +view: +base_view { + dimension: new_field_2 { type: string sql: ${TABLE}.field2 ;; } +} +""" + adapter = LookMLAdapter() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".lkml", delete=False) as f: + f.write(lkml_content) + f.flush() + + # Should not raise an error + graph = adapter.parse(Path(f.name)) + + # Both base_view and first +base_view should exist + assert "base_view" in graph.models + assert "+base_view" in graph.models + + # Only the first refinement is kept + refinement = graph.get_model("+base_view") + assert refinement.get_dimension("new_field_1") is not None + # Second refinement's fields are not added (skipped) + assert refinement.get_dimension("new_field_2") is None + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/fixtures/lookml/edge_cases_actions.lkml b/tests/fixtures/lookml/edge_cases_actions.lkml new file mode 100644 index 00000000..f023a8f5 --- /dev/null +++ b/tests/fixtures/lookml/edge_cases_actions.lkml @@ -0,0 +1,411 @@ +# Edge Cases: Links, Actions, and Drill Fields +# Tests interactive features that are common in real Looker deployments +# NOTE: Some action/form features simplified for parser compatibility + +view: interactive_orders { + sql_table_name: analytics.orders ;; + description: "Orders with links, actions, and drill fields" + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + # Link to external system + link: { + label: "View in Admin" + url: "https://admin.example.com/orders/{{ value }}" + icon_url: "https://example.com/favicon.ico" + } + } + + dimension: customer_id { + type: number + sql: ${TABLE}.customer_id ;; + link: { + label: "View Customer Profile" + url: "/dashboards/123?customer_id={{ value }}" + } + } + + dimension: payment_id { + type: string + sql: ${TABLE}.payment_id ;; + } + + dimension: email { + type: string + sql: ${TABLE}.email ;; + } + + dimension: amount { + type: number + sql: ${TABLE}.amount ;; + 
value_format_name: usd + # HTML formatting + html: + {{ rendered_value }} + ;; + } + + dimension: status { + type: string + sql: ${TABLE}.status ;; + html: + {{ value }} + ;; + } + + dimension: region { + type: string + sql: ${TABLE}.region ;; + # Drill to regional dashboard + link: { + label: "View Regional Dashboard" + url: "/dashboards/456?region={{ value }}" + } + } + + dimension: channel { + type: string + sql: ${TABLE}.channel ;; + } + + dimension_group: created { + type: time + timeframes: [raw, time, date, week, month, quarter, year] + sql: ${TABLE}.created_at ;; + } + + # Define sets for drill fields + set: order_details { + fields: [id, customer_id, email, amount, status, created_date] + } + + set: customer_info { + fields: [customer_id, email, region] + } + + set: revenue_fields { + fields: [amount, total_revenue, avg_order_value] + } + + measure: count { + type: count + description: "Total orders" + # Drill into specific fields on click + drill_fields: [order_details*] + } + + measure: total_revenue { + type: sum + sql: ${amount} ;; + value_format_name: usd + description: "Total revenue" + drill_fields: [order_details*, region, channel] + # Link to revenue analysis + link: { + label: "Revenue Breakdown" + url: "/dashboards/revenue_analysis?total={{ value }}" + } + } + + measure: avg_order_value { + type: average + sql: ${amount} ;; + value_format_name: usd + drill_fields: [order_details*] + } + + measure: unique_customers { + type: count_distinct + sql: ${customer_id} ;; + drill_fields: [customer_info*] + } + + measure: completed_orders { + type: count + filters: [status: "completed"] + drill_fields: [order_details*] + } + + measure: completion_rate { + type: number + sql: 1.0 * ${completed_orders} / NULLIF(${count}, 0) ;; + value_format_name: percent_1 + drill_fields: [status, count, completed_orders] + } + + # Filtered measures for different statuses + measure: pending_orders { + type: count + filters: [status: "pending"] + } + + measure: 
cancelled_orders { + type: count + filters: [status: "cancelled"] + } + + measure: pending_revenue { + type: sum + sql: ${amount} ;; + filters: [status: "pending"] + } + + # Measures by channel + measure: web_orders { + type: count + filters: [channel: "web"] + } + + measure: mobile_orders { + type: count + filters: [channel: "mobile"] + } + + measure: store_orders { + type: count + filters: [channel: "store"] + } + + # Derived measures + measure: web_share { + type: number + sql: 1.0 * ${web_orders} / NULLIF(${count}, 0) ;; + value_format_name: percent_1 + } + + measure: mobile_share { + type: number + sql: 1.0 * ${mobile_orders} / NULLIF(${count}, 0) ;; + value_format_name: percent_1 + } + + # Segments + filter: high_value { + sql: ${TABLE}.amount >= 500 ;; + description: "High value orders" + } + + filter: recent_orders { + sql: ${TABLE}.created_at >= DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY) ;; + description: "Orders in last 30 days" + } +} + +# View demonstrating links on multiple dimensions +view: linked_products { + sql_table_name: analytics.products ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + link: { + label: "Product Detail" + url: "/products/{{ value }}" + } + link: { + label: "Edit Product" + url: "/admin/products/{{ value }}/edit" + } + } + + dimension: name { + type: string + sql: ${TABLE}.name ;; + link: { + label: "Search Google" + url: "https://www.google.com/search?q={{ value }}" + } + } + + dimension: category { + type: string + sql: ${TABLE}.category ;; + link: { + label: "Category Dashboard" + url: "/dashboards/category?name={{ value }}" + } + } + + dimension: brand { + type: string + sql: ${TABLE}.brand ;; + } + + dimension: price { + type: number + sql: ${TABLE}.price ;; + value_format_name: usd + } + + dimension: cost { + type: number + sql: ${TABLE}.cost ;; + value_format_name: usd + } + + dimension: sku { + type: string + sql: ${TABLE}.sku ;; + } + + dimension: is_active { + type: yesno + sql: 
${TABLE}.is_active ;; + } + + dimension_group: created { + type: time + timeframes: [date, week, month, year] + sql: ${TABLE}.created_at ;; + } + + set: product_detail { + fields: [id, name, category, brand, price, sku] + } + + measure: count { + type: count + drill_fields: [product_detail*] + } + + measure: active_products { + type: count + filters: [is_active: "yes"] + drill_fields: [product_detail*] + } + + measure: avg_price { + type: average + sql: ${price} ;; + value_format_name: usd + } + + measure: total_value { + type: sum + sql: ${price} ;; + value_format_name: usd + } + + measure: avg_margin { + type: number + sql: AVG(${price} - ${cost}) ;; + value_format_name: usd + } + + measure: margin_pct { + type: number + sql: 100.0 * (SUM(${price}) - SUM(${cost})) / NULLIF(SUM(${price}), 0) ;; + value_format: "0.0\%" + } +} + +# View with extensive drill fields +view: drill_heavy_view { + sql_table_name: analytics.events ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: event_type { + type: string + sql: ${TABLE}.event_type ;; + } + + dimension: user_id { + type: number + sql: ${TABLE}.user_id ;; + } + + dimension: session_id { + type: string + sql: ${TABLE}.session_id ;; + } + + dimension: page_url { + type: string + sql: ${TABLE}.page_url ;; + } + + dimension: referrer { + type: string + sql: ${TABLE}.referrer ;; + } + + dimension: browser { + type: string + sql: ${TABLE}.browser ;; + } + + dimension: device { + type: string + sql: ${TABLE}.device ;; + } + + dimension: country { + type: string + sql: ${TABLE}.country ;; + } + + dimension_group: created { + type: time + timeframes: [raw, time, date, hour, week, month] + sql: ${TABLE}.created_at ;; + } + + set: event_details { + fields: [id, event_type, user_id, session_id, created_time] + } + + set: user_context { + fields: [user_id, browser, device, country] + } + + set: page_context { + fields: [page_url, referrer] + } + + measure: count { + type: count + 
drill_fields: [event_details*] + } + + measure: unique_users { + type: count_distinct + sql: ${user_id} ;; + drill_fields: [user_context*] + } + + measure: unique_sessions { + type: count_distinct + sql: ${session_id} ;; + drill_fields: [event_details*] + } + + measure: pageviews { + type: count + filters: [event_type: "pageview"] + drill_fields: [page_context*, created_time] + } + + measure: clicks { + type: count + filters: [event_type: "click"] + } + + measure: conversions { + type: count + filters: [event_type: "conversion"] + } + + measure: bounce_rate { + type: number + sql: 100.0 * COUNT(CASE WHEN ${event_type} = 'bounce' THEN 1 END) / NULLIF(${unique_sessions}, 0) ;; + value_format: "0.0\%" + } +} diff --git a/tests/fixtures/lookml/edge_cases_explores.lkml b/tests/fixtures/lookml/edge_cases_explores.lkml new file mode 100644 index 00000000..6b7f6efa --- /dev/null +++ b/tests/fixtures/lookml/edge_cases_explores.lkml @@ -0,0 +1,489 @@ +# Edge Cases: Complex Explore Patterns +# Tests various join types, sql_always_where, access filters, and other explore features + +# Base views for explore testing +view: fact_orders { + sql_table_name: analytics.orders ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: customer_id { + type: number + sql: ${TABLE}.customer_id ;; + } + + dimension: product_id { + type: number + sql: ${TABLE}.product_id ;; + } + + dimension: store_id { + type: number + sql: ${TABLE}.store_id ;; + } + + dimension: amount { + type: number + sql: ${TABLE}.amount ;; + } + + dimension: status { + type: string + sql: ${TABLE}.status ;; + } + + dimension: channel { + type: string + sql: ${TABLE}.channel ;; + } + + dimension_group: created { + type: time + timeframes: [raw, time, date, week, month, quarter, year] + sql: ${TABLE}.created_at ;; + } + + measure: count { + type: count + } + + measure: total_amount { + type: sum + sql: ${amount} ;; + } + + measure: avg_amount { + type: average + sql: ${amount} 
;; + } + + measure: unique_customers { + type: count_distinct + sql: ${customer_id} ;; + } +} + +view: dim_customers { + sql_table_name: analytics.customers ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: name { + type: string + sql: ${TABLE}.name ;; + } + + dimension: email { + type: string + sql: ${TABLE}.email ;; + } + + dimension: tier { + type: string + sql: ${TABLE}.tier ;; + } + + dimension: region_id { + type: number + sql: ${TABLE}.region_id ;; + } + + dimension: account_manager_id { + type: number + sql: ${TABLE}.account_manager_id ;; + } + + dimension_group: registered { + type: time + timeframes: [date, week, month, year] + sql: ${TABLE}.registered_at ;; + } + + measure: count { + type: count + } +} + +view: dim_products { + sql_table_name: analytics.products ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: name { + type: string + sql: ${TABLE}.name ;; + } + + dimension: category { + type: string + sql: ${TABLE}.category ;; + } + + dimension: brand { + type: string + sql: ${TABLE}.brand ;; + } + + dimension: price { + type: number + sql: ${TABLE}.price ;; + } + + measure: count { + type: count + } +} + +view: dim_stores { + sql_table_name: analytics.stores ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: name { + type: string + sql: ${TABLE}.name ;; + } + + dimension: city { + type: string + sql: ${TABLE}.city ;; + } + + dimension: region_id { + type: number + sql: ${TABLE}.region_id ;; + } + + measure: count { + type: count + } +} + +view: dim_regions { + sql_table_name: analytics.regions ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: name { + type: string + sql: ${TABLE}.name ;; + } + + dimension: country { + type: string + sql: ${TABLE}.country ;; + } + + measure: count { + type: count + } +} + +view: dim_account_managers { + sql_table_name: 
analytics.account_managers ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: name { + type: string + sql: ${TABLE}.name ;; + } + + dimension: email { + type: string + sql: ${TABLE}.email ;; + } + + dimension: team { + type: string + sql: ${TABLE}.team ;; + } + + measure: count { + type: count + } +} + +# Complex explore with multiple join types +explore: orders { + from: fact_orders + label: "Orders Analysis" + description: "Main orders explore with all dimensions" + group_label: "Sales" + + # Always filter to active orders + sql_always_where: ${fact_orders.status} != 'deleted' ;; + + # Required filter + always_filter: { + filters: [fact_orders.created_date: "last 365 days"] + } + + # Conditional filter + conditionally_filter: { + filters: [fact_orders.channel: "web"] + unless: [fact_orders.store_id] + } + + # Access filter for row-level security + access_filter: { + field: dim_regions.name + user_attribute: allowed_regions + } + + # Many-to-one join + join: dim_customers { + type: left_outer + relationship: many_to_one + sql_on: ${fact_orders.customer_id} = ${dim_customers.id} ;; + } + + # Another many-to-one + join: dim_products { + type: left_outer + relationship: many_to_one + sql_on: ${fact_orders.product_id} = ${dim_products.id} ;; + } + + # Join with foreign_key shorthand + join: dim_stores { + type: left_outer + relationship: many_to_one + foreign_key: fact_orders.store_id + } + + # Chained join (through customers) + join: customer_region { + from: dim_regions + type: left_outer + relationship: many_to_one + sql_on: ${dim_customers.region_id} = ${customer_region.id} ;; + } + + # Another chained join (through stores) + join: store_region { + from: dim_regions + type: left_outer + relationship: many_to_one + sql_on: ${dim_stores.region_id} = ${store_region.id} ;; + } + + # Join with account managers + join: dim_account_managers { + type: left_outer + relationship: many_to_one + sql_on: 
${dim_customers.account_manager_id} = ${dim_account_managers.id} ;; + } +} + +# Explore with one-to-many join +explore: customers { + from: dim_customers + label: "Customer Analysis" + group_label: "Sales" + + # One-to-many join + join: customer_orders { + from: fact_orders + type: left_outer + relationship: one_to_many + sql_on: ${dim_customers.id} = ${customer_orders.customer_id} ;; + } + + join: dim_regions { + type: left_outer + relationship: many_to_one + sql_on: ${dim_customers.region_id} = ${dim_regions.id} ;; + } +} + +# Explore with inner join +explore: completed_orders { + from: fact_orders + label: "Completed Orders Only" + group_label: "Sales" + + # Pre-filter to completed only + sql_always_where: ${fact_orders.status} = 'completed' ;; + + # Inner join - only orders with customers + join: dim_customers { + type: inner + relationship: many_to_one + sql_on: ${fact_orders.customer_id} = ${dim_customers.id} ;; + } + + # Inner join - only orders with products + join: dim_products { + type: inner + relationship: many_to_one + sql_on: ${fact_orders.product_id} = ${dim_products.id} ;; + } +} + +# Explore with full outer join +explore: all_customers_orders { + from: dim_customers + label: "All Customers and Orders" + group_label: "Sales" + + join: fact_orders { + type: full_outer + relationship: one_to_many + sql_on: ${dim_customers.id} = ${fact_orders.customer_id} ;; + } +} + +# Explore with cross join (cartesian) +explore: date_product_matrix { + from: dim_products + label: "Date-Product Matrix" + group_label: "Planning" + + # Note: cross joins create cartesian product, use carefully + join: date_spine { + type: cross + relationship: many_to_many + } +} + +# Date spine view for cross join example +view: date_spine { + derived_table: { + sql: + SELECT date + FROM UNNEST(GENERATE_DATE_ARRAY( + DATE_SUB(CURRENT_DATE(), INTERVAL 365 DAY), + CURRENT_DATE() + )) AS date + ;; + } + + dimension: date { + type: date + primary_key: yes + sql: ${TABLE}.date ;; + } + + 
dimension_group: report { + type: time + timeframes: [date, week, month, quarter, year] + sql: ${date} ;; + } +} + +# Explore with sql_always_having +explore: high_volume_products { + from: dim_products + label: "High Volume Products" + group_label: "Products" + + sql_always_having: ${product_orders.count} > 10 ;; + + join: product_orders { + from: fact_orders + type: left_outer + relationship: one_to_many + sql_on: ${dim_products.id} = ${product_orders.product_id} ;; + } +} + +# Explore with required access grants +explore: sensitive_orders { + from: fact_orders + label: "Sensitive Order Data" + group_label: "Admin" + + required_access_grants: [can_view_sensitive_data] + + join: dim_customers { + type: left_outer + relationship: many_to_one + sql_on: ${fact_orders.customer_id} = ${dim_customers.id} ;; + } +} + +# Explore with persisted derived table base +explore: order_metrics { + from: order_daily_metrics + label: "Order Metrics" + group_label: "Metrics" + persist_with: daily_datagroup +} + +view: order_daily_metrics { + derived_table: { + sql: + SELECT + DATE(created_at) AS order_date, + COUNT(*) AS order_count, + SUM(amount) AS total_revenue, + COUNT(DISTINCT customer_id) AS unique_customers + FROM analytics.orders + WHERE status != 'deleted' + GROUP BY 1 + ;; + datagroup_trigger: daily_datagroup + indexes: ["order_date"] + } + + dimension: order_date { + type: date + primary_key: yes + sql: ${TABLE}.order_date ;; + } + + dimension: order_count { + type: number + sql: ${TABLE}.order_count ;; + } + + dimension: total_revenue { + type: number + sql: ${TABLE}.total_revenue ;; + } + + dimension: unique_customers { + type: number + sql: ${TABLE}.unique_customers ;; + } + + measure: sum_orders { + type: sum + sql: ${order_count} ;; + } + + measure: sum_revenue { + type: sum + sql: ${total_revenue} ;; + } + + measure: avg_revenue_per_day { + type: average + sql: ${total_revenue} ;; + } +} diff --git a/tests/fixtures/lookml/edge_cases_extends.lkml 
b/tests/fixtures/lookml/edge_cases_extends.lkml new file mode 100644 index 00000000..ff048c4d --- /dev/null +++ b/tests/fixtures/lookml/edge_cases_extends.lkml @@ -0,0 +1,169 @@ +# Edge Cases: View Extends and Refinements +# Tests LookML inheritance patterns that parsers commonly struggle with + +# Base view for inheritance testing +view: base_entity { + sql_table_name: analytics.entities ;; + description: "Base entity with common fields" + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: name { + type: string + sql: ${TABLE}.name ;; + } + + dimension: is_active { + type: yesno + sql: ${TABLE}.is_active ;; + } + + dimension_group: created { + type: time + timeframes: [date, week, month, year] + sql: ${TABLE}.created_at ;; + } + + measure: count { + type: count + description: "Total count" + } +} + +# View that extends base_entity +view: customers_extended { + extends: [base_entity] + sql_table_name: analytics.customers ;; + description: "Customer entity extending base" + + # Override the name dimension from base + dimension: name { + label: "Customer Name" + sql: CONCAT(${TABLE}.first_name, ' ', ${TABLE}.last_name) ;; + } + + # Add new customer-specific dimensions + dimension: email { + type: string + sql: ${TABLE}.email ;; + } + + dimension: tier { + type: string + sql: ${TABLE}.tier ;; + suggestions: ["bronze", "silver", "gold", "platinum"] + } + + dimension: lifetime_value { + type: number + sql: ${TABLE}.ltv ;; + value_format_name: usd + } + + # Add new measures + measure: total_ltv { + type: sum + sql: ${lifetime_value} ;; + value_format_name: usd + } + + measure: avg_ltv { + type: average + sql: ${lifetime_value} ;; + value_format_name: usd + } +} + +# Refinement syntax (LookML plus notation) +view: +base_entity { + # Refinements add to the base view without extending + dimension: refined_field { + type: string + sql: ${TABLE}.refined_field ;; + description: "Added via refinement" + } +} + +# View with multiple 
extends +view: multi_extend_view { + extends: [base_entity] + sql_table_name: analytics.multi_entities ;; + + dimension: extra_id { + type: string + sql: ${TABLE}.external_id ;; + description: "External system identifier" + } + + dimension: metadata { + type: string + sql: ${TABLE}.metadata ;; + } + + measure: unique_external_ids { + type: count_distinct + sql: ${extra_id} ;; + } +} + +# Abstract view (meant only to be extended, not used directly) +view: abstract_metrics { + extension: required + description: "Abstract view containing reusable metric definitions" + + measure: record_count { + type: count + } + + measure: sum_amount { + type: sum + sql: ${TABLE}.amount ;; + } + + measure: avg_amount { + type: average + sql: ${TABLE}.amount ;; + } + + measure: min_amount { + type: min + sql: ${TABLE}.amount ;; + } + + measure: max_amount { + type: max + sql: ${TABLE}.amount ;; + } +} + +# Concrete view extending abstract +view: transactions { + extends: [abstract_metrics] + sql_table_name: analytics.transactions ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: amount { + type: number + sql: ${TABLE}.amount ;; + } + + dimension: status { + type: string + sql: ${TABLE}.status ;; + } + + dimension_group: transaction { + type: time + timeframes: [time, date, week, month, quarter, year] + sql: ${TABLE}.transaction_at ;; + } +} diff --git a/tests/fixtures/lookml/edge_cases_filters.lkml b/tests/fixtures/lookml/edge_cases_filters.lkml new file mode 100644 index 00000000..25ca3590 --- /dev/null +++ b/tests/fixtures/lookml/edge_cases_filters.lkml @@ -0,0 +1,310 @@ +# Edge Cases: Complex Filter Syntax +# Tests various filter patterns that are common in real LookML + +view: filter_edge_cases { + sql_table_name: analytics.transactions ;; + description: "Tests complex filter syntax patterns" + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: amount { + type: number + sql: ${TABLE}.amount 
;; + } + + dimension: quantity { + type: number + sql: ${TABLE}.quantity ;; + } + + dimension: status { + type: string + sql: ${TABLE}.status ;; + } + + dimension: category { + type: string + sql: ${TABLE}.category ;; + } + + dimension: region { + type: string + sql: ${TABLE}.region ;; + } + + dimension: is_premium { + type: yesno + sql: ${TABLE}.is_premium ;; + } + + dimension: discount_pct { + type: number + sql: ${TABLE}.discount_pct ;; + } + + dimension_group: created { + type: time + timeframes: [raw, time, date, week, month, quarter, year] + sql: ${TABLE}.created_at ;; + } + + # Basic measures + measure: count { + type: count + } + + measure: total_amount { + type: sum + sql: ${amount} ;; + } + + measure: total_quantity { + type: sum + sql: ${quantity} ;; + } + + # Numeric comparison filters + measure: high_value_count { + type: count + filters: [amount: ">1000"] + description: "Count where amount > 1000" + } + + measure: low_value_count { + type: count + filters: [amount: "<100"] + description: "Count where amount < 100" + } + + measure: mid_range_count { + type: count + filters: [amount: ">=100", amount: "<=1000"] + description: "Count where 100 <= amount <= 1000" + } + + measure: non_zero_count { + type: count + filters: [amount: "!=0"] + description: "Count where amount is not zero" + } + + measure: not_null_count { + type: count + filters: [amount: "-NULL"] + description: "Count where amount is not null" + } + + # String filters + measure: completed_count { + type: count + filters: [status: "completed"] + } + + measure: not_cancelled_count { + type: count + filters: [status: "-cancelled"] + description: "Count where status is not cancelled" + } + + measure: pending_or_processing_count { + type: count + filters: [status: "pending,processing"] + description: "Count where status is pending OR processing" + } + + measure: specific_categories_count { + type: count + filters: [category: "electronics,clothing,home"] + description: "Count for specific 
categories" + } + + # Wildcard/pattern filters + measure: a_region_count { + type: count + filters: [region: "A%"] + description: "Count where region starts with A" + } + + measure: contains_west_count { + type: count + filters: [region: "%west%"] + description: "Count where region contains 'west'" + } + + # Boolean filters + measure: premium_count { + type: count + filters: [is_premium: "yes"] + } + + measure: non_premium_count { + type: count + filters: [is_premium: "no"] + } + + # Multiple filters (AND condition) + measure: high_value_premium { + type: sum + sql: ${amount} ;; + filters: [amount: ">500", is_premium: "yes"] + description: "Premium transactions over $500" + } + + measure: completed_electronics { + type: count + filters: [status: "completed", category: "electronics"] + } + + measure: q1_large_orders { + type: count + filters: [created_quarter: "Q1", amount: ">1000"] + } + + # Numeric range patterns + measure: discount_applied { + type: count + filters: [discount_pct: ">0"] + description: "Transactions with discount" + } + + measure: full_price { + type: count + filters: [discount_pct: "0"] + description: "Full price transactions" + } + + measure: heavy_discount { + type: count + filters: [discount_pct: ">=20"] + description: "20%+ discount transactions" + } + + # Complex derived measures with filter logic + measure: premium_conversion_rate { + type: number + sql: 1.0 * ${premium_count} / NULLIF(${count}, 0) ;; + description: "Percentage of premium transactions" + } + + measure: completion_rate { + type: number + sql: 1.0 * ${completed_count} / NULLIF(${count}, 0) ;; + description: "Order completion rate" + } + + measure: high_value_share { + type: number + sql: 1.0 * ${high_value_count} / NULLIF(${count}, 0) ;; + description: "Share of high-value transactions" + } + + # Filters with negative operators + measure: excluding_cancelled_amount { + type: sum + sql: ${amount} ;; + filters: [status: "-cancelled,-refunded"] + description: "Amount excluding 
cancelled and refunded" + } + + # Time-based filters (using dimension values) + measure: recent_transactions { + type: count + filters: [created_date: "last 30 days"] + description: "Transactions in last 30 days" + } + + measure: ytd_transactions { + type: count + filters: [created_date: "this year"] + description: "Year to date transactions" + } + + # Segments + filter: high_value { + sql: ${TABLE}.amount >= 1000 ;; + } + + filter: premium_segment { + sql: ${TABLE}.is_premium = TRUE ;; + } + + filter: active_period { + sql: ${TABLE}.created_at >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY) ;; + } + + filter: successful_transactions { + sql: ${TABLE}.status IN ('completed', 'shipped', 'delivered') ;; + } +} + +# View testing special filter edge cases +view: special_filter_cases { + sql_table_name: analytics.edge_data ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: value { + type: number + sql: ${TABLE}.value ;; + } + + dimension: text_field { + type: string + sql: ${TABLE}.text_field ;; + } + + dimension: nullable_field { + type: string + sql: ${TABLE}.nullable_field ;; + } + + # Null handling + measure: null_values { + type: count + filters: [nullable_field: "NULL"] + description: "Count of null values" + } + + measure: not_null_values { + type: count + filters: [nullable_field: "-NULL"] + description: "Count of non-null values" + } + + # Empty string handling + measure: empty_string_count { + type: count + filters: [text_field: "EMPTY"] + description: "Count of empty strings" + } + + measure: not_empty_count { + type: count + filters: [text_field: "-EMPTY"] + description: "Count of non-empty strings" + } + + # Special characters in filter values + measure: contains_special { + type: count + filters: [text_field: "%@%"] + description: "Contains @ symbol" + } + + measure: count { + type: count + } + + measure: total_value { + type: sum + sql: ${value} ;; + } +} diff --git a/tests/fixtures/lookml/edge_cases_liquid.lkml 
b/tests/fixtures/lookml/edge_cases_liquid.lkml new file mode 100644 index 00000000..d154bd67 --- /dev/null +++ b/tests/fixtures/lookml/edge_cases_liquid.lkml @@ -0,0 +1,268 @@ +# Edge Cases: Liquid Templating +# Tests Liquid syntax that parsers often have trouble with +# NOTE: Some advanced Liquid features are simplified for parser compatibility + +view: dynamic_sales { + # Dynamic sql_table_name with Liquid + sql_table_name: analytics.sales ;; + + description: "Sales with dynamic features" + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: amount { + type: number + sql: ${TABLE}.amount ;; + } + + dimension: currency { + type: string + sql: ${TABLE}.currency ;; + } + + # Liquid in SQL expression (simple form) + dimension: formatted_amount { + type: string + sql: CASE + WHEN ${currency} = 'USD' THEN CONCAT('$', CAST(${amount} AS STRING)) + WHEN ${currency} = 'EUR' THEN CONCAT('E', CAST(${amount} AS STRING)) + ELSE CONCAT(${currency}, ' ', CAST(${amount} AS STRING)) + END ;; + description: "Currency-formatted amount" + } + + # HTML with Liquid variable references + dimension: status { + type: string + sql: ${TABLE}.status ;; + html: {{ rendered_value }} ;; + } + + dimension: region { + type: string + sql: ${TABLE}.region ;; + } + + # Case dimension (CASE WHEN equivalent) + dimension: region_group { + type: string + case: { + when: { + sql: ${region} IN ('US', 'CA', 'MX') ;; + label: "North America" + } + when: { + sql: ${region} IN ('UK', 'DE', 'FR', 'ES', 'IT') ;; + label: "Europe" + } + when: { + sql: ${region} IN ('JP', 'CN', 'KR', 'AU') ;; + label: "Asia Pacific" + } + else: "Other" + } + } + + dimension_group: sale { + type: time + timeframes: [raw, time, date, week, month, quarter, year] + sql: ${TABLE}.sale_at ;; + } + + # Liquid variable references in SQL + dimension: days_since_sale { + type: number + sql: DATE_DIFF(CURRENT_DATE(), ${sale_date}, DAY) ;; + } + + measure: count { + type: count + } + + measure: 
total_amount { + type: sum + sql: ${amount} ;; + } + + # Measure with value_format + measure: avg_amount { + type: average + sql: ${amount} ;; + value_format: "#,##0.00" + } + + measure: min_amount { + type: min + sql: ${amount} ;; + } + + measure: max_amount { + type: max + sql: ${amount} ;; + } +} + +# View demonstrating templated filters (simplified) +view: templated_orders { + derived_table: { + sql: + SELECT * + FROM analytics.orders + WHERE status != 'deleted' + ;; + } + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: status { + type: string + sql: ${TABLE}.status ;; + } + + dimension: amount { + type: number + sql: ${TABLE}.amount ;; + } + + dimension_group: created { + type: time + timeframes: [date, week, month, year] + sql: ${TABLE}.created_at ;; + } + + measure: count { + type: count + } + + measure: total_amount { + type: sum + sql: ${amount} ;; + } + + measure: avg_amount { + type: average + sql: ${amount} ;; + } +} + +# Parameterized view (simplified - parameters as dimensions) +view: parameterized_metrics { + sql_table_name: analytics.metrics ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: revenue { + type: number + sql: ${TABLE}.revenue ;; + } + + dimension: order_count { + type: number + sql: ${TABLE}.order_count ;; + } + + dimension: customer_count { + type: number + sql: ${TABLE}.customer_count ;; + } + + dimension_group: event { + type: time + timeframes: [date, week, month, year] + sql: ${TABLE}.event_date ;; + } + + measure: count { + type: count + } + + measure: total_revenue { + type: sum + sql: ${revenue} ;; + } + + measure: total_orders { + type: sum + sql: ${order_count} ;; + } + + measure: total_customers { + type: sum + sql: ${customer_count} ;; + } + + measure: avg_revenue { + type: average + sql: ${revenue} ;; + } +} + +# View with various value formats +view: format_examples { + sql_table_name: analytics.metrics ;; + + dimension: id { + 
type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: raw_value { + type: number + sql: ${TABLE}.value ;; + } + + dimension: percentage_value { + type: number + sql: ${TABLE}.percentage ;; + value_format: "0.00\%" + } + + dimension: currency_value { + type: number + sql: ${TABLE}.amount ;; + value_format_name: usd + } + + measure: count { + type: count + } + + measure: sum_value { + type: sum + sql: ${raw_value} ;; + value_format: "#,##0.00" + } + + measure: sum_currency { + type: sum + sql: ${currency_value} ;; + value_format_name: usd + } + + measure: avg_percentage { + type: average + sql: ${percentage_value} ;; + value_format: "0.0\%" + } + + # Conditional value format using SQL CASE + measure: formatted_total { + type: sum + sql: ${raw_value} ;; + value_format: "[>=1000000]0.0,,\"M\";[>=1000]0.0,\"K\";0" + description: "Total with M/K suffixes" + } +} diff --git a/tests/fixtures/lookml/edge_cases_special_types.lkml b/tests/fixtures/lookml/edge_cases_special_types.lkml new file mode 100644 index 00000000..9a05f825 --- /dev/null +++ b/tests/fixtures/lookml/edge_cases_special_types.lkml @@ -0,0 +1,367 @@ +# Edge Cases: Special Dimension Types +# Tests tier, case, location, zipcode, and other special types + +view: special_types { + sql_table_name: analytics.user_data ;; + description: "Tests special dimension types" + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: age { + type: number + sql: ${TABLE}.age ;; + } + + # Tier dimension - automatic bucketing + dimension: age_tier { + type: tier + tiers: [0, 18, 25, 35, 45, 55, 65] + style: integer + sql: ${age} ;; + description: "Age groups" + } + + dimension: income { + type: number + sql: ${TABLE}.income ;; + } + + # Tier with different style + dimension: income_tier { + type: tier + tiers: [0, 25000, 50000, 75000, 100000, 150000, 250000] + style: classic + sql: ${income} ;; + value_format_name: usd_0 + description: "Income brackets" + } + + # 
Relational tier + dimension: income_tier_relational { + type: tier + tiers: [0, 25000, 50000, 75000, 100000] + style: relational + sql: ${income} ;; + } + + dimension: score { + type: number + sql: ${TABLE}.score ;; + } + + # Tier with interval style + dimension: score_tier { + type: tier + tiers: [0, 20, 40, 60, 80, 100] + style: interval + sql: ${score} ;; + description: "Score ranges" + } + + # Case dimension (similar to CASE WHEN) + dimension: customer_value_segment { + type: string + case: { + when: { + sql: ${income} >= 150000 AND ${score} >= 80 ;; + label: "Premium" + } + when: { + sql: ${income} >= 75000 AND ${score} >= 60 ;; + label: "Standard" + } + when: { + sql: ${income} >= 25000 OR ${score} >= 40 ;; + label: "Basic" + } + else: "New" + } + description: "Customer value segment based on income and score" + } + + # Case with alpha sorting + dimension: priority_segment { + type: string + alpha_sort: yes + case: { + when: { + sql: ${score} >= 90 ;; + label: "A - Critical" + } + when: { + sql: ${score} >= 70 ;; + label: "B - High" + } + when: { + sql: ${score} >= 50 ;; + label: "C - Medium" + } + else: "D - Low" + } + } + + # Geographic dimensions + dimension: latitude { + type: number + sql: ${TABLE}.latitude ;; + hidden: yes + } + + dimension: longitude { + type: number + sql: ${TABLE}.longitude ;; + hidden: yes + } + + # Location type combines lat/long + dimension: location { + type: location + sql_latitude: ${latitude} ;; + sql_longitude: ${longitude} ;; + description: "User location" + } + + # Zipcode type + dimension: zipcode { + type: zipcode + sql: ${TABLE}.zipcode ;; + description: "US ZIP code" + } + + # Bin dimension (for histograms) + dimension: age_bin { + type: bin + bins: [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] + style: classic + sql: ${age} ;; + } + + # Distance dimension + dimension: store_latitude { + type: number + sql: ${TABLE}.store_latitude ;; + hidden: yes + } + + dimension: store_longitude { + type: number + sql: 
${TABLE}.store_longitude ;; + hidden: yes + } + + dimension: distance_to_store { + type: distance + start_location_field: location + end_location_field: store_location + units: miles + description: "Distance from user to nearest store" + } + + dimension: store_location { + type: location + sql_latitude: ${store_latitude} ;; + sql_longitude: ${store_longitude} ;; + hidden: yes + } + + # Duration dimension + dimension: session_seconds { + type: number + sql: ${TABLE}.session_duration_seconds ;; + hidden: yes + } + + dimension: session_duration { + type: duration_second + sql: ${session_seconds} ;; + description: "Session duration" + } + + # YesNo with custom labels + dimension: is_active { + type: yesno + sql: ${TABLE}.is_active ;; + } + + dimension: is_verified { + type: yesno + sql: ${TABLE}.verified_at IS NOT NULL ;; + description: "Whether user has been verified" + } + + dimension: has_purchases { + type: yesno + sql: ${TABLE}.purchase_count > 0 ;; + } + + # String with suggestions + dimension: country { + type: string + sql: ${TABLE}.country ;; + suggest_persist_for: "24 hours" + suggestions: ["US", "CA", "UK", "DE", "FR", "AU", "JP"] + } + + # String with suggest dimension + dimension: city { + type: string + sql: ${TABLE}.city ;; + suggest_dimension: country + } + + # String with suggest explore + dimension: product_category { + type: string + sql: ${TABLE}.product_category ;; + suggest_explore: products + suggest_dimension: products.category + } + + dimension_group: created { + type: time + timeframes: [raw, time, date, week, month, quarter, year] + sql: ${TABLE}.created_at ;; + } + + # Duration dimension (time between events) + dimension: time_to_first_purchase { + type: duration_day + sql_start: ${created_raw} ;; + sql_end: ${TABLE}.first_purchase_at ;; + description: "Days from signup to first purchase" + } + + measure: count { + type: count + } + + measure: avg_age { + type: average + sql: ${age} ;; + } + + measure: avg_income { + type: average + sql: 
${income} ;; + value_format_name: usd_0 + } + + measure: avg_score { + type: average + sql: ${score} ;; + value_format: "0.00" + } + + measure: active_users { + type: count + filters: [is_active: "yes"] + } + + measure: verified_users { + type: count + filters: [is_verified: "yes"] + } + + measure: verified_rate { + type: number + sql: 1.0 * ${verified_users} / NULLIF(${count}, 0) ;; + value_format_name: percent_1 + } +} + +# View with array and JSON types (BigQuery specific) +view: json_array_types { + sql_table_name: analytics.events ;; + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: event_type { + type: string + sql: ${TABLE}.event_type ;; + } + + # JSON field access + dimension: properties { + type: string + sql: ${TABLE}.properties ;; + hidden: yes + } + + dimension: property_source { + type: string + sql: JSON_EXTRACT_SCALAR(${properties}, '$.source') ;; + } + + dimension: property_campaign { + type: string + sql: JSON_EXTRACT_SCALAR(${properties}, '$.campaign') ;; + } + + dimension: property_value { + type: number + sql: CAST(JSON_EXTRACT_SCALAR(${properties}, '$.value') AS FLOAT64) ;; + } + + # Nested JSON access + dimension: user_agent_browser { + type: string + sql: JSON_EXTRACT_SCALAR(${properties}, '$.user_agent.browser') ;; + } + + dimension: user_agent_os { + type: string + sql: JSON_EXTRACT_SCALAR(${properties}, '$.user_agent.os') ;; + } + + # Array field (BigQuery ARRAY type) + dimension: tags { + type: string + sql: ${TABLE}.tags ;; + hidden: yes + } + + dimension: tag_count { + type: number + sql: ARRAY_LENGTH(${TABLE}.tags) ;; + } + + dimension: first_tag { + type: string + sql: ${TABLE}.tags[SAFE_OFFSET(0)] ;; + } + + dimension_group: created { + type: time + timeframes: [raw, time, date, hour, week, month] + sql: ${TABLE}.created_at ;; + } + + measure: count { + type: count + } + + measure: total_value { + type: sum + sql: ${property_value} ;; + } + + measure: avg_tag_count { + type: average + sql: 
${tag_count} ;; + } + + # Array aggregation + measure: all_event_types { + type: string + sql: ARRAY_TO_STRING(ARRAY_AGG(DISTINCT ${event_type} ORDER BY ${event_type}), ', ') ;; + description: "Comma-separated list of event types" + } +} diff --git a/tests/fixtures/lookml/edge_cases_sql.lkml b/tests/fixtures/lookml/edge_cases_sql.lkml new file mode 100644 index 00000000..2a649674 --- /dev/null +++ b/tests/fixtures/lookml/edge_cases_sql.lkml @@ -0,0 +1,373 @@ +# Edge Cases: Complex SQL Patterns +# Tests nested SQL, subqueries, window functions, and cross-view references + +view: complex_sql_view { + sql_table_name: analytics.orders ;; + description: "View with complex SQL patterns" + + dimension: id { + type: number + primary_key: yes + sql: ${TABLE}.id ;; + } + + dimension: customer_id { + type: number + sql: ${TABLE}.customer_id ;; + } + + dimension: amount { + type: number + sql: ${TABLE}.amount ;; + } + + dimension: status { + type: string + sql: ${TABLE}.status ;; + } + + # Subquery in dimension + dimension: customer_order_rank { + type: number + sql: ( + SELECT COUNT(*) + 1 + FROM analytics.orders o2 + WHERE o2.customer_id = ${TABLE}.customer_id + AND o2.created_at < ${TABLE}.created_at + ) ;; + description: "Order sequence number for this customer" + } + + # Correlated subquery for aggregation + dimension: customer_total_orders { + type: number + sql: ( + SELECT COUNT(*) + FROM analytics.orders o2 + WHERE o2.customer_id = ${TABLE}.customer_id + ) ;; + description: "Total orders for this customer" + } + + # Subquery with SUM + dimension: customer_lifetime_value { + type: number + sql: ( + SELECT COALESCE(SUM(o2.amount), 0) + FROM analytics.orders o2 + WHERE o2.customer_id = ${TABLE}.customer_id + AND o2.status = 'completed' + ) ;; + value_format_name: usd + description: "Customer's total completed order value" + } + + # Complex CASE expression + dimension: order_size_bucket { + type: string + sql: CASE + WHEN ${amount} IS NULL THEN 'Unknown' + WHEN ${amount} < 
50 THEN 'Small' + WHEN ${amount} < 200 THEN 'Medium' + WHEN ${amount} < 500 THEN 'Large' + WHEN ${amount} < 1000 THEN 'XL' + ELSE 'Enterprise' + END ;; + description: "Order size bucket" + } + + # Nested CASE with multiple conditions + dimension: customer_segment { + type: string + sql: CASE + WHEN ${customer_lifetime_value} >= 10000 THEN + CASE + WHEN ${customer_total_orders} >= 50 THEN 'VIP Frequent' + ELSE 'VIP Occasional' + END + WHEN ${customer_lifetime_value} >= 1000 THEN + CASE + WHEN ${customer_total_orders} >= 10 THEN 'Regular Frequent' + ELSE 'Regular Occasional' + END + ELSE 'New' + END ;; + description: "Customer segment based on value and frequency" + } + + # Ratio built on a correlated-subquery dimension (no window function is used here) + dimension: pct_of_customer_total { + type: number + sql: SAFE_DIVIDE(${amount}, ${customer_lifetime_value}) * 100 ;; + value_format: "0.0\%" + description: "This order as percentage of customer's total spend" + } + + # JSON extraction (BigQuery style) + dimension: metadata_source { + type: string + sql: JSON_EXTRACT_SCALAR(${TABLE}.metadata, '$.source') ;; + } + + dimension: metadata_campaign { + type: string + sql: JSON_EXTRACT_SCALAR(${TABLE}.metadata, '$.campaign_id') ;; + } + + # Date arithmetic + dimension_group: created { + type: time + timeframes: [raw, time, date, week, month, quarter, year] + sql: ${TABLE}.created_at ;; + } + + dimension: days_since_created { + type: number + sql: DATE_DIFF(CURRENT_DATE(), DATE(${created_raw}), DAY) ;; + } + + dimension: is_recent { + type: yesno + sql: ${days_since_created} <= 30 ;; + } + + dimension: order_age_bucket { + type: string + sql: CASE + WHEN ${days_since_created} <= 7 THEN '0-7 days' + WHEN ${days_since_created} <= 30 THEN '8-30 days' + WHEN ${days_since_created} <= 90 THEN '31-90 days' + WHEN ${days_since_created} <= 365 THEN '91-365 days' + ELSE '365+ days' + END ;; + } + + measure: count { + type: count + } + + measure: total_amount { + type: sum + sql: ${amount} 
;; + } + + measure: avg_amount { + type: average + sql: ${amount} ;; + } + + # Measure with complex SQL + measure: median_amount { + type: number + sql: APPROX_QUANTILES(${amount}, 100)[OFFSET(50)] ;; + description: "Approximate median order amount" + } + + measure: unique_customers { + type: count_distinct + sql: ${customer_id} ;; + } +} + +# Derived table with complex SQL +view: customer_cohorts { + derived_table: { + sql: + WITH first_orders AS ( + SELECT + customer_id, + MIN(created_at) AS first_order_date, + MIN(amount) AS first_order_amount + FROM analytics.orders + WHERE status = 'completed' + GROUP BY customer_id + ), + order_metrics AS ( + SELECT + customer_id, + COUNT(*) AS total_orders, + SUM(amount) AS total_revenue, + AVG(amount) AS avg_order_value, + MAX(created_at) AS last_order_date + FROM analytics.orders + WHERE status = 'completed' + GROUP BY customer_id + ) + SELECT + f.customer_id, + f.first_order_date, + f.first_order_amount, + DATE_TRUNC(f.first_order_date, MONTH) AS cohort_month, + m.total_orders, + m.total_revenue, + m.avg_order_value, + m.last_order_date, + DATE_DIFF(m.last_order_date, f.first_order_date, DAY) AS customer_lifespan_days + FROM first_orders f + JOIN order_metrics m ON f.customer_id = m.customer_id + ;; + datagroup_trigger: daily_etl + indexes: ["customer_id", "cohort_month"] + } + + dimension: customer_id { + type: number + primary_key: yes + sql: ${TABLE}.customer_id ;; + } + + dimension: first_order_amount { + type: number + sql: ${TABLE}.first_order_amount ;; + value_format_name: usd + } + + dimension_group: first_order { + type: time + timeframes: [date, week, month, quarter, year] + sql: ${TABLE}.first_order_date ;; + } + + dimension_group: cohort { + type: time + timeframes: [month, quarter, year] + sql: ${TABLE}.cohort_month ;; + } + + dimension_group: last_order { + type: time + timeframes: [date, week, month] + sql: ${TABLE}.last_order_date ;; + } + + dimension: total_orders { + type: number + sql: 
${TABLE}.total_orders ;; + } + + dimension: total_revenue { + type: number + sql: ${TABLE}.total_revenue ;; + value_format_name: usd + } + + dimension: avg_order_value { + type: number + sql: ${TABLE}.avg_order_value ;; + value_format_name: usd + } + + dimension: customer_lifespan_days { + type: number + sql: ${TABLE}.customer_lifespan_days ;; + } + + dimension: customer_lifespan_tier { + type: tier + tiers: [0, 30, 90, 180, 365] + style: integer + sql: ${customer_lifespan_days} ;; + } + + measure: count { + type: count + description: "Number of customers" + } + + measure: avg_total_orders { + type: average + sql: ${total_orders} ;; + } + + measure: avg_total_revenue { + type: average + sql: ${total_revenue} ;; + value_format_name: usd + } + + measure: avg_customer_lifespan { + type: average + sql: ${customer_lifespan_days} ;; + value_format: "0.0" + } + + measure: total_cohort_revenue { + type: sum + sql: ${total_revenue} ;; + value_format_name: usd + } +} + +# View referencing another view's SQL_TABLE_NAME +view: order_facts { + derived_table: { + sql: + SELECT + o.id AS order_id, + o.customer_id, + o.amount, + o.created_at, + c.cohort_month, + c.total_orders AS customer_total_orders, + c.total_revenue AS customer_total_revenue, + o.amount / NULLIF(c.total_revenue, 0) AS pct_of_customer_revenue + FROM analytics.orders o + LEFT JOIN ${customer_cohorts.SQL_TABLE_NAME} c + ON o.customer_id = c.customer_id + ;; + } + + dimension: order_id { + type: number + primary_key: yes + sql: ${TABLE}.order_id ;; + } + + dimension: customer_id { + type: number + sql: ${TABLE}.customer_id ;; + } + + dimension: amount { + type: number + sql: ${TABLE}.amount ;; + } + + dimension_group: created { + type: time + timeframes: [date, week, month] + sql: ${TABLE}.created_at ;; + } + + dimension_group: cohort { + type: time + timeframes: [month, quarter, year] + sql: ${TABLE}.cohort_month ;; + } + + dimension: customer_total_orders { + type: number + sql: ${TABLE}.customer_total_orders ;; 
+ } + + dimension: customer_total_revenue { + type: number + sql: ${TABLE}.customer_total_revenue ;; + value_format_name: usd + } + + dimension: pct_of_customer_revenue { + type: number + sql: ${TABLE}.pct_of_customer_revenue ;; + value_format: "0.0\%" + } + + measure: count { + type: count + } + + measure: total_amount { + type: sum + sql: ${amount} ;; + } +}