From 75299194a2f99d86f2ba302efbd9b27253836667 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Mon, 29 Dec 2025 08:26:59 -0800 Subject: [PATCH] Fix Cube adapter to handle empty YAML sections and {CUBE} syntax variants - Handle None from empty pre_aggregations, dimensions, measures, segments, joins, filters, indexes sections in YAML - Normalize all Cube.js SQL syntax variants: ${CUBE}, {CUBE}, ${cube_name}, {cube_name} to {model} - Apply normalization to dimension, measure, filter, and segment SQL - Add comprehensive tests for edge cases including empty sections and syntax variants --- sidemantic/adapters/cube.py | 83 +++++++++++++++++------ tests/adapters/test_cube.py | 104 +++++++++++++++++++++++++++++ tests/fixtures/cube/edge_cases.yml | 86 ++++++++++++++++++++++++ 3 files changed, 251 insertions(+), 22 deletions(-) create mode 100644 tests/fixtures/cube/edge_cases.yml diff --git a/sidemantic/adapters/cube.py b/sidemantic/adapters/cube.py index 78ce06f5..d22afd45 100644 --- a/sidemantic/adapters/cube.py +++ b/sidemantic/adapters/cube.py @@ -13,6 +13,37 @@ from sidemantic.core.semantic_graph import SemanticGraph +def _normalize_cube_sql(sql: str | None, cube_name: str | None = None) -> str | None: + """Normalize Cube.js SQL syntax to Sidemantic format. + + Handles: + - ${CUBE} -> {model} placeholder + - ${cube_name} -> {model} placeholder + - {CUBE} -> {model} placeholder (variant without dollar sign) + - ${measure_ref} references are preserved as-is (for derived metrics) + + Args: + sql: SQL expression string or None + cube_name: Name of the cube (used to replace ${cube_name} references) + + Returns: + Normalized SQL string or None + """ + if sql is None: + return None + + # Replace ${CUBE} and {CUBE} variants with {model} + result = sql.replace("${CUBE}", "{model}") + result = result.replace("{CUBE}", "{model}") + + # Replace ${cube_name} with {model} if cube_name is provided + if cube_name: + result = result.replace(f"${{{cube_name}}}", "{model}") + result = result.replace(f"{{{cube_name}}}", "{model}") + + return result + + class CubeAdapter(BaseAdapter): """Adapter for importing/exporting Cube.js YAML semantic models. @@ -61,7 +92,7 @@ def _parse_file(self, file_path: Path, graph: SemanticGraph) -> None: return # Cube YAML has "cubes" key with list of cube definitions - cubes = data.get("cubes", []) + cubes = data.get("cubes") or [] for cube_def in cubes: model = self._parse_cube(cube_def) @@ -89,8 +120,8 @@ def _parse_cube(self, cube_def: dict) -> Model | None: dimensions = [] primary_key = "id" # default - for dim_def in cube_def.get("dimensions", []): - dim = self._parse_dimension(dim_def) + for dim_def in cube_def.get("dimensions") or []: + dim = self._parse_dimension(dim_def, name) if dim: dimensions.append(dim) @@ -100,8 +131,8 @@ def _parse_cube(self, cube_def: dict) -> Model | None: # Parse measures measures = [] - for measure_def in cube_def.get("measures", []): - measure = self._parse_measure(measure_def) + for measure_def in cube_def.get("measures") or []: + measure = self._parse_measure(measure_def, name) if measure: measures.append(measure) @@ -109,12 +140,12 @@ def _parse_cube(self, cube_def: dict) -> Model | None: from sidemantic.core.segment import Segment segments = [] - for segment_def in cube_def.get("segments", []): + for segment_def in cube_def.get("segments") or []: segment_name = segment_def.get("name") segment_sql = segment_def.get("sql") if segment_name and segment_sql: - # Replace ${CUBE} with {model} placeholder - segment_sql = segment_sql.replace("${CUBE}", "{model}") + # Normalize ${CUBE}/{CUBE} to {model} placeholder + segment_sql = _normalize_cube_sql(segment_sql, name) segments.append( Segment( name=segment_name, @@ -125,16 +156,16 @@ def _parse_cube(self, cube_def: dict) -> Model | None: # Parse joins to create relationships relationships = [] - for join_def in cube_def.get("joins", []): + for join_def in cube_def.get("joins") or []: join_name = join_def.get("name") if join_name: # Get relationship type from join definition, default to many_to_one rel_type = join_def.get("relationship", "many_to_one") relationships.append(Relationship(name=join_name, type=rel_type, foreign_key=f"{join_name}_id")) - # Parse pre-aggregations + # Parse pre-aggregations (handle None from empty YAML section) pre_aggregations = [] - for preagg_def in cube_def.get("pre_aggregations", []): + for preagg_def in cube_def.get("pre_aggregations") or []: preagg = self._parse_preaggregation(preagg_def, name) if preagg: pre_aggregations.append(preagg) @@ -152,11 +183,12 @@ def _parse_cube(self, cube_def: dict) -> Model | None: pre_aggregations=pre_aggregations, ) - def _parse_dimension(self, dim_def: dict) -> Dimension | None: + def _parse_dimension(self, dim_def: dict, cube_name: str) -> Dimension | None: """Parse Cube dimension into Sidemantic dimension. Args: dim_def: Dimension definition dictionary + cube_name: Name of the parent cube (for SQL normalization) Returns: Dimension instance or None @@ -182,20 +214,24 @@ def _parse_dimension(self, dim_def: dict) -> Dimension | None: if dim_type == "time": granularity = "day" # Default granularity + # Normalize SQL to replace ${CUBE}/{CUBE} with {model} + dim_sql = _normalize_cube_sql(dim_def.get("sql"), cube_name) + return Dimension( name=name, type=sidemantic_type, - sql=dim_def.get("sql"), + sql=dim_sql, granularity=granularity, description=dim_def.get("description"), format=dim_def.get("format"), ) - def _parse_measure(self, measure_def: dict) -> Metric | None: + def _parse_measure(self, measure_def: dict, cube_name: str) -> Metric | None: """Parse Cube measure into Sidemantic measure. Args: measure_def: Metric definition dictionary + cube_name: Name of the parent cube (for SQL normalization) Returns: Measure instance or None @@ -220,13 +256,13 @@ def _parse_measure(self, measure_def: dict) -> Metric | None: agg_type = type_mapping.get(measure_type, "count") - # Parse filters + # Parse filters and normalize ${CUBE}/{CUBE} references filters = [] - for filter_def in measure_def.get("filters", []): + for filter_def in measure_def.get("filters") or []: if isinstance(filter_def, dict): sql_filter = filter_def.get("sql") if sql_filter: - filters.append(sql_filter) + filters.append(_normalize_cube_sql(sql_filter, cube_name)) # Check for rolling_window (cumulative metric) rolling_window = measure_def.get("rolling_window") @@ -242,11 +278,14 @@ def _parse_measure(self, measure_def: dict) -> Metric | None: if sql_expr: metric_type = "derived" + # Normalize SQL to replace ${CUBE}/{CUBE} with {model} + measure_sql = _normalize_cube_sql(measure_def.get("sql"), cube_name) + return Metric( name=name, type=metric_type, agg=agg_type, - sql=measure_def.get("sql"), + sql=measure_sql, window=window, filters=filters if filters else None, description=measure_def.get("description"), @@ -271,7 +310,7 @@ def _parse_preaggregation(self, preagg_def: dict, cube_name: str) -> PreAggregat # Extract measures - strip CUBE prefix if present measures = [] - for measure_ref in preagg_def.get("measures", []): + for measure_ref in preagg_def.get("measures") or []: if isinstance(measure_ref, str): # Remove CUBE. or {cube_name}. prefix measure_name = measure_ref.replace("CUBE.", "").replace(f"{cube_name}.", "") @@ -279,7 +318,7 @@ def _parse_preaggregation(self, preagg_def: dict, cube_name: str) -> PreAggregat # Extract dimensions - strip CUBE prefix if present dimensions = [] - for dim_ref in preagg_def.get("dimensions", []): + for dim_ref in preagg_def.get("dimensions") or []: if isinstance(dim_ref, str): # Remove CUBE. or {cube_name}. prefix dim_name = dim_ref.replace("CUBE.", "").replace(f"{cube_name}.", "") @@ -313,10 +352,10 @@ def _parse_preaggregation(self, preagg_def: dict, cube_name: str) -> PreAggregat # Parse indexes indexes = [] - for index_def in preagg_def.get("indexes", []): + for index_def in preagg_def.get("indexes") or []: if isinstance(index_def, dict): index_name = index_def.get("name", f"idx_{len(indexes)}") - index_columns = index_def.get("columns", []) + index_columns = index_def.get("columns") or [] # Strip CUBE prefix from column names cleaned_columns = [col.replace("CUBE.", "").replace(f"{cube_name}.", "") for col in index_columns] diff --git a/tests/adapters/test_cube.py b/tests/adapters/test_cube.py index 3030ec23..e0b33a0e 100644 --- a/tests/adapters/test_cube.py +++ b/tests/adapters/test_cube.py @@ -646,5 +646,109 @@ def test_cube_healthcare_patients(): assert departments.table == "healthcare.departments" +def test_cube_adapter_empty_sections(): + """Test Cube adapter handles empty YAML sections gracefully. + + GitHub issue: Cube generates models with empty pre-aggregation sections. + """ + adapter = CubeAdapter() + graph = adapter.parse(Path("tests/fixtures/cube/edge_cases.yml")) + + # Check that model with empty sections was parsed successfully + assert "empty_sections" in graph.models + empty_sections = graph.get_model("empty_sections") + + # Verify empty sections didn't cause errors + assert empty_sections.pre_aggregations == [] + assert empty_sections.segments == [] + assert empty_sections.relationships == [] + + # Verify dimensions and measures were still parsed + assert len(empty_sections.dimensions) >= 2 + assert len(empty_sections.metrics) >= 1 + + +def test_cube_adapter_cube_syntax_variants(): + """Test Cube adapter handles {CUBE} syntax variants. + + GitHub issue: {CUBE} syntax (without dollar sign) should also be handled. + """ + adapter = CubeAdapter() + graph = adapter.parse(Path("tests/fixtures/cube/edge_cases.yml")) + + # Check cube_syntax_variants model + assert "cube_syntax_variants" in graph.models + model = graph.get_model("cube_syntax_variants") + + # Verify {CUBE} syntax (without dollar) was normalized to {model} + name_dim = model.get_dimension("name") + assert name_dim is not None + assert "{model}" in name_dim.sql + assert "{CUBE}" not in name_dim.sql + + # Verify ${CUBE} syntax was normalized + price_dim = model.get_dimension("price") + assert price_dim is not None + assert "{model}" in price_dim.sql + assert "${CUBE}" not in price_dim.sql + + # Verify measure SQL was normalized + total_price = model.get_metric("total_price") + assert total_price is not None + assert "{model}" in total_price.sql + assert "{CUBE}" not in total_price.sql + + # Verify filter SQL was normalized + filtered_count = model.get_metric("filtered_count") + assert filtered_count is not None + assert filtered_count.filters is not None + assert len(filtered_count.filters) == 1 + assert "{model}" in filtered_count.filters[0] + assert "{CUBE}" not in filtered_count.filters[0] + + # Verify segment SQL was normalized + expensive_segment = next((s for s in model.segments if s.name == "expensive"), None) + assert expensive_segment is not None + assert "{model}" in expensive_segment.sql + assert "{CUBE}" not in expensive_segment.sql + + +def test_cube_adapter_cube_name_reference(): + """Test Cube adapter handles ${cube_name} and {cube_name} syntax. + + GitHub issue: References to cube by name should also be normalized. + """ + adapter = CubeAdapter() + graph = adapter.parse(Path("tests/fixtures/cube/edge_cases.yml")) + + # Check custom_cube_ref model + assert "custom_cube_ref" in graph.models + model = graph.get_model("custom_cube_ref") + + # Verify ${cube_name} syntax was normalized + id_dim = model.get_dimension("id") + assert id_dim is not None + assert "{model}" in id_dim.sql + assert "${custom_cube_ref}" not in id_dim.sql + + # Verify {cube_name} syntax (without dollar) was normalized + value_dim = model.get_dimension("value") + assert value_dim is not None + assert "{model}" in value_dim.sql + assert "{custom_cube_ref}" not in value_dim.sql + + # Verify measure SQL was normalized + total = model.get_metric("total") + assert total is not None + assert "{model}" in total.sql + assert "${custom_cube_ref}" not in total.sql + + # Verify segment SQL was normalized + high_value_segment = next((s for s in model.segments if s.name == "high_value"), None) + assert high_value_segment is not None + assert "{model}" in high_value_segment.sql + assert "${custom_cube_ref}" not in high_value_segment.sql + + if __name__ == "__main__": pytest.main([__file__, "-v"]) diff --git a/tests/fixtures/cube/edge_cases.yml b/tests/fixtures/cube/edge_cases.yml new file mode 100644 index 00000000..dea4fe12 --- /dev/null +++ b/tests/fixtures/cube/edge_cases.yml @@ -0,0 +1,86 @@ +cubes: + # Test with empty pre_aggregations (GitHub issue: Cube generates models with empty pre-aggregation section) + - name: empty_sections + sql_table: public.orders + + dimensions: + - name: id + sql: id + type: number + primary_key: true + + - name: status + sql: status + type: string + + measures: + - name: count + type: count + + # Empty sections that should not cause errors + pre_aggregations: + segments: + joins: + + # Test with {CUBE} syntax (without dollar sign) - variant syntax + - name: cube_syntax_variants + sql_table: public.products + + dimensions: + - name: id + sql: "{CUBE}.id" + type: number + primary_key: true + + - name: name + sql: "{CUBE}.product_name" + type: string + + - name: price + sql: ${CUBE}.price + type: number + description: "Test ${CUBE} syntax" + + measures: + - name: count + type: count + + - name: total_price + sql: "{CUBE}.price" + type: sum + description: "Total using {CUBE} syntax" + + - name: filtered_count + type: count + filters: + - sql: "{CUBE}.status = 'active'" + description: "Filter with {CUBE} syntax" + + segments: + - name: expensive + sql: "{CUBE}.price > 100" + description: "Segment with {CUBE} syntax" + + # Test with cube name reference ${cube_name} + - name: custom_cube_ref + sql_table: public.items + + dimensions: + - name: id + sql: "${custom_cube_ref}.id" + type: number + primary_key: true + + - name: value + sql: "{custom_cube_ref}.value" + type: number + description: "Using cube name without dollar" + + measures: + - name: total + sql: "${custom_cube_ref}.amount" + type: sum + + segments: + - name: high_value + sql: "${custom_cube_ref}.value > 1000"