Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 61 additions & 22 deletions sidemantic/adapters/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,37 @@
from sidemantic.core.semantic_graph import SemanticGraph


def _normalize_cube_sql(sql: str | None, cube_name: str | None = None) -> str | None:
"""Normalize Cube.js SQL syntax to Sidemantic format.

Handles:
- ${CUBE} -> {model} placeholder
- ${cube_name} -> {model} placeholder
- {CUBE} -> {model} placeholder (variant without dollar sign)
- ${measure_ref} references are preserved as-is (for derived metrics)

Args:
sql: SQL expression string or None
cube_name: Name of the cube (used to replace ${cube_name} references)

Returns:
Normalized SQL string or None
"""
if sql is None:
return None

# Replace ${CUBE} and {CUBE} variants with {model}
result = sql.replace("${CUBE}", "{model}")
result = result.replace("{CUBE}", "{model}")

# Replace ${cube_name} with {model} if cube_name is provided
if cube_name:
result = result.replace(f"${{{cube_name}}}", "{model}")
result = result.replace(f"{{{cube_name}}}", "{model}")

return result


class CubeAdapter(BaseAdapter):
"""Adapter for importing/exporting Cube.js YAML semantic models.

Expand Down Expand Up @@ -61,7 +92,7 @@ def _parse_file(self, file_path: Path, graph: SemanticGraph) -> None:
return

# Cube YAML has "cubes" key with list of cube definitions
cubes = data.get("cubes", [])
cubes = data.get("cubes") or []

for cube_def in cubes:
model = self._parse_cube(cube_def)
Expand Down Expand Up @@ -89,8 +120,8 @@ def _parse_cube(self, cube_def: dict) -> Model | None:
dimensions = []
primary_key = "id" # default

for dim_def in cube_def.get("dimensions", []):
dim = self._parse_dimension(dim_def)
for dim_def in cube_def.get("dimensions") or []:
dim = self._parse_dimension(dim_def, name)
if dim:
dimensions.append(dim)

Expand All @@ -100,21 +131,21 @@ def _parse_cube(self, cube_def: dict) -> Model | None:

# Parse measures
measures = []
for measure_def in cube_def.get("measures", []):
measure = self._parse_measure(measure_def)
for measure_def in cube_def.get("measures") or []:
measure = self._parse_measure(measure_def, name)
if measure:
measures.append(measure)

# Parse segments
from sidemantic.core.segment import Segment

segments = []
for segment_def in cube_def.get("segments", []):
for segment_def in cube_def.get("segments") or []:
segment_name = segment_def.get("name")
segment_sql = segment_def.get("sql")
if segment_name and segment_sql:
# Replace ${CUBE} with {model} placeholder
segment_sql = segment_sql.replace("${CUBE}", "{model}")
# Normalize ${CUBE}/{CUBE} to {model} placeholder
segment_sql = _normalize_cube_sql(segment_sql, name)
segments.append(
Segment(
name=segment_name,
Expand All @@ -125,16 +156,16 @@ def _parse_cube(self, cube_def: dict) -> Model | None:

# Parse joins to create relationships
relationships = []
for join_def in cube_def.get("joins", []):
for join_def in cube_def.get("joins") or []:
join_name = join_def.get("name")
if join_name:
# Get relationship type from join definition, default to many_to_one
rel_type = join_def.get("relationship", "many_to_one")
relationships.append(Relationship(name=join_name, type=rel_type, foreign_key=f"{join_name}_id"))

# Parse pre-aggregations
# Parse pre-aggregations (handle None from empty YAML section)
pre_aggregations = []
for preagg_def in cube_def.get("pre_aggregations", []):
for preagg_def in cube_def.get("pre_aggregations") or []:
preagg = self._parse_preaggregation(preagg_def, name)
if preagg:
pre_aggregations.append(preagg)
Expand All @@ -152,11 +183,12 @@ def _parse_cube(self, cube_def: dict) -> Model | None:
pre_aggregations=pre_aggregations,
)

def _parse_dimension(self, dim_def: dict) -> Dimension | None:
def _parse_dimension(self, dim_def: dict, cube_name: str) -> Dimension | None:
"""Parse Cube dimension into Sidemantic dimension.

Args:
dim_def: Dimension definition dictionary
cube_name: Name of the parent cube (for SQL normalization)

Returns:
Dimension instance or None
Expand All @@ -182,20 +214,24 @@ def _parse_dimension(self, dim_def: dict) -> Dimension | None:
if dim_type == "time":
granularity = "day" # Default granularity

# Normalize SQL to replace ${CUBE}/{CUBE} with {model}
dim_sql = _normalize_cube_sql(dim_def.get("sql"), cube_name)

return Dimension(
name=name,
type=sidemantic_type,
sql=dim_def.get("sql"),
sql=dim_sql,
granularity=granularity,
description=dim_def.get("description"),
format=dim_def.get("format"),
)

def _parse_measure(self, measure_def: dict) -> Metric | None:
def _parse_measure(self, measure_def: dict, cube_name: str) -> Metric | None:
"""Parse Cube measure into Sidemantic measure.

Args:
measure_def: Metric definition dictionary
cube_name: Name of the parent cube (for SQL normalization)

Returns:
Measure instance or None
Expand All @@ -220,13 +256,13 @@ def _parse_measure(self, measure_def: dict) -> Metric | None:

agg_type = type_mapping.get(measure_type, "count")

# Parse filters
# Parse filters and normalize ${CUBE}/{CUBE} references
filters = []
for filter_def in measure_def.get("filters", []):
for filter_def in measure_def.get("filters") or []:
if isinstance(filter_def, dict):
sql_filter = filter_def.get("sql")
if sql_filter:
filters.append(sql_filter)
filters.append(_normalize_cube_sql(sql_filter, cube_name))

# Check for rolling_window (cumulative metric)
rolling_window = measure_def.get("rolling_window")
Expand All @@ -242,11 +278,14 @@ def _parse_measure(self, measure_def: dict) -> Metric | None:
if sql_expr:
metric_type = "derived"

# Normalize SQL to replace ${CUBE}/{CUBE} with {model}
measure_sql = _normalize_cube_sql(measure_def.get("sql"), cube_name)

return Metric(
name=name,
type=metric_type,
agg=agg_type,
sql=measure_def.get("sql"),
sql=measure_sql,
window=window,
filters=filters if filters else None,
description=measure_def.get("description"),
Expand All @@ -271,15 +310,15 @@ def _parse_preaggregation(self, preagg_def: dict, cube_name: str) -> PreAggregat

# Extract measures - strip CUBE prefix if present
measures = []
for measure_ref in preagg_def.get("measures", []):
for measure_ref in preagg_def.get("measures") or []:
if isinstance(measure_ref, str):
# Remove CUBE. or {cube_name}. prefix
measure_name = measure_ref.replace("CUBE.", "").replace(f"{cube_name}.", "")
measures.append(measure_name)

# Extract dimensions - strip CUBE prefix if present
dimensions = []
for dim_ref in preagg_def.get("dimensions", []):
for dim_ref in preagg_def.get("dimensions") or []:
if isinstance(dim_ref, str):
# Remove CUBE. or {cube_name}. prefix
dim_name = dim_ref.replace("CUBE.", "").replace(f"{cube_name}.", "")
Expand Down Expand Up @@ -313,10 +352,10 @@ def _parse_preaggregation(self, preagg_def: dict, cube_name: str) -> PreAggregat

# Parse indexes
indexes = []
for index_def in preagg_def.get("indexes", []):
for index_def in preagg_def.get("indexes") or []:
if isinstance(index_def, dict):
index_name = index_def.get("name", f"idx_{len(indexes)}")
index_columns = index_def.get("columns", [])
index_columns = index_def.get("columns") or []

# Strip CUBE prefix from column names
cleaned_columns = [col.replace("CUBE.", "").replace(f"{cube_name}.", "") for col in index_columns]
Expand Down
104 changes: 104 additions & 0 deletions tests/adapters/test_cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,5 +646,109 @@ def test_cube_healthcare_patients():
assert departments.table == "healthcare.departments"


def test_cube_adapter_empty_sections():
    """Empty YAML sections (pre_aggregations, segments, joins) must not break parsing.

    GitHub issue: Cube generates models with empty pre-aggregation sections.
    """
    graph = CubeAdapter().parse(Path("tests/fixtures/cube/edge_cases.yml"))

    # The model defined with empty sections should still be present
    assert "empty_sections" in graph.models
    model = graph.get_model("empty_sections")

    # Empty sections parse to empty lists rather than raising
    assert model.pre_aggregations == []
    assert model.segments == []
    assert model.relationships == []

    # Populated sections of the same cube are still parsed normally
    assert len(model.dimensions) >= 2
    assert len(model.metrics) >= 1


def test_cube_adapter_cube_syntax_variants():
    """${CUBE} and bare {CUBE} references must both be normalized to {model}.

    GitHub issue: {CUBE} syntax (without dollar sign) should also be handled.
    """
    graph = CubeAdapter().parse(Path("tests/fixtures/cube/edge_cases.yml"))

    assert "cube_syntax_variants" in graph.models
    model = graph.get_model("cube_syntax_variants")

    # {CUBE} without the dollar sign, in a dimension
    name_dim = model.get_dimension("name")
    assert name_dim is not None
    assert "{model}" in name_dim.sql and "{CUBE}" not in name_dim.sql

    # ${CUBE} with the dollar sign, in a dimension
    price_dim = model.get_dimension("price")
    assert price_dim is not None
    assert "{model}" in price_dim.sql and "${CUBE}" not in price_dim.sql

    # Measure SQL
    total_price = model.get_metric("total_price")
    assert total_price is not None
    assert "{model}" in total_price.sql and "{CUBE}" not in total_price.sql

    # Measure filter SQL
    filtered_count = model.get_metric("filtered_count")
    assert filtered_count is not None
    assert filtered_count.filters is not None
    assert len(filtered_count.filters) == 1
    only_filter = filtered_count.filters[0]
    assert "{model}" in only_filter and "{CUBE}" not in only_filter

    # Segment SQL
    expensive = None
    for segment in model.segments:
        if segment.name == "expensive":
            expensive = segment
            break
    assert expensive is not None
    assert "{model}" in expensive.sql and "{CUBE}" not in expensive.sql


def test_cube_adapter_cube_name_reference():
    """${cube_name} and bare {cube_name} self-references must be normalized.

    GitHub issue: References to cube by name should also be normalized.
    """
    graph = CubeAdapter().parse(Path("tests/fixtures/cube/edge_cases.yml"))

    assert "custom_cube_ref" in graph.models
    model = graph.get_model("custom_cube_ref")

    # ${cube_name} with the dollar sign, in a dimension
    id_dim = model.get_dimension("id")
    assert id_dim is not None
    assert "{model}" in id_dim.sql and "${custom_cube_ref}" not in id_dim.sql

    # {cube_name} without the dollar sign, in a dimension
    value_dim = model.get_dimension("value")
    assert value_dim is not None
    assert "{model}" in value_dim.sql and "{custom_cube_ref}" not in value_dim.sql

    # Measure SQL
    total = model.get_metric("total")
    assert total is not None
    assert "{model}" in total.sql and "${custom_cube_ref}" not in total.sql

    # Segment SQL
    high_value = None
    for segment in model.segments:
        if segment.name == "high_value":
            high_value = segment
            break
    assert high_value is not None
    assert "{model}" in high_value.sql and "${custom_cube_ref}" not in high_value.sql


# Allow running this test module directly (python tests/adapters/test_cube.py)
# in addition to the normal pytest collection.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])
Loading