From ddd5af15468fa0ed0be2f73eff8b4dff4c4cf1ce Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 11:16:32 -0700 Subject: [PATCH 1/8] Harden DuckDB extension persistence --- sidemantic-duckdb/CMakeLists.txt | 46 +- sidemantic-duckdb/README.md | 45 +- sidemantic-duckdb/demo.sql | 2 +- .../src/sidemantic_extension.cpp | 192 +++- sidemantic-duckdb/test/sql/sidemantic.test | 20 +- ...mantic_memory_and_invalid_persistence.test | 19 +- .../test/sql/sidemantic_native_contract.test | 2 + .../test/sql/sidemantic_persistence.test | 4 +- sidemantic-rs/Cargo.lock | 1 + sidemantic-rs/Cargo.toml | 3 + sidemantic-rs/src/ffi.rs | 966 ++++++++++++++---- 11 files changed, 1070 insertions(+), 230 deletions(-) diff --git a/sidemantic-duckdb/CMakeLists.txt b/sidemantic-duckdb/CMakeLists.txt index 0309d647..a13a8f4b 100644 --- a/sidemantic-duckdb/CMakeLists.txt +++ b/sidemantic-duckdb/CMakeLists.txt @@ -13,15 +13,33 @@ include_directories(src/include) set(SIDEMANTIC_RS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../sidemantic-rs") set(SIDEMANTIC_CARGO_TARGET_DIR "${SIDEMANTIC_RS_DIR}/target" CACHE PATH "Cargo target directory for sidemantic-rs") set(SIDEMANTIC_CARGO_PROFILE "release" CACHE STRING "Cargo profile used for the sidemantic-rs static library") +set(SIDEMANTIC_CARGO_TARGET "" CACHE STRING "Rust target triple for sidemantic-rs static library") +set(SIDEMANTIC_RUST_NATIVE_STATIC_LIBS "" CACHE STRING "Additional native libraries required by the sidemantic-rs staticlib") set(SIDEMANTIC_INCLUDE "${SIDEMANTIC_RS_DIR}/include") -if(WIN32) +if(NOT SIDEMANTIC_CARGO_TARGET AND DEFINED Rust_CARGO_TARGET) + set(SIDEMANTIC_CARGO_TARGET "${Rust_CARGO_TARGET}" CACHE STRING "Rust target triple for sidemantic-rs static library" FORCE) +endif() + +if(SIDEMANTIC_CARGO_TARGET MATCHES "pc-windows-msvc$") + set(SIDEMANTIC_STATICLIB_NAME "sidemantic.lib") +elseif(SIDEMANTIC_CARGO_TARGET) + set(SIDEMANTIC_STATICLIB_NAME "libsidemantic.a") +elseif(WIN32) set(SIDEMANTIC_STATICLIB_NAME "sidemantic.lib") else() set(SIDEMANTIC_STATICLIB_NAME "libsidemantic.a") endif() -set(SIDEMANTIC_LIB "${SIDEMANTIC_CARGO_TARGET_DIR}/${SIDEMANTIC_CARGO_PROFILE}/${SIDEMANTIC_STATICLIB_NAME}") +if(SIDEMANTIC_CARGO_TARGET) + set(SIDEMANTIC_LIB_DIR "${SIDEMANTIC_CARGO_TARGET_DIR}/${SIDEMANTIC_CARGO_TARGET}/${SIDEMANTIC_CARGO_PROFILE}") +else() + set(SIDEMANTIC_LIB_DIR "${SIDEMANTIC_CARGO_TARGET_DIR}/${SIDEMANTIC_CARGO_PROFILE}") +endif() + +set(SIDEMANTIC_LIB "${SIDEMANTIC_LIB_DIR}/${SIDEMANTIC_STATICLIB_NAME}") +message(STATUS "sidemantic-rs cargo target: '${SIDEMANTIC_CARGO_TARGET}'") +message(STATUS "sidemantic-rs static library: ${SIDEMANTIC_LIB}") find_program(CARGO_EXECUTABLE cargo) if(NOT CARGO_EXECUTABLE) @@ -29,6 +47,9 @@ if(NOT CARGO_EXECUTABLE) endif() set(SIDEMANTIC_CARGO_BUILD_ARGS build --manifest-path "${SIDEMANTIC_RS_DIR}/Cargo.toml" --lib) +if(SIDEMANTIC_CARGO_TARGET) + list(APPEND SIDEMANTIC_CARGO_BUILD_ARGS --target "${SIDEMANTIC_CARGO_TARGET}") +endif() if(SIDEMANTIC_CARGO_PROFILE STREQUAL "release") list(APPEND SIDEMANTIC_CARGO_BUILD_ARGS --release) elseif(NOT SIDEMANTIC_CARGO_PROFILE STREQUAL "debug") @@ -60,22 +81,21 @@ build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) add_dependencies(${EXTENSION_NAME} sidemantic_rust_staticlib) add_dependencies(${LOADABLE_EXTENSION_NAME} sidemantic_rust_staticlib) -# Link the Rust static library -target_link_libraries(${EXTENSION_NAME} ${SIDEMANTIC_LIB}) -target_link_libraries(${LOADABLE_EXTENSION_NAME} ${SIDEMANTIC_LIB}) - -# On macOS, we need to link against some system frameworks +# Link the Rust static library and native libraries rustc would normally pass +# when producing a final binary for the target platform. if(APPLE) - target_link_libraries(${EXTENSION_NAME} "-framework Security" "-framework CoreFoundation") - target_link_libraries(${LOADABLE_EXTENSION_NAME} "-framework Security" "-framework CoreFoundation") + list(APPEND SIDEMANTIC_RUST_NATIVE_STATIC_LIBS "-framework Security" "-framework CoreFoundation") endif() - -# On Linux, we need pthread and dl if(UNIX AND NOT APPLE) - target_link_libraries(${EXTENSION_NAME} pthread dl) - target_link_libraries(${LOADABLE_EXTENSION_NAME} pthread dl) + list(APPEND SIDEMANTIC_RUST_NATIVE_STATIC_LIBS pthread dl m) +endif() +if(WIN32 OR SIDEMANTIC_CARGO_TARGET MATCHES "pc-windows") + list(APPEND SIDEMANTIC_RUST_NATIVE_STATIC_LIBS advapi32 ws2_32 userenv bcrypt ntdll) endif() +target_link_libraries(${EXTENSION_NAME} ${SIDEMANTIC_LIB} ${SIDEMANTIC_RUST_NATIVE_STATIC_LIBS}) +target_link_libraries(${LOADABLE_EXTENSION_NAME} ${SIDEMANTIC_LIB} ${SIDEMANTIC_RUST_NATIVE_STATIC_LIBS}) + install( TARGETS ${EXTENSION_NAME} EXPORT "${DUCKDB_EXPORT_SET}" diff --git a/sidemantic-duckdb/README.md b/sidemantic-duckdb/README.md index c89d1e75..12464990 100644 --- a/sidemantic-duckdb/README.md +++ b/sidemantic-duckdb/README.md @@ -12,12 +12,33 @@ A DuckDB extension that adds a SQL-first semantic layer. Define metrics and dime ## Installation +Current builds are loaded from a local build or GitHub release artifact: + +Start DuckDB with unsigned-extension loading enabled because these artifacts are not signed yet: + +```bash +duckdb -unsigned +``` + +```sql +LOAD '/absolute/path/to/sidemantic.duckdb_extension'; +``` + +For local development: + +```bash +make deps DUCKDB_VERSION=v1.4.2 +make +make test +./build/release/duckdb -unsigned +``` + ```sql --- From community extensions (when published) -INSTALL sidemantic FROM community; -LOAD sidemantic; +LOAD 'build/release/extension/sidemantic/sidemantic.duckdb_extension'; ``` +For embedded clients, set DuckDB's `allow_unsigned_extensions` database configuration before opening the connection. Community extension installation is planned, but this repository does not yet publish the signed multi-platform artifacts required for `INSTALL sidemantic FROM community`. + ## Quick Start (Pure SQL) Define your semantic layer entirely in SQL, no YAML required: @@ -32,7 +53,6 @@ INSERT INTO orders VALUES -- 2. Define a semantic model SEMANTIC CREATE MODEL orders_model ( - name orders_model, table orders, primary_key order_id ); @@ -67,12 +87,13 @@ Creates a new semantic model linked to a physical table. ```sql SEMANTIC CREATE MODEL model_name ( - name model_name, table physical_table_name, primary_key pk_column ); ``` +The model name may also be repeated inside the body for compatibility, but it must match the name after `MODEL`. + ### SEMANTIC CREATE METRIC Defines a metric (aggregation) on the current model. @@ -286,23 +307,21 @@ SELECT sidemantic_rewrite_sql('SELECT orders.revenue FROM orders'); ## Building from Source ```bash -# Clone with submodules -git clone --recurse-submodules https://github.com/your-repo/sidemantic-duckdb.git +# From this repository checkout cd sidemantic-duckdb -# Build the Rust library first -cd ../sidemantic-rs -cargo build --release -cd ../sidemantic-duckdb +# Fetch the DuckDB source version used by CI. +# extension-ci-tools is vendored in this directory and pinned to v1.4.2. +make deps DUCKDB_VERSION=v1.4.2 -# Build the extension +# Build the extension. CMake builds the sibling sidemantic-rs static library automatically. make # Run tests make test # Use the extension -./build/release/duckdb +./build/release/duckdb -unsigned ``` ## Architecture diff --git a/sidemantic-duckdb/demo.sql b/sidemantic-duckdb/demo.sql index d77e664e..79a29067 100644 --- a/sidemantic-duckdb/demo.sql +++ b/sidemantic-duckdb/demo.sql @@ -1,5 +1,5 @@ -- Sidemantic DuckDB Extension Demo --- Run with: ./build/release/duckdb < demo.sql +-- Run with: ./build/release/duckdb -unsigned < demo.sql -- Load the extension LOAD 'build/release/extension/sidemantic/sidemantic.duckdb_extension'; diff --git a/sidemantic-duckdb/src/sidemantic_extension.cpp b/sidemantic-duckdb/src/sidemantic_extension.cpp index 600f068b..4e1bd23d 100644 --- a/sidemantic-duckdb/src/sidemantic_extension.cpp +++ b/sidemantic-duckdb/src/sidemantic_extension.cpp @@ -6,7 +6,7 @@ #include "duckdb/function/table_function.hpp" #include "sidemantic.h" -#include +#include #include namespace duckdb { @@ -266,6 +266,147 @@ static bool StartsWithKeyword(const std::string &str, const std::string &keyword return true; } +static std::string TrimCopy(const std::string &value) { + size_t start = 0; + while (start < value.size() && std::isspace(value[start])) { + start++; + } + size_t end = value.size(); + while (end > start && std::isspace(value[end - 1])) { + end--; + } + return value.substr(start, end - start); +} + +static bool IsBareIdentifier(const std::string &value) { + if (value.empty() || !(std::isalpha(value[0]) || value[0] == '_')) { + return false; + } + for (auto ch : value) { + if (!(std::isalnum(ch) || ch == '_')) { + return false; + } + } + return true; +} + +static bool StartsWithNameProperty(const std::string &value, size_t pos) { + const std::string keyword = "name"; + if (pos + keyword.size() >= value.size()) { + return false; + } + for (size_t i = 0; i < keyword.size(); i++) { + if (std::tolower(value[pos + i]) != keyword[i]) { + return false; + } + } + return std::isspace(value[pos + keyword.size()]); +} + +static bool ExtractModelNameProperty(const std::string &body, std::string &name, std::string &error) { + auto open = body.find('('); + if (open == std::string::npos) { + return false; + } + + size_t pos = open + 1; + bool in_single_quote = false; + bool in_double_quote = false; + int depth = 0; + + while (pos < body.size()) { + while (pos < body.size() && depth == 0 && + (std::isspace(body[pos]) || body[pos] == ',')) { + pos++; + } + if (pos >= body.size() || (depth == 0 && body[pos] == ')')) { + return false; + } + + if (depth == 0 && StartsWithNameProperty(body, pos)) { + pos += 4; + while (pos < body.size() && std::isspace(body[pos])) { + pos++; + } + if (pos < body.size() && (body[pos] == '\'' || body[pos] == '"')) { + auto quote = body[pos]; + pos++; + std::string quoted_name; + while (pos < body.size()) { + auto ch = body[pos]; + if (ch == quote) { + if (pos + 1 < body.size() && body[pos + 1] == quote) { + quoted_name.push_back(quote); + pos += 2; + continue; + } + name = quoted_name; + return true; + } + quoted_name.push_back(ch); + pos++; + } + error = "CREATE MODEL body name is unterminated"; + return false; + } + size_t name_start = pos; + while (pos < body.size() && (std::isalnum(body[pos]) || body[pos] == '_')) { + pos++; + } + if (pos > name_start) { + name = body.substr(name_start, pos - name_start); + return true; + } + error = "CREATE MODEL body name must be a bare or quoted identifier"; + return false; + } + + while (pos < body.size()) { + auto ch = body[pos]; + if (in_single_quote) { + if (ch == '\'' && pos + 1 < body.size() && body[pos + 1] == '\'') { + pos += 2; + continue; + } + if (ch == '\'') { + in_single_quote = false; + } + pos++; + continue; + } + if (in_double_quote) { + if (ch == '"' && pos + 1 < body.size() && body[pos + 1] == '"') { + pos += 2; + continue; + } + if (ch == '"') { + in_double_quote = false; + } + pos++; + continue; + } + + if (ch == '\'') { + in_single_quote = true; + } else if (ch == '"') { + in_double_quote = true; + } else if (ch == '(') { + depth++; + } else if (ch == ')') { + if (depth == 0) { + return false; + } + depth--; + } else if (ch == ',' && depth == 0) { + break; + } + pos++; + } + } + + return false; +} + // Check if query is a CREATE [OR REPLACE] METRIC/DIMENSION/SEGMENT statement // Returns the statement type or empty string if not matched // Handles syntaxes like: @@ -393,7 +534,7 @@ static std::string IsDefinitionStatement(const std::string &query, std::string & // Check if stripped query is a CREATE [OR REPLACE] MODEL statement // Returns: 0 = not a create model, 1 = create model, 2 = create or replace model // Sets definition to be in nom-parser format: "MODEL (name ..., ...)" -static int IsCreateModelStatement(const std::string &query, std::string &definition) { +static int IsCreateModelStatement(const std::string &query, std::string &definition, std::string &error) { size_t pos = 0; // Check for CREATE @@ -437,9 +578,40 @@ static int IsCreateModelStatement(const std::string &query, std::string &definit return 0; // No parenthesis found } - // Build definition in nom format: "MODEL (name ..., ...)" - // The content inside parens should already have "name xxx" as first property - definition = "MODEL " + rest.substr(paren_pos); + auto outer_name = TrimCopy(rest.substr(0, paren_pos)); + auto body = rest.substr(paren_pos); + if (!outer_name.empty() && !IsBareIdentifier(outer_name)) { + error = "Invalid CREATE MODEL name: " + outer_name; + return -1; + } + + std::string inner_name; + std::string name_error; + if (!outer_name.empty() && ExtractModelNameProperty(body, inner_name, name_error)) { + if (inner_name != outer_name) { + error = "CREATE MODEL name '" + outer_name + "' does not match body name '" + inner_name + "'"; + return -1; + } + definition = "MODEL " + body; + return is_replace ? 2 : 1; + } + if (!name_error.empty()) { + error = name_error; + return -1; + } + + if (!outer_name.empty()) { + auto after_open = body.substr(1); + auto trimmed_after_open = TrimCopy(after_open); + if (!trimmed_after_open.empty() && trimmed_after_open[0] == ')') { + definition = "MODEL (name " + outer_name + after_open; + } else { + definition = "MODEL (name " + outer_name + ", " + after_open; + } + return is_replace ? 2 : 1; + } + + definition = "MODEL " + body; return is_replace ? 2 : 1; } @@ -459,8 +631,12 @@ ParserExtensionParseResult sidemantic_parse(ParserExtensionInfo *info, // Check if this is a CREATE [OR REPLACE] MODEL statement std::string definition; - int create_type = IsCreateModelStatement(stripped_query, definition); + std::string create_model_error; + int create_type = IsCreateModelStatement(stripped_query, definition, create_model_error); + if (create_type < 0) { + return ParserExtensionParseResult(create_model_error); + } if (create_type > 0) { // This is a CREATE MODEL statement - handle specially bool replace = (create_type == 2); @@ -627,9 +803,9 @@ static void LoadInternal(ExtensionLoader &loader) { const char *db_path_ptr = db_path.empty() ? nullptr : db_path.c_str(); char *error = sidemantic_autoload_for_context(ContextKeyPtr(context_key), db_path_ptr); if (error) { - // Log warning but don't fail extension load - std::fprintf(stderr, "Warning: failed to autoload sidemantic definitions: %s\n", error); + std::string message = "Failed to autoload sidemantic definitions: " + std::string(error); sidemantic_free(error); + throw InvalidInputException("%s", message.c_str()); } // Register parser extension diff --git a/sidemantic-duckdb/test/sql/sidemantic.test b/sidemantic-duckdb/test/sql/sidemantic.test index 52d9e13e..a2b53ed3 100644 --- a/sidemantic-duckdb/test/sql/sidemantic.test +++ b/sidemantic-duckdb/test/sql/sidemantic.test @@ -85,7 +85,10 @@ products # Test CREATE METRIC/DIMENSION syntax statement ok -SEMANTIC CREATE MODEL test_model (name test_model, table orders, primary_key order_id); +SEMANTIC CREATE MODEL test_model (table orders, primary_key order_id); + +statement ok +SEMANTIC CREATE OR REPLACE MODEL test_model (name 'test_model', table orders, primary_key order_id); statement ok SEMANTIC CREATE METRIC test_sum AS SUM(amount); @@ -93,6 +96,19 @@ SEMANTIC CREATE METRIC test_sum AS SUM(amount); statement ok SEMANTIC CREATE DIMENSION test_dim AS status; +statement ok +SEMANTIC CREATE SEGMENT completed AS status = 'completed'; + +statement error +SEMANTIC CREATE MODEL mismatched_model (name other_model, table orders, primary_key order_id); +---- +CREATE MODEL name 'mismatched_model' does not match body name 'other_model' + +statement error +SEMANTIC CREATE MODEL mismatched_quoted_model (name 'other_quoted_model', table orders, primary_key order_id); +---- +CREATE MODEL name 'mismatched_quoted_model' does not match body name 'other_quoted_model' + # Verify test_model is loaded query I rowsort SELECT * FROM sidemantic_models(); @@ -135,7 +151,7 @@ SEMANTIC SELECT reloadable.revenue FROM reloadable; # Test SEMANTIC MODEL active-model switching and repeated use/rewrite sequence statement ok -SEMANTIC CREATE MODEL sql_orders (name sql_orders, table orders, primary_key order_id); +SEMANTIC CREATE MODEL sql_orders (table orders, primary_key order_id); statement ok SEMANTIC CREATE METRIC sql_revenue AS SUM(amount); diff --git a/sidemantic-duckdb/test/sql/sidemantic_memory_and_invalid_persistence.test b/sidemantic-duckdb/test/sql/sidemantic_memory_and_invalid_persistence.test index ed4bdf39..0c80ce60 100644 --- a/sidemantic-duckdb/test/sql/sidemantic_memory_and_invalid_persistence.test +++ b/sidemantic-duckdb/test/sql/sidemantic_memory_and_invalid_persistence.test @@ -45,20 +45,7 @@ COPY (SELECT 'MODEL (' AS line) TO '__TEST_DIR__/sidemantic_invalid_persistence. restart -require sidemantic - -query I -SELECT count(*) FROM sidemantic_models() WHERE model_name = 'valid_events'; +statement error +LOAD sidemantic; ---- -0 - -statement ok -SEMANTIC CREATE MODEL valid_events (name valid_events, table valid_events, primary_key event_id); - -statement ok -SEMANTIC CREATE METRIC event_count AS COUNT(*); - -query I -SEMANTIC SELECT valid_events.event_count FROM valid_events; ----- -2 +Failed to autoload sidemantic definitions diff --git a/sidemantic-duckdb/test/sql/sidemantic_native_contract.test b/sidemantic-duckdb/test/sql/sidemantic_native_contract.test index de496636..6cacb29f 100644 --- a/sidemantic-duckdb/test/sql/sidemantic_native_contract.test +++ b/sidemantic-duckdb/test/sql/sidemantic_native_contract.test @@ -2,6 +2,8 @@ # description: Test native Sidemantic YAML and SQL contract paths in the DuckDB extension # group: [sidemantic] +load __TEST_DIR__/sidemantic_native_contract.duckdb + require sidemantic statement ok diff --git a/sidemantic-duckdb/test/sql/sidemantic_persistence.test b/sidemantic-duckdb/test/sql/sidemantic_persistence.test index 358ab6ac..05810dcd 100644 --- a/sidemantic-duckdb/test/sql/sidemantic_persistence.test +++ b/sidemantic-duckdb/test/sql/sidemantic_persistence.test @@ -13,7 +13,7 @@ statement ok INSERT INTO events VALUES (1, 10), (2, 20), (3, 30); statement ok -SEMANTIC CREATE MODEL events (name events, table events, primary_key event_id); +SEMANTIC CREATE MODEL events (table events, primary_key event_id); statement ok SEMANTIC CREATE METRIC event_count AS COUNT(*); @@ -38,7 +38,7 @@ SEMANTIC SELECT events.event_count FROM events; 3 statement ok -SEMANTIC CREATE OR REPLACE MODEL events (name events, table events, primary_key event_id); +SEMANTIC CREATE OR REPLACE MODEL events (table events, primary_key event_id); statement ok SEMANTIC MODEL events; diff --git a/sidemantic-rs/Cargo.lock b/sidemantic-rs/Cargo.lock index 503960c2..f163a97b 100644 --- a/sidemantic-rs/Cargo.lock +++ b/sidemantic-rs/Cargo.lock @@ -1673,6 +1673,7 @@ dependencies = [ "url", "wasm-bindgen", "wasm-bindgen-test", + "windows-sys 0.61.2", ] [[package]] diff --git a/sidemantic-rs/Cargo.toml b/sidemantic-rs/Cargo.toml index a5e6c17a..12c875c1 100644 --- a/sidemantic-rs/Cargo.toml +++ b/sidemantic-rs/Cargo.toml @@ -52,6 +52,9 @@ tower-lsp = { version = "0.20.0", optional = true } ratatui = { version = "0.29.0", optional = true } crossterm = { version = "0.28.1", optional = true } +[target.'cfg(windows)'.dependencies] +windows-sys = { version = "0.61.2", features = ["Win32_Storage_FileSystem"] } + [dev-dependencies] wasm-bindgen-test = "0.3" diff --git a/sidemantic-rs/src/ffi.rs b/sidemantic-rs/src/ffi.rs index 68713137..5ee52446 100644 --- a/sidemantic-rs/src/ffi.rs +++ b/sidemantic-rs/src/ffi.rs @@ -9,21 +9,27 @@ use std::collections::{HashMap, HashSet}; use std::ffi::{CStr, CString}; use std::fs::{self, OpenOptions}; -use std::io::Write; +use std::io::{self, Write}; use std::os::raw::c_char; use std::path::{Path, PathBuf}; use std::ptr; use std::sync::Mutex; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use once_cell::sync::Lazy; -use crate::config::{load_from_directory, load_from_file, load_from_string, parse_sql_model}; +use crate::config::{ + load_from_directory_with_metadata, load_from_file_with_metadata, + load_from_sql_string_with_metadata, load_from_string_with_metadata, parse_sql_model, +}; use crate::core::SemanticGraph; use crate::sql::QueryRewriter; const DEFAULT_CONTEXT_KEY: &str = "__sidemantic_default_context__"; +const DEFINITIONS_LOCK_TIMEOUT: Duration = Duration::from_secs(10); +const DEFINITIONS_STALE_LOCK_AFTER: Duration = Duration::from_secs(300); -#[derive(Default)] +#[derive(Default, Clone)] struct FfiState { graph: SemanticGraph, active_model: Option, @@ -75,6 +81,14 @@ fn context_key(context: *const c_char) -> std::result::Result Option { + if model_order.len() == 1 { + model_order.first().cloned() + } else { + None + } +} + /// Load semantic models from YAML string /// /// Returns null on success, error message on failure. @@ -99,16 +113,18 @@ pub extern "C" fn sidemantic_load_yaml_for_context( Err(error) => return error, }; - match load_from_string(&yaml_str) { - Ok(new_graph) => { + match load_from_string_with_metadata(&yaml_str) { + Ok(metadata) => { + let active_model = active_model_for_loaded_models(&metadata.model_order); let mut states = FFI_STATES.lock().unwrap(); let state = states.entry(key).or_default(); // Merge new models into existing graph, replacing same-name definitions. - for model in new_graph.models() { + for model in metadata.graph.models() { if let Err(e) = state.graph.replace_model(model.clone()) { return to_c_string(&format!("Error adding model: {e}")); } } + state.active_model = active_model; ptr::null_mut() // Success } Err(e) => to_c_string(&format!("Error: {e}")), @@ -147,21 +163,23 @@ pub extern "C" fn sidemantic_load_file_for_context( } let result = if path.is_dir() { - load_from_directory(path) + load_from_directory_with_metadata(path) } else { - load_from_file(path) + load_from_file_with_metadata(path) }; match result { - Ok(new_graph) => { + Ok(metadata) => { + let active_model = active_model_for_loaded_models(&metadata.model_order); let mut states = FFI_STATES.lock().unwrap(); let state = states.entry(key).or_default(); // Merge new models into existing graph, replacing same-name definitions. - for model in new_graph.models() { + for model in metadata.graph.models() { if let Err(e) = state.graph.replace_model(model.clone()) { return to_c_string(&format!("Error adding model: {e}")); } } + state.active_model = active_model; ptr::null_mut() // Success } Err(e) => to_c_string(&format!("Error: {e}")), @@ -225,39 +243,46 @@ pub extern "C" fn sidemantic_define_for_context( let model_name = model.name.clone(); - // Determine the definitions file path let definitions_path = get_definitions_path(db_path); - // Handle OR REPLACE: read existing file, remove model if exists - if replace { - if let Some(definitions_path) = definitions_path.as_ref() { - if let Err(e) = remove_model_from_file(definitions_path, &model_name) { - return to_c_string(&format!("Error removing existing model: {e}")); - } - } - } - - // Append definition to file - if let Some(definitions_path) = definitions_path.as_ref() { - if let Err(e) = append_definition_to_file(definitions_path, &sql_str) { - return to_c_string(&format!("Error writing to definitions file: {e}")); - } - } - - // Load model into current session + // Stage all in-memory work first so duplicate/invalid definitions never touch disk. let mut states = FFI_STATES.lock().unwrap(); let state = states.entry(key).or_default(); + let mut candidate_state = state.clone(); let result = if replace { - state.graph.replace_model(model) + candidate_state.graph.replace_model(model) } else { - state.graph.add_model(model) + candidate_state.graph.add_model(model) }; if let Err(e) = result { return to_c_string(&format!("Error adding model to session: {e}")); } + candidate_state.active_model = Some(model_name.clone()); - // Set this model as the active model for subsequent METRIC/DIMENSION additions - state.active_model = Some(model_name); + if let Some(definitions_path) = definitions_path.as_ref() { + let _definitions_lock = match lock_definitions_file(definitions_path) { + Ok(lock) => lock, + Err(e) => return to_c_string(&format!("Error locking definitions file: {e}")), + }; + let content = match read_definitions_file(definitions_path) { + Ok(content) => content, + Err(e) => return to_c_string(&format!("Error reading definitions file: {e}")), + }; + let content = if replace { + remove_model_from_content(&content, &model_name) + } else { + content + }; + let candidate_content = append_definition_to_content(&content, &sql_str); + if let Err(e) = validate_definitions_content(&candidate_content) { + return to_c_string(&format!("Error validating definitions file: {e}")); + } + if let Err(e) = write_definitions_file_atomic(definitions_path, &candidate_content) { + return to_c_string(&format!("Error writing to definitions file: {e}")); + } + } + + *state = candidate_state; ptr::null_mut() // Success } @@ -287,16 +312,19 @@ fn get_definitions_path(db_path: *const c_char) -> Option { } /// Remove a model definition from the file by name +#[cfg(test)] fn remove_model_from_file(path: &Path, model_name: &str) -> std::io::Result<()> { - if !path.exists() { - return Ok(()); // Nothing to remove - } + let _definitions_lock = lock_definitions_file(path)?; + let content = read_definitions_file(path)?; + let result = remove_model_from_content(&content, model_name); + write_definitions_file_atomic(path, &result) +} - let content = fs::read_to_string(path)?; +fn remove_model_from_content(content: &str, model_name: &str) -> String { let mut result = String::new(); let mut cursor = 0; - for (start, end) in model_definition_ranges(&content) { + for (start, end) in model_definition_ranges(content) { result.push_str(&content[cursor..start]); let block = &content[start..end]; @@ -312,40 +340,39 @@ fn remove_model_from_file(path: &Path, model_name: &str) -> std::io::Result<()> } result.push_str(&content[cursor..]); - fs::write(path, result.trim_end())?; - Ok(()) + result.trim_end().to_string() +} + +fn content_has_model_block(content: &str, model_name: &str) -> bool { + model_definition_ranges(content) + .into_iter() + .filter_map(|(start, end)| parse_sql_model(&content[start..end]).ok()) + .any(|model| model.name == model_name) } fn model_definition_ranges(content: &str) -> Vec<(usize, usize)> { - let mut starts = Vec::new(); - let content_upper = content.to_uppercase(); - let mut search_start = 0; - - while let Some(pos) = content_upper[search_start..].find("MODEL") { - let actual_pos = search_start + pos; - let is_start = - actual_pos == 0 || !content.as_bytes()[actual_pos - 1].is_ascii_alphanumeric(); - let is_followed_by_boundary = actual_pos + 5 >= content.len() - || matches!( - content.as_bytes()[actual_pos + 5], - b' ' | b'(' | b'\t' | b'\n' - ); - - if is_start && is_followed_by_boundary { - starts.push(actual_pos); + let mut ranges = Vec::new(); + let mut current_start = None; + let mut current_end = None; + + for (start, end) in statement_ranges(content) { + let statement = &content[start..end]; + if starts_with_definition_keyword(statement, "MODEL") { + if let (Some(block_start), Some(block_end)) = (current_start, current_end) { + ranges.push((block_start, block_end)); + } + current_start = Some(start); + } + if current_start.is_some() { + current_end = Some(end); } + } - search_start = actual_pos + 1; + if let (Some(block_start), Some(block_end)) = (current_start, current_end) { + ranges.push((block_start, block_end)); } - starts - .iter() - .enumerate() - .map(|(index, start)| { - let end = starts.get(index + 1).copied().unwrap_or(content.len()); - (*start, end) - }) - .collect() + ranges } #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -390,35 +417,93 @@ fn statement_ranges(block: &str) -> Vec<(usize, usize)> { let mut start = None; let mut in_single_quote = false; let mut in_double_quote = false; + let mut in_line_comment = false; + let mut in_block_comment = false; + let bytes = block.as_bytes(); + let mut idx = 0; - for (idx, ch) in block.char_indices() { - if start.is_none() && !ch.is_whitespace() { - start = Some(idx); - } + while idx < bytes.len() { + let byte = bytes[idx]; + if in_line_comment { + if byte == b'\n' { + in_line_comment = false; + } + idx += 1; + continue; + } + if in_block_comment { + if byte == b'*' && bytes.get(idx + 1) == Some(&b'/') { + in_block_comment = false; + idx += 2; + } else { + idx += 1; + } + continue; + } if in_single_quote { - if ch == '\'' { + if byte == b'\'' && bytes.get(idx + 1) == Some(&b'\'') { + idx += 2; + continue; + } + if byte == b'\'' { in_single_quote = false; } + idx += 1; continue; } if in_double_quote { - if ch == '"' { + if byte == b'"' && bytes.get(idx + 1) == Some(&b'"') { + idx += 2; + continue; + } + if byte == b'"' { in_double_quote = false; } + idx += 1; + continue; + } + + if start.is_none() { + if byte.is_ascii_whitespace() { + idx += 1; + continue; + } + if byte == b'-' && bytes.get(idx + 1) == Some(&b'-') { + in_line_comment = true; + idx += 2; + continue; + } + if byte == b'/' && bytes.get(idx + 1) == Some(&b'*') { + in_block_comment = true; + idx += 2; + continue; + } + start = Some(idx); + } + + if byte == b'-' && bytes.get(idx + 1) == Some(&b'-') { + in_line_comment = true; + idx += 2; + continue; + } + if byte == b'/' && bytes.get(idx + 1) == Some(&b'*') { + in_block_comment = true; + idx += 2; continue; } - match ch { - '\'' => in_single_quote = true, - '"' => in_double_quote = true, - ';' => { + match byte { + b'\'' => in_single_quote = true, + b'"' => in_double_quote = true, + b';' => { if let Some(statement_start) = start.take() { - ranges.push((statement_start, idx + ch.len_utf8())); + ranges.push((statement_start, idx + 1)); } } _ => {} } + idx += 1; } if let Some(statement_start) = start { @@ -430,26 +515,25 @@ fn statement_ranges(block: &str) -> Vec<(usize, usize)> { ranges } -fn persist_model_item_definition_to_file( - path: &Path, +fn persist_model_item_definition_to_content( + content: &str, model_name: &str, kind: DefinitionKind, item_names: &[String], definition: &str, is_replace: bool, -) -> std::io::Result<()> { - if !path.exists() { - return append_definition_to_file(path, definition); +) -> String { + if content.trim().is_empty() { + return append_definition_to_content(content, definition); } - let content = fs::read_to_string(path)?; let item_names: HashSet<&str> = item_names.iter().map(String::as_str).collect(); let (_, adjusted_definition) = extract_model_prefix(definition.trim()); let mut result = String::new(); let mut cursor = 0; let mut inserted = false; - for (start, end) in model_definition_ranges(&content) { + for (start, end) in model_definition_ranges(content) { result.push_str(&content[cursor..start]); let block = &content[start..end]; @@ -480,13 +564,11 @@ fn persist_model_item_definition_to_file( } result.push_str(&content[cursor..]); - fs::write(path, result.trim_end())?; - if !inserted { - append_definition_to_file(path, definition)?; + result = append_definition_to_content(&result, definition); } - Ok(()) + result } fn remove_item_definitions_from_block( @@ -575,17 +657,164 @@ fn insert_definition_at_block_end(block: &str, definition: &str) -> String { format!("{body}\n\n{trimmed_definition}{trailing}") } -/// Append a definition to the file -fn append_definition_to_file(path: &Path, definition: &str) -> std::io::Result<()> { - let mut file = OpenOptions::new().create(true).append(true).open(path)?; +fn append_definition_to_content(content: &str, definition: &str) -> String { + let trimmed_definition = definition.trim(); + if trimmed_definition.is_empty() { + return content.trim_end().to_string(); + } + + let mut result = content.trim_end().to_string(); + if !result.is_empty() { + result.push_str("\n\n"); + } + result.push_str(trimmed_definition); + result.push('\n'); + result +} + +fn read_definitions_file(path: &Path) -> io::Result { + match fs::read_to_string(path) { + Ok(content) => Ok(content), + Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(String::new()), + Err(error) => Err(error), + } +} + +fn validate_definitions_content(content: &str) -> Result<(), String> { + if content.trim().is_empty() { + return Ok(()); + } + load_from_sql_string_with_metadata(content) + .map(|_| ()) + .map_err(|error| error.to_string()) +} + +struct DefinitionsFileLock { + path: PathBuf, + file: Option, +} + +impl Drop for DefinitionsFileLock { + fn drop(&mut self) { + self.file.take(); + let _ = fs::remove_file(&self.path); + } +} + +fn definitions_lock_path(path: &Path) -> PathBuf { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let file_name = path + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("definitions.sql"); + parent.join(format!(".{file_name}.lock")) +} + +fn lock_definitions_file(path: &Path) -> io::Result { + let lock_path = definitions_lock_path(path); + let deadline = Instant::now() + DEFINITIONS_LOCK_TIMEOUT; + + loop { + match OpenOptions::new() + .write(true) + .create_new(true) + .open(&lock_path) + { + Ok(mut file) => { + writeln!(file, "pid={}", std::process::id())?; + file.sync_all()?; + return Ok(DefinitionsFileLock { + path: lock_path, + file: Some(file), + }); + } + Err(error) if error.kind() == io::ErrorKind::AlreadyExists => { + if is_stale_definitions_lock(&lock_path) { + let _ = fs::remove_file(&lock_path); + continue; + } + if Instant::now() >= deadline { + return Err(io::Error::new( + io::ErrorKind::TimedOut, + format!("timed out waiting for {}", lock_path.display()), + )); + } + std::thread::sleep(Duration::from_millis(50)); + } + Err(error) => return Err(error), + } + } +} + +fn is_stale_definitions_lock(lock_path: &Path) -> bool { + fs::metadata(lock_path) + .and_then(|metadata| metadata.modified()) + .ok() + .and_then(|modified| SystemTime::now().duration_since(modified).ok()) + .is_some_and(|age| age > DEFINITIONS_STALE_LOCK_AFTER) +} + +#[cfg(not(windows))] +fn replace_file_atomic(temp_path: &Path, path: &Path) -> io::Result<()> { + fs::rename(temp_path, path) +} + +#[cfg(windows)] +fn replace_file_atomic(temp_path: &Path, path: &Path) -> io::Result<()> { + use std::os::windows::ffi::OsStrExt; + use windows_sys::Win32::Storage::FileSystem::{ + MoveFileExW, MOVEFILE_REPLACE_EXISTING, MOVEFILE_WRITE_THROUGH, + }; + + fn wide_path(path: &Path) -> Vec { + path.as_os_str().encode_wide().chain(Some(0)).collect() + } + + let temp_wide = wide_path(temp_path); + let path_wide = wide_path(path); + let result = unsafe { + MoveFileExW( + temp_wide.as_ptr(), + path_wide.as_ptr(), + MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH, + ) + }; + if result == 0 { + return Err(io::Error::last_os_error()); + } + Ok(()) +} + +fn write_definitions_file_atomic(path: &Path, content: &str) -> std::io::Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let file_name = path + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("definitions.sql"); + let unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_nanos()) + .unwrap_or_default(); + let temp_path = parent.join(format!( + ".{file_name}.{}.{}.tmp", + std::process::id(), + unique + )); + + { + let mut file = OpenOptions::new() + .write(true) + .create_new(true) + .open(&temp_path)?; + file.write_all(content.as_bytes())?; + file.sync_all()?; + } - // Add newlines for separation if file is not empty - if path.exists() && fs::metadata(path)?.len() > 0 { - writeln!(file)?; - writeln!(file)?; + if let Err(error) = replace_file_atomic(&temp_path, path) { + let _ = fs::remove_file(&temp_path); + return Err(error); } - writeln!(file, "{}", definition.trim())?; Ok(()) } @@ -609,10 +838,14 @@ pub extern "C" fn sidemantic_autoload_for_context( Err(error) => return error, }; let Some(definitions_path) = get_definitions_path(db_path) else { + let mut states = FFI_STATES.lock().unwrap(); + states.insert(key, FfiState::default()); return ptr::null_mut(); }; if !definitions_path.exists() { + let mut states = FFI_STATES.lock().unwrap(); + states.insert(key, FfiState::default()); return ptr::null_mut(); // No file to load, success } @@ -623,75 +856,39 @@ pub extern "C" fn sidemantic_autoload_for_context( }; if content.trim().is_empty() { + let mut states = FFI_STATES.lock().unwrap(); + states.insert(key, FfiState::default()); return ptr::null_mut(); // Empty file, success } - // Parse each model definition in the file - // Split on MODEL keyword to handle multiple definitions - let mut states = FFI_STATES.lock().unwrap(); - let state = states.entry(key).or_default(); - - let mut last_model_name = None; - for block in split_definitions(&content) { - if block.trim().is_empty() { - continue; - } - match parse_sql_model(block) { - Ok(model) => { - last_model_name = Some(model.name.clone()); - if let Err(e) = state.graph.replace_model(model) { - return to_c_string(&format!("Error loading model: {e}")); - } - } - Err(e) => { - // Log but don't fail on parse errors for individual models - eprintln!("Warning: failed to parse model definition: {e}"); - } + let metadata = match load_from_sql_string_with_metadata(&content) { + Ok(metadata) => metadata, + Err(e) => { + let mut states = FFI_STATES.lock().unwrap(); + states.insert(key, FfiState::default()); + return to_c_string(&format!("Error loading definitions file: {e}")); } - } + }; - state.active_model = last_model_name; + let mut states = FFI_STATES.lock().unwrap(); + states.insert( + key, + FfiState { + active_model: active_model_for_loaded_models(&metadata.model_order), + graph: metadata.graph, + }, + ); ptr::null_mut() // Success } /// Split content into individual model definitions +#[cfg(test)] fn split_definitions(content: &str) -> Vec<&str> { - let mut definitions = Vec::new(); - let mut start = None; - - // Find each MODEL keyword and split there - let content_upper = content.to_uppercase(); - let mut search_start = 0; - - while let Some(pos) = content_upper[search_start..].find("MODEL") { - let actual_pos = search_start + pos; - - // Check this is actually the start of a MODEL statement (not inside a word) - let is_start = - actual_pos == 0 || !content.as_bytes()[actual_pos - 1].is_ascii_alphanumeric(); - let is_followed_by_space = actual_pos + 5 < content.len() - && (content.as_bytes()[actual_pos + 5] == b' ' - || content.as_bytes()[actual_pos + 5] == b'(' - || content.as_bytes()[actual_pos + 5] == b'\t' - || content.as_bytes()[actual_pos + 5] == b'\n'); - - if is_start && is_followed_by_space { - if let Some(previous_start) = start { - definitions.push(&content[previous_start..actual_pos]); - } - start = Some(actual_pos); - } - - search_start = actual_pos + 1; - } - - // Don't forget the last definition - if let Some(start) = start { - definitions.push(&content[start..]); - } - - definitions + model_definition_ranges(content) + .into_iter() + .map(|(start, end)| &content[start..end]) + .collect() } /// Add a metric/dimension/segment to the most recently created model @@ -753,16 +950,11 @@ pub extern "C" fn sidemantic_add_definition_for_context( } explicit_model } else { - // Use ACTIVE_MODEL or fall back to last model + // Use ACTIVE_MODEL. Multi-model loads intentionally require an explicit target. if let Some(ref name) = state.active_model { name.clone() } else { - // Fall back to last model - let model_names: Vec = state.graph.models().map(|m| m.name.clone()).collect(); - if model_names.is_empty() { - return to_c_string("Error: no model defined yet. Create a model first with SEMANTIC CREATE MODEL, or use SEMANTIC USE ."); - } - model_names.last().unwrap().clone() + return to_c_string("Error: no active model. Create a model first with SEMANTIC CREATE MODEL, select one with SEMANTIC MODEL , or use METRIC/DIMENSION/SEGMENT model.name syntax."); } }; @@ -817,15 +1009,29 @@ pub extern "C" fn sidemantic_add_definition_for_context( } } - if let Err(e) = state.graph.replace_model(updated_model) { + let mut candidate_state = state.clone(); + if let Err(e) = candidate_state.graph.replace_model(updated_model) { return to_c_string(&format!("Error updating model: {e}")); } // Persist the definition with the owning model so autoload sees the same graph. if let Some(definitions_path) = get_definitions_path(db_path) { - let result = if let Some(kind) = persisted_kind { - persist_model_item_definition_to_file( - &definitions_path, + let _definitions_lock = match lock_definitions_file(&definitions_path) { + Ok(lock) => lock, + Err(e) => return to_c_string(&format!("Error locking definitions file: {e}")), + }; + let content = match read_definitions_file(&definitions_path) { + Ok(content) => content, + Err(e) => return to_c_string(&format!("Error reading definitions file: {e}")), + }; + if !content_has_model_block(&content, &model_name) { + return to_c_string(&format!( + "Error: model '{model_name}' is not present in the persisted definitions file" + )); + } + let candidate_content = if let Some(kind) = persisted_kind { + persist_model_item_definition_to_content( + &content, &model_name, kind, &persisted_item_names, @@ -833,14 +1039,19 @@ pub extern "C" fn sidemantic_add_definition_for_context( is_replace, ) } else { - append_definition_to_file(&definitions_path, &sql_str) + append_definition_to_content(&content, &sql_str) }; - if let Err(e) = result { + if let Err(e) = validate_definitions_content(&candidate_content) { + return to_c_string(&format!("Error validating definitions file: {e}")); + } + if let Err(e) = write_definitions_file_atomic(&definitions_path, &candidate_content) { return to_c_string(&format!("Error writing to definitions file: {e}")); } } + *state = candidate_state; + ptr::null_mut() // Success } @@ -1151,6 +1362,13 @@ mod tests { sql } + fn take_rewrite_error(result: SidemanticRewriteResult) -> String { + assert!(!result.error.is_null()); + let message = unsafe { CStr::from_ptr(result.error).to_string_lossy().into_owned() }; + sidemantic_free_result(result); + message + } + fn unique_db_path(name: &str) -> PathBuf { let nanos = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -1262,6 +1480,73 @@ models: remove_definitions_file(&db_path); } + #[test] + fn test_define_duplicate_does_not_append_sidecar() { + let _guard = test_lock(); + sidemantic_clear(); + + let db_path = unique_db_path("define_duplicate_atomic"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + remove_definitions_file(&db_path); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + + let model = + CString::new("MODEL (name orders, table orders, primary_key order_id);").unwrap(); + assert_success(sidemantic_define(model.as_ptr(), db_path.as_ptr(), false)); + + let error = take_error(sidemantic_define(model.as_ptr(), db_path.as_ptr(), false)); + assert!(error.contains("already exists"), "{error}"); + + let content = fs::read_to_string(&definitions_path).unwrap(); + assert_eq!(split_definitions(&content).len(), 1, "{content}"); + assert_eq!( + content.matches("MODEL (name orders").count(), + 1, + "{content}" + ); + + remove_definitions_file(&db_path); + } + + #[test] + fn test_define_replace_invalid_rolls_back_sidecar_and_memory() { + let _guard = test_lock(); + sidemantic_clear(); + + let db_path = unique_db_path("define_replace_invalid_atomic"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + remove_definitions_file(&db_path); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + + let first = CString::new( + "MODEL (name orders, table orders, primary_key order_id);\nMETRIC revenue AS SUM(amount);", + ) + .unwrap(); + assert_success(sidemantic_define(first.as_ptr(), db_path.as_ptr(), false)); + let before = fs::read_to_string(&definitions_path).unwrap(); + + let invalid = CString::new( + "MODEL (name orders, table orders_v2, primary_key order_id);\nMETRIC revenue AS SUM(amount);\nMETRIC revenue AS SUM(net_amount);", + ) + .unwrap(); + let error = take_error(sidemantic_define(invalid.as_ptr(), db_path.as_ptr(), true)); + assert!(error.contains("duplicate metric"), "{error}"); + + let after = fs::read_to_string(&definitions_path).unwrap(); + assert_eq!(after, before); + + let rewritten = take_rewrite_sql(sidemantic_rewrite( + CString::new("SELECT orders.revenue FROM orders") + .unwrap() + .as_ptr(), + )); + assert!(rewritten.contains("amount"), "{rewritten}"); + assert!(!rewritten.contains("net_amount"), "{rewritten}"); + assert!(!rewritten.contains("orders_v2"), "{rewritten}"); + + remove_definitions_file(&db_path); + } + #[test] fn test_add_definition_updates_existing_model() { let _guard = test_lock(); @@ -1297,6 +1582,69 @@ models: remove_definitions_file(&db_path); } + #[test] + fn test_add_definition_accepts_simple_segment_syntax() { + let _guard = test_lock(); + sidemantic_clear(); + + let db_path = unique_db_path("add_segment_simple"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + remove_definitions_file(&db_path); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + + let model = + CString::new("MODEL (name orders, table orders, primary_key order_id);").unwrap(); + assert_success(sidemantic_define(model.as_ptr(), db_path.as_ptr(), false)); + + let segment = CString::new("SEGMENT completed AS status = 'completed';").unwrap(); + assert_success(sidemantic_add_definition( + segment.as_ptr(), + db_path.as_ptr(), + false, + )); + + let content = fs::read_to_string(&definitions_path).unwrap(); + let model = parse_sql_model(&content).unwrap(); + let completed = model.get_segment("completed").unwrap(); + assert_eq!(completed.sql, "status = 'completed'"); + + remove_definitions_file(&db_path); + } + + #[test] + fn test_add_definition_persistence_failure_rolls_back_memory() { + let _guard = test_lock(); + sidemantic_clear(); + + let model = + CString::new("MODEL (name orders, table orders, primary_key order_id);").unwrap(); + assert_success(sidemantic_define(model.as_ptr(), ptr::null(), false)); + + let db_path = unique_db_path("add_definition_missing_sidecar"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + remove_definitions_file(&db_path); + + let metric = CString::new("METRIC (name revenue, agg sum, sql amount);").unwrap(); + let error = take_error(sidemantic_add_definition( + metric.as_ptr(), + db_path.as_ptr(), + false, + )); + assert!( + error.contains("not present in the persisted definitions file"), + "{error}" + ); + + let rewrite_error = take_rewrite_error(sidemantic_rewrite( + CString::new("SELECT orders.revenue FROM orders") + .unwrap() + .as_ptr(), + )); + assert!(rewrite_error.contains("revenue"), "{rewrite_error}"); + + remove_definitions_file(&db_path); + } + #[test] fn test_replace_metric_dimension_and_segment_updates_persisted_definitions() { let _guard = test_lock(); @@ -1487,7 +1835,7 @@ models: false, )); - assert!(error.contains("no model defined yet"), "{error}"); + assert!(error.contains("no active model"), "{error}"); remove_definitions_file(&db_path); } @@ -1626,7 +1974,100 @@ models: } #[test] - fn test_autoload_sets_active_model_to_last_loaded_model() { + fn test_load_single_model_yaml_sets_active_model() { + let _guard = test_lock(); + + let context = CString::new("duckdb:single-load-active").unwrap(); + sidemantic_clear_for_context(context.as_ptr()); + + let yaml = CString::new( + r#" +models: + - name: orders + table: orders + primary_key: order_id +"#, + ) + .unwrap(); + assert_success(sidemantic_load_yaml_for_context( + context.as_ptr(), + yaml.as_ptr(), + )); + + let metric = CString::new("METRIC revenue AS SUM(amount);").unwrap(); + assert_success(sidemantic_add_definition_for_context( + context.as_ptr(), + metric.as_ptr(), + ptr::null(), + false, + )); + + let rewritten = take_rewrite_sql(sidemantic_rewrite_for_context( + context.as_ptr(), + CString::new("SELECT orders.revenue FROM orders") + .unwrap() + .as_ptr(), + )); + assert!(rewritten.contains("SUM"), "{rewritten}"); + assert!(rewritten.contains("amount"), "{rewritten}"); + + sidemantic_clear_for_context(context.as_ptr()); + } + + #[test] + fn test_load_multi_model_yaml_requires_explicit_active_model() { + let _guard = test_lock(); + + let context = CString::new("duckdb:multi-load-active").unwrap(); + sidemantic_clear_for_context(context.as_ptr()); + + let yaml = CString::new( + r#" +models: + - name: orders + table: orders + primary_key: order_id + - name: customers + table: customers + primary_key: customer_id +"#, + ) + .unwrap(); + assert_success(sidemantic_load_yaml_for_context( + context.as_ptr(), + yaml.as_ptr(), + )); + + let metric = CString::new("METRIC revenue AS SUM(amount);").unwrap(); + let error = take_error(sidemantic_add_definition_for_context( + context.as_ptr(), + metric.as_ptr(), + ptr::null(), + false, + )); + assert!(error.contains("no active model"), "{error}"); + + let explicit_metric = CString::new("METRIC orders.revenue AS SUM(amount);").unwrap(); + assert_success(sidemantic_add_definition_for_context( + context.as_ptr(), + explicit_metric.as_ptr(), + ptr::null(), + false, + )); + + let rewritten = take_rewrite_sql(sidemantic_rewrite_for_context( + context.as_ptr(), + CString::new("SELECT orders.revenue FROM orders") + .unwrap() + .as_ptr(), + )); + assert!(rewritten.contains("SUM"), "{rewritten}"); + + sidemantic_clear_for_context(context.as_ptr()); + } + + #[test] + fn test_autoload_sets_active_model_for_single_loaded_model() { let _guard = test_lock(); let context = CString::new("duckdb:autoload-active").unwrap(); @@ -1705,18 +2146,19 @@ models: } #[test] - fn test_autoload_invalid_definition_is_best_effort() { + fn test_autoload_invalid_definition_clears_context_and_returns_error() { let _guard = test_lock(); - let context = CString::new("duckdb:autoload-invalid-best-effort").unwrap(); + let context = CString::new("duckdb:autoload-invalid-clears").unwrap(); sidemantic_clear_for_context(context.as_ptr()); - let db_path = unique_db_path("autoload_invalid_best_effort"); + let db_path = unique_db_path("autoload_invalid_clears"); let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + fs::write( &definitions_path, - "MODEL (name events, table events, primary_key event_id);\nMETRIC event_count AS COUNT(*);\n\nMODEL (", + "MODEL (name events, table events, primary_key event_id);\nMETRIC event_count AS COUNT(*);", ) .unwrap(); @@ -1725,14 +2167,154 @@ models: db_path.as_ptr(), )); - let sql = CString::new("SELECT events.event_count FROM events").unwrap(); - let rewritten = take_rewrite_sql(sidemantic_rewrite_for_context( + let loaded = take_rewrite_sql(sidemantic_rewrite_for_context( context.as_ptr(), - sql.as_ptr(), + CString::new("SELECT events.event_count FROM events") + .unwrap() + .as_ptr(), )); - assert!(rewritten.contains("COUNT"), "{rewritten}"); + assert!(loaded.contains("COUNT"), "{loaded}"); + + fs::write(&definitions_path, "MODEL (").unwrap(); + let error = take_error(sidemantic_autoload_for_context( + context.as_ptr(), + db_path.as_ptr(), + )); + assert!(error.contains("Error loading definitions file"), "{error}"); + + let passthrough = sidemantic_rewrite_for_context( + context.as_ptr(), + CString::new("SELECT events.event_count FROM events") + .unwrap() + .as_ptr(), + ); + assert!(passthrough.error.is_null()); + assert!(!passthrough.was_rewritten); + sidemantic_free_result(passthrough); + + sidemantic_clear_for_context(context.as_ptr()); + let _ = fs::remove_file(definitions_path); + } + + #[test] + fn test_autoload_missing_sidecar_clears_existing_context() { + let _guard = test_lock(); + + let context = CString::new("duckdb:autoload-missing-clears").unwrap(); + sidemantic_clear_for_context(context.as_ptr()); + + let db_path = unique_db_path("autoload_missing_clears"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + fs::write( + &definitions_path, + "MODEL (name orders, table orders, primary_key order_id);\nMETRIC order_count AS COUNT(*);", + ) + .unwrap(); + + assert_success(sidemantic_autoload_for_context( + context.as_ptr(), + db_path.as_ptr(), + )); + + let loaded = take_rewrite_sql(sidemantic_rewrite_for_context( + context.as_ptr(), + CString::new("SELECT orders.order_count FROM orders") + .unwrap() + .as_ptr(), + )); + assert!(loaded.contains("COUNT"), "{loaded}"); + + fs::remove_file(&definitions_path).unwrap(); + assert_success(sidemantic_autoload_for_context( + context.as_ptr(), + db_path.as_ptr(), + )); + + let passthrough = sidemantic_rewrite_for_context( + context.as_ptr(), + CString::new("SELECT orders.order_count FROM orders") + .unwrap() + .as_ptr(), + ); + assert!(passthrough.error.is_null()); + assert!(!passthrough.was_rewritten); + sidemantic_free_result(passthrough); + + sidemantic_clear_for_context(context.as_ptr()); + } + + #[test] + fn test_autoload_memory_context_clears_existing_context() { + let _guard = test_lock(); + + let context = CString::new("duckdb:autoload-memory-clears").unwrap(); + sidemantic_clear_for_context(context.as_ptr()); + + let yaml = CString::new( + r#" +models: + - name: temp_orders + table: temp_orders + primary_key: order_id + metrics: + - name: order_count + agg: count +"#, + ) + .unwrap(); + + assert_success(sidemantic_load_yaml_for_context( + context.as_ptr(), + yaml.as_ptr(), + )); + let loaded = take_rewrite_sql(sidemantic_rewrite_for_context( + context.as_ptr(), + CString::new("SELECT temp_orders.order_count FROM temp_orders") + .unwrap() + .as_ptr(), + )); + assert!(loaded.contains("COUNT"), "{loaded}"); + + assert_success(sidemantic_autoload_for_context( + context.as_ptr(), + ptr::null(), + )); + + let passthrough = sidemantic_rewrite_for_context( + context.as_ptr(), + CString::new("SELECT temp_orders.order_count FROM temp_orders") + .unwrap() + .as_ptr(), + ); + assert!(passthrough.error.is_null()); + assert!(!passthrough.was_rewritten); + sidemantic_free_result(passthrough); sidemantic_clear_for_context(context.as_ptr()); + } + + #[test] + fn test_atomic_write_replaces_existing_definitions_file_and_cleans_lock() { + let _guard = test_lock(); + + let db_path = unique_db_path("atomic_replace"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + let lock_path = definitions_lock_path(&definitions_path); + + { + let _lock = lock_definitions_file(&definitions_path).unwrap(); + assert!(lock_path.exists()); + } + assert!(!lock_path.exists()); + + write_definitions_file_atomic(&definitions_path, "first\n").unwrap(); + write_definitions_file_atomic(&definitions_path, "second\n").unwrap(); + + assert_eq!(fs::read_to_string(&definitions_path).unwrap(), "second\n"); + assert!(!lock_path.exists()); + let _ = fs::remove_file(definitions_path); } @@ -1763,4 +2345,38 @@ MODEL (name customers, table customers, primary_key customer_id); let _ = fs::remove_file(definitions_path); } + + #[test] + fn test_model_block_splitting_ignores_model_keyword_in_sql_string_and_comments() { + let _guard = test_lock(); + let db_path = unique_db_path("remove_model_keyword_string"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + let content = r#" +-- MODEL (name ignored, table ignored); +MODEL (name orders, table orders, primary_key order_id); +METRIC suspicious AS SUM(CASE WHEN note = 'MODEL (' THEN amount ELSE 0 END); + +MODEL (name customers, table customers, primary_key customer_id); +METRIC customer_count AS COUNT(*); +"#; + fs::write(&definitions_path, content).unwrap(); + + let original = fs::read_to_string(&definitions_path).unwrap(); + assert_eq!(split_definitions(&original).len(), 2, "{original}"); + + remove_model_from_file(&definitions_path, "customers").unwrap(); + + let updated = fs::read_to_string(&definitions_path).unwrap(); + assert!(updated.contains("name orders"), "{updated}"); + assert!(updated.contains("'MODEL ('"), "{updated}"); + assert!(updated.contains("suspicious"), "{updated}"); + assert!(!updated.contains("name customers"), "{updated}"); + assert!(!updated.contains("customer_count"), "{updated}"); + + let loaded = load_from_sql_string_with_metadata(&updated).unwrap(); + assert_eq!(loaded.model_order, vec!["orders"]); + + let _ = fs::remove_file(definitions_path); + } } From 240806c0334a3c8565110c66ef25cc2d8cbf68d6 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 16:54:28 -0700 Subject: [PATCH 2/8] Guard DuckDB definition lock cleanup --- sidemantic-rs/src/ffi.rs | 58 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/sidemantic-rs/src/ffi.rs b/sidemantic-rs/src/ffi.rs index 5ee52446..b747e1e6 100644 --- a/sidemantic-rs/src/ffi.rs +++ b/sidemantic-rs/src/ffi.rs @@ -13,7 +13,10 @@ use std::io::{self, Write}; use std::os::raw::c_char; use std::path::{Path, PathBuf}; use std::ptr; -use std::sync::Mutex; +use std::sync::{ + atomic::{AtomicU64, Ordering}, + Mutex, +}; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use once_cell::sync::Lazy; @@ -38,6 +41,7 @@ struct FfiState { /// Semantic graph state keyed by DuckDB database/session context. static FFI_STATES: Lazy>> = Lazy::new(|| Mutex::new(HashMap::new())); +static DEFINITIONS_LOCK_TOKEN_COUNTER: AtomicU64 = AtomicU64::new(1); /// Result from rewrite operation #[repr(C)] @@ -692,12 +696,15 @@ fn validate_definitions_content(content: &str) -> Result<(), String> { struct DefinitionsFileLock { path: PathBuf, file: Option, + owner_token: String, } impl Drop for DefinitionsFileLock { fn drop(&mut self) { self.file.take(); - let _ = fs::remove_file(&self.path); + if definitions_lock_owned_by(&self.path, &self.owner_token) { + let _ = fs::remove_file(&self.path); + } } } @@ -710,6 +717,22 @@ fn definitions_lock_path(path: &Path) -> PathBuf { parent.join(format!(".{file_name}.lock")) } +fn definitions_lock_owner_token() -> String { + let unique = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|duration| duration.as_nanos()) + .unwrap_or_default(); + let sequence = DEFINITIONS_LOCK_TOKEN_COUNTER.fetch_add(1, Ordering::Relaxed); + format!("pid={};time={unique};seq={sequence}", std::process::id()) +} + +fn definitions_lock_owned_by(lock_path: &Path, owner_token: &str) -> bool { + fs::read_to_string(lock_path) + .ok() + .and_then(|content| content.lines().next().map(str::to_owned)) + .is_some_and(|token| token == owner_token) +} + fn lock_definitions_file(path: &Path) -> io::Result { let lock_path = definitions_lock_path(path); let deadline = Instant::now() + DEFINITIONS_LOCK_TIMEOUT; @@ -721,11 +744,13 @@ fn lock_definitions_file(path: &Path) -> io::Result { .open(&lock_path) { Ok(mut file) => { - writeln!(file, "pid={}", std::process::id())?; + let owner_token = definitions_lock_owner_token(); + writeln!(file, "{owner_token}")?; file.sync_all()?; return Ok(DefinitionsFileLock { path: lock_path, file: Some(file), + owner_token, }); } Err(error) if error.kind() == io::ErrorKind::AlreadyExists => { @@ -2318,6 +2343,33 @@ models: let _ = fs::remove_file(definitions_path); } + #[test] + fn test_lock_drop_does_not_remove_recreated_lock() { + let _guard = test_lock(); + + let db_path = unique_db_path("lock_recreated"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + let lock_path = definitions_lock_path(&definitions_path); + + let original_lock = lock_definitions_file(&definitions_path).unwrap(); + assert!(lock_path.exists()); + + fs::remove_file(&lock_path).unwrap(); + let recreated_token = "pid=recreated;time=1;seq=1"; + fs::write(&lock_path, format!("{recreated_token}\n")).unwrap(); + + drop(original_lock); + + assert_eq!( + fs::read_to_string(&lock_path).unwrap(), + format!("{recreated_token}\n") + ); + + let _ = fs::remove_file(lock_path); + let _ = fs::remove_file(definitions_path); + } + #[test] fn test_remove_model_from_file_handles_multiline_models() { let _guard = test_lock(); From 6768507739b7c5bca0c3b6d5540b9a0907372f0e Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 17:45:18 -0700 Subject: [PATCH 3/8] Split semicolonless persisted definitions --- sidemantic-rs/src/ffi.rs | 131 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/sidemantic-rs/src/ffi.rs b/sidemantic-rs/src/ffi.rs index b747e1e6..b4a2159a 100644 --- a/sidemantic-rs/src/ffi.rs +++ b/sidemantic-rs/src/ffi.rs @@ -416,6 +416,55 @@ fn starts_with_definition_keyword(sql: &str, keyword: &str) -> bool { .unwrap_or(true) } +const DEFINITION_KEYWORDS: &[&str] = &[ + "MODEL", + "METRIC", + "DIMENSION", + "SEGMENT", + "RELATIONSHIP", + "PARAMETER", + "PRE_AGGREGATION", +]; + +fn definition_keyword_at_line_start(bytes: &[u8], idx: usize) -> bool { + let Some(byte) = bytes.get(idx) else { + return false; + }; + if !byte.is_ascii_alphabetic() { + return false; + } + + let line_start = bytes[..idx] + .iter() + .rposition(|byte| *byte == b'\n' || *byte == b'\r') + .map(|pos| pos + 1) + .unwrap_or(0); + if !bytes[line_start..idx] + .iter() + .all(|byte| byte.is_ascii_whitespace()) + { + return false; + } + + DEFINITION_KEYWORDS + .iter() + .any(|keyword| keyword_matches_at(bytes, idx, keyword.as_bytes())) +} + +fn keyword_matches_at(bytes: &[u8], idx: usize, keyword: &[u8]) -> bool { + let Some(candidate) = bytes.get(idx..idx + keyword.len()) else { + return false; + }; + if !candidate.eq_ignore_ascii_case(keyword) { + return false; + } + + bytes + .get(idx + keyword.len()) + .map(|byte| byte.is_ascii_whitespace()) + .unwrap_or(true) +} + fn statement_ranges(block: &str) -> Vec<(usize, usize)> { let mut ranges = Vec::new(); let mut start = None; @@ -423,6 +472,7 @@ fn statement_ranges(block: &str) -> Vec<(usize, usize)> { let mut in_double_quote = false; let mut in_line_comment = false; let mut in_block_comment = false; + let mut paren_depth = 0usize; let bytes = block.as_bytes(); let mut idx = 0; @@ -486,6 +536,16 @@ fn statement_ranges(block: &str) -> Vec<(usize, usize)> { start = Some(idx); } + if let Some(statement_start) = start { + if paren_depth == 0 + && idx != statement_start + && definition_keyword_at_line_start(bytes, idx) + { + ranges.push((statement_start, idx)); + start = Some(idx); + } + } + if byte == b'-' && bytes.get(idx + 1) == Some(&b'-') { in_line_comment = true; idx += 2; @@ -500,10 +560,13 @@ fn statement_ranges(block: &str) -> Vec<(usize, usize)> { match byte { b'\'' => in_single_quote = true, b'"' => in_double_quote = true, + b'(' => paren_depth = paren_depth.saturating_add(1), + b')' => paren_depth = paren_depth.saturating_sub(1), b';' => { if let Some(statement_start) = start.take() { ranges.push((statement_start, idx + 1)); } + paren_depth = 0; } _ => {} } @@ -1838,6 +1901,74 @@ models: remove_definitions_file(&db_path); } + #[test] + fn test_semicolonless_persisted_model_blocks_stay_separate_for_updates() { + let _guard = test_lock(); + sidemantic_clear(); + + let db_path = unique_db_path("semicolonless_model_blocks"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + remove_definitions_file(&db_path); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + + let orders = + CString::new("MODEL (name orders, table orders, primary_key order_id)").unwrap(); + let customers = + CString::new("MODEL (name customers, table customers, primary_key customer_id)") + .unwrap(); + assert_success(sidemantic_define(orders.as_ptr(), db_path.as_ptr(), false)); + assert_success(sidemantic_define( + customers.as_ptr(), + db_path.as_ptr(), + false, + )); + + let initial = fs::read_to_string(&definitions_path).unwrap(); + assert_eq!(split_definitions(&initial).len(), 2, "{initial}"); + + let metric = CString::new("METRIC customers.customer_count AS COUNT(*)").unwrap(); + assert_success(sidemantic_add_definition( + metric.as_ptr(), + db_path.as_ptr(), + false, + )); + + let replacement = + CString::new("MODEL (name orders, table orders_v2, primary_key order_id)").unwrap(); + assert_success(sidemantic_define( + replacement.as_ptr(), + db_path.as_ptr(), + true, + )); + + let updated = fs::read_to_string(&definitions_path).unwrap(); + let blocks = split_definitions(&updated); + assert_eq!(blocks.len(), 2, "{updated}"); + assert!(updated.contains("orders_v2"), "{updated}"); + assert!(updated.contains("customer_count"), "{updated}"); + + let orders_model = blocks + .iter() + .find_map(|block| { + let model = parse_sql_model(block).ok()?; + (model.name == "orders").then_some(model) + }) + .unwrap(); + let customers_model = blocks + .iter() + .find_map(|block| { + let model = parse_sql_model(block).ok()?; + (model.name == "customers").then_some(model) + }) + .unwrap(); + + assert_eq!(orders_model.table.as_deref(), Some("orders_v2")); + assert_eq!(customers_model.metrics.len(), 1); + assert_eq!(customers_model.metrics[0].name, "customer_count"); + + remove_definitions_file(&db_path); + } + #[test] fn test_clear_resets_active_model() { let _guard = test_lock(); From b178553c73d7d2fad10f805a4a9d4f813f3e30bd Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 18:24:19 -0700 Subject: [PATCH 4/8] Verify stale definition locks before cleanup --- sidemantic-rs/src/ffi.rs | 85 +++++++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 10 deletions(-) diff --git a/sidemantic-rs/src/ffi.rs b/sidemantic-rs/src/ffi.rs index b4a2159a..6143e1a7 100644 --- a/sidemantic-rs/src/ffi.rs +++ b/sidemantic-rs/src/ffi.rs @@ -796,6 +796,51 @@ fn definitions_lock_owned_by(lock_path: &Path, owner_token: &str) -> bool { .is_some_and(|token| token == owner_token) } +#[derive(Debug, PartialEq, Eq)] +struct DefinitionsLockSnapshot { + owner_token: String, + modified: SystemTime, + len: u64, +} + +fn definitions_lock_snapshot(lock_path: &Path) -> io::Result { + let content = fs::read_to_string(lock_path)?; + let metadata = fs::metadata(lock_path)?; + let owner_token = content.lines().next().unwrap_or_default().to_string(); + Ok(DefinitionsLockSnapshot { + owner_token, + modified: metadata.modified()?, + len: metadata.len(), + }) +} + +fn stale_definitions_lock_snapshot(lock_path: &Path) -> Option { + let snapshot = definitions_lock_snapshot(lock_path).ok()?; + SystemTime::now() + .duration_since(snapshot.modified) + .ok() + .filter(|age| *age > DEFINITIONS_STALE_LOCK_AFTER) + .map(|_| snapshot) +} + +fn remove_stale_definitions_lock( + lock_path: &Path, + stale_snapshot: &DefinitionsLockSnapshot, +) -> io::Result { + match definitions_lock_snapshot(lock_path) { + Ok(current_snapshot) if current_snapshot == *stale_snapshot => { + match fs::remove_file(lock_path) { + Ok(()) => Ok(true), + Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(false), + Err(error) => Err(error), + } + } + Ok(_) => Ok(false), + Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(false), + Err(error) => Err(error), + } +} + fn lock_definitions_file(path: &Path) -> io::Result { let lock_path = definitions_lock_path(path); let deadline = Instant::now() + DEFINITIONS_LOCK_TIMEOUT; @@ -817,8 +862,10 @@ fn lock_definitions_file(path: &Path) -> io::Result { }); } Err(error) if error.kind() == io::ErrorKind::AlreadyExists => { - if is_stale_definitions_lock(&lock_path) { - let _ = fs::remove_file(&lock_path); + if let Some(stale_snapshot) = stale_definitions_lock_snapshot(&lock_path) { + if remove_stale_definitions_lock(&lock_path, &stale_snapshot)? { + continue; + } continue; } if Instant::now() >= deadline { @@ -834,14 +881,6 @@ fn lock_definitions_file(path: &Path) -> io::Result { } } -fn is_stale_definitions_lock(lock_path: &Path) -> bool { - fs::metadata(lock_path) - .and_then(|metadata| metadata.modified()) - .ok() - .and_then(|modified| SystemTime::now().duration_since(modified).ok()) - .is_some_and(|age| age > DEFINITIONS_STALE_LOCK_AFTER) -} - #[cfg(not(windows))] fn replace_file_atomic(temp_path: &Path, path: &Path) -> io::Result<()> { fs::rename(temp_path, path) @@ -2501,6 +2540,32 @@ models: let _ = fs::remove_file(definitions_path); } + #[test] + fn test_stale_lock_cleanup_does_not_remove_recreated_lock() { + let _guard = test_lock(); + + let db_path = unique_db_path("stale_lock_recreated"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + let lock_path = definitions_lock_path(&definitions_path); + + let stale_token = "pid=stale;time=1;seq=1"; + fs::write(&lock_path, format!("{stale_token}\n")).unwrap(); + let stale_snapshot = definitions_lock_snapshot(&lock_path).unwrap(); + + let recreated_token = "pid=recreated;time=2;seq=2"; + fs::write(&lock_path, format!("{recreated_token}\n")).unwrap(); + + assert!(!remove_stale_definitions_lock(&lock_path, &stale_snapshot).unwrap()); + assert_eq!( + fs::read_to_string(&lock_path).unwrap(), + format!("{recreated_token}\n") + ); + + let _ = fs::remove_file(lock_path); + let _ = fs::remove_file(definitions_path); + } + #[test] fn test_remove_model_from_file_handles_multiline_models() { let _guard = test_lock(); From ce1456bb27c48ffca49b900c10e5fdc09e913e0e Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 19:08:20 -0700 Subject: [PATCH 5/8] Track nested model properties in DuckDB parser --- .../src/sidemantic_extension.cpp | 36 ++++++++++++++----- sidemantic-duckdb/test/sql/sidemantic.test | 8 +++++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/sidemantic-duckdb/src/sidemantic_extension.cpp b/sidemantic-duckdb/src/sidemantic_extension.cpp index 4e1bd23d..6eae71db 100644 --- a/sidemantic-duckdb/src/sidemantic_extension.cpp +++ b/sidemantic-duckdb/src/sidemantic_extension.cpp @@ -312,18 +312,22 @@ static bool ExtractModelNameProperty(const std::string &body, std::string &name, size_t pos = open + 1; bool in_single_quote = false; bool in_double_quote = false; - int depth = 0; + int paren_depth = 0; + int bracket_depth = 0; + int brace_depth = 0; while (pos < body.size()) { - while (pos < body.size() && depth == 0 && - (std::isspace(body[pos]) || body[pos] == ',')) { + while (pos < body.size() && paren_depth == 0 && bracket_depth == 0 && + brace_depth == 0 && (std::isspace(body[pos]) || body[pos] == ',')) { pos++; } - if (pos >= body.size() || (depth == 0 && body[pos] == ')')) { + if (pos >= body.size() || + (paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 && body[pos] == ')')) { return false; } - if (depth == 0 && StartsWithNameProperty(body, pos)) { + if (paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 && + StartsWithNameProperty(body, pos)) { pos += 4; while (pos < body.size() && std::isspace(body[pos])) { pos++; @@ -391,13 +395,27 @@ static bool ExtractModelNameProperty(const std::string &body, std::string &name, } else if (ch == '"') { in_double_quote = true; } else if (ch == '(') { - depth++; + paren_depth++; } else if (ch == ')') { - if (depth == 0) { + if (paren_depth > 0) { + paren_depth--; + } else if (bracket_depth == 0 && brace_depth == 0) { return false; } - depth--; - } else if (ch == ',' && depth == 0) { + } else if (ch == '[') { + bracket_depth++; + } else if (ch == ']') { + if (bracket_depth > 0) { + bracket_depth--; + } + } else if (ch == '{') { + brace_depth++; + } else if (ch == '}') { + if (brace_depth > 0) { + brace_depth--; + } + } else if (ch == ',' && paren_depth == 0 && bracket_depth == 0 && + brace_depth == 0) { break; } pos++; diff --git a/sidemantic-duckdb/test/sql/sidemantic.test b/sidemantic-duckdb/test/sql/sidemantic.test index a2b53ed3..951648ce 100644 --- a/sidemantic-duckdb/test/sql/sidemantic.test +++ b/sidemantic-duckdb/test/sql/sidemantic.test @@ -117,6 +117,14 @@ orders products test_model +statement ok +SEMANTIC CREATE MODEL nested_props_model (metadata {owner analytics, name internal}, table orders, primary_key order_id); + +query I +SELECT count(*) FROM sidemantic_models() WHERE model_name = 'nested_props_model'; +---- +1 + # Test repeated YAML load replaces same-name model instead of failing on duplicates statement ok SELECT * FROM sidemantic_load(' From ab3938df655e870f87f3ae13e5d8d403b1622119 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 20:48:16 -0700 Subject: [PATCH 6/8] Validate DuckDB model name separators --- sidemantic-duckdb/src/sidemantic_extension.cpp | 9 ++++++++- sidemantic-duckdb/test/sql/sidemantic.test | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/sidemantic-duckdb/src/sidemantic_extension.cpp b/sidemantic-duckdb/src/sidemantic_extension.cpp index 6eae71db..c8e901bb 100644 --- a/sidemantic-duckdb/src/sidemantic_extension.cpp +++ b/sidemantic-duckdb/src/sidemantic_extension.cpp @@ -300,7 +300,8 @@ static bool StartsWithNameProperty(const std::string &value, size_t pos) { return false; } } - return std::isspace(value[pos + keyword.size()]); + auto separator = value[pos + keyword.size()]; + return std::isspace(separator) || separator == ':' || separator == '='; } static bool ExtractModelNameProperty(const std::string &body, std::string &name, std::string &error) { @@ -332,6 +333,12 @@ static bool ExtractModelNameProperty(const std::string &body, std::string &name, while (pos < body.size() && std::isspace(body[pos])) { pos++; } + if (pos < body.size() && (body[pos] == ':' || body[pos] == '=')) { + pos++; + while (pos < body.size() && std::isspace(body[pos])) { + pos++; + } + } if (pos < body.size() && (body[pos] == '\'' || body[pos] == '"')) { auto quote = body[pos]; pos++; diff --git a/sidemantic-duckdb/test/sql/sidemantic.test b/sidemantic-duckdb/test/sql/sidemantic.test index 951648ce..952ddbc0 100644 --- a/sidemantic-duckdb/test/sql/sidemantic.test +++ b/sidemantic-duckdb/test/sql/sidemantic.test @@ -109,6 +109,16 @@ SEMANTIC CREATE MODEL mismatched_quoted_model (name 'other_quoted_model', table ---- CREATE MODEL name 'mismatched_quoted_model' does not match body name 'other_quoted_model' +statement error +SEMANTIC CREATE MODEL mismatched_colon_model (name: other_colon_model, table orders, primary_key order_id); +---- +CREATE MODEL name 'mismatched_colon_model' does not match body name 'other_colon_model' + +statement error +SEMANTIC CREATE MODEL mismatched_equals_model (name=other_equals_model, table orders, primary_key order_id); +---- +CREATE MODEL name 'mismatched_equals_model' does not match body name 'other_equals_model' + # Verify test_model is loaded query I rowsort SELECT * FROM sidemantic_models(); From 14aeffbb61e4cf56a2833f27c6e863489d541445 Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 21:01:39 -0700 Subject: [PATCH 7/8] Terminate persisted SQL definitions --- sidemantic-rs/src/ffi.rs | 78 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/sidemantic-rs/src/ffi.rs b/sidemantic-rs/src/ffi.rs index 6143e1a7..2aff2479 100644 --- a/sidemantic-rs/src/ffi.rs +++ b/sidemantic-rs/src/ffi.rs @@ -715,18 +715,21 @@ fn should_remove_item_statement( fn insert_definition_at_block_end(block: &str, definition: &str) -> String { let trimmed_len = block.trim_end().len(); let (body, trailing) = block.split_at(trimmed_len); - let trimmed_definition = definition.trim(); + let terminated_definition = terminate_definition(definition); + if terminated_definition.is_empty() { + return block.to_string(); + } if body.is_empty() { - return format!("{trimmed_definition}{trailing}"); + return format!("{terminated_definition}{trailing}"); } - format!("{body}\n\n{trimmed_definition}{trailing}") + format!("{body}\n\n{terminated_definition}{trailing}") } fn append_definition_to_content(content: &str, definition: &str) -> String { - let trimmed_definition = definition.trim(); - if trimmed_definition.is_empty() { + let terminated_definition = terminate_definition(definition); + if terminated_definition.is_empty() { return content.trim_end().to_string(); } @@ -734,11 +737,20 @@ fn append_definition_to_content(content: &str, definition: &str) -> String { if !result.is_empty() { result.push_str("\n\n"); } - result.push_str(trimmed_definition); + result.push_str(&terminated_definition); result.push('\n'); result } +fn terminate_definition(definition: &str) -> String { + let trimmed_definition = definition.trim(); + if trimmed_definition.is_empty() || trimmed_definition.ends_with(';') { + trimmed_definition.to_string() + } else { + format!("{trimmed_definition};") + } +} + fn read_definitions_file(path: &Path) -> io::Result { match fs::read_to_string(path) { Ok(content) => Ok(content), @@ -2008,6 +2020,60 @@ models: remove_definitions_file(&db_path); } + #[test] + fn test_semicolonless_simple_as_definition_is_terminated_before_append() { + let _guard = test_lock(); + sidemantic_clear(); + + let db_path = unique_db_path("semicolonless_simple_as_append"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + remove_definitions_file(&db_path); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + + let orders = + CString::new("MODEL (name orders, table orders, primary_key order_id)").unwrap(); + assert_success(sidemantic_define(orders.as_ptr(), db_path.as_ptr(), false)); + + let metric = CString::new("METRIC revenue AS SUM(amount)").unwrap(); + assert_success(sidemantic_add_definition( + metric.as_ptr(), + db_path.as_ptr(), + false, + )); + + let customers = + CString::new("MODEL (name customers, table customers, primary_key customer_id)") + .unwrap(); + assert_success(sidemantic_define( + customers.as_ptr(), + db_path.as_ptr(), + false, + )); + + let content = fs::read_to_string(&definitions_path).unwrap(); + assert!( + content.contains("METRIC revenue AS SUM(amount);\n\nMODEL (name customers"), + "{content}" + ); + + sidemantic_clear(); + assert_success(sidemantic_autoload(db_path.as_ptr())); + + let customers_name = CString::new("customers").unwrap(); + assert!(sidemantic_is_model(customers_name.as_ptr())); + + let rewritten = take_rewrite_sql(sidemantic_rewrite( + CString::new("SELECT orders.revenue FROM orders") + .unwrap() + .as_ptr(), + )); + assert!(rewritten.contains("SUM"), "{rewritten}"); + assert!(rewritten.contains("amount"), "{rewritten}"); + assert!(!rewritten.contains("MODEL (name customers"), "{rewritten}"); + + remove_definitions_file(&db_path); + } + #[test] fn test_clear_resets_active_model() { let _guard = test_lock(); From f64b09a6a5b9d7ead0ad653244f5110d568b058f Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Sun, 31 May 2026 21:17:18 -0700 Subject: [PATCH 8/8] Accept compact persisted definitions --- sidemantic-rs/src/ffi.rs | 54 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/sidemantic-rs/src/ffi.rs b/sidemantic-rs/src/ffi.rs index 2aff2479..a6c0da2d 100644 --- a/sidemantic-rs/src/ffi.rs +++ b/sidemantic-rs/src/ffi.rs @@ -412,7 +412,7 @@ fn starts_with_definition_keyword(sql: &str, keyword: &str) -> bool { trimmed[keyword.len()..] .chars() .next() - .map(|ch| ch.is_whitespace()) + .map(|ch| ch.is_whitespace() || ch == '(') .unwrap_or(true) } @@ -461,7 +461,7 @@ fn keyword_matches_at(bytes: &[u8], idx: usize, keyword: &[u8]) -> bool { bytes .get(idx + keyword.len()) - .map(|byte| byte.is_ascii_whitespace()) + .map(|byte| byte.is_ascii_whitespace() || *byte == b'(') .unwrap_or(true) } @@ -1887,6 +1887,56 @@ models: remove_definitions_file(&db_path); } + #[test] + fn test_compact_parenthesized_definitions_are_detected_for_persistence_updates() { + let _guard = test_lock(); + sidemantic_clear(); + + let db_path = unique_db_path("compact_parenthesized_persistence"); + let db_path = CString::new(db_path.to_string_lossy().to_string()).unwrap(); + remove_definitions_file(&db_path); + let definitions_path = get_definitions_path(db_path.as_ptr()).unwrap(); + + let model = CString::new("MODEL(name orders, table orders, primary_key order_id)").unwrap(); + assert_success(sidemantic_define(model.as_ptr(), db_path.as_ptr(), false)); + + let old_metric = CString::new("METRIC(name revenue, agg sum, sql gross_amount)").unwrap(); + assert_success(sidemantic_add_definition( + old_metric.as_ptr(), + db_path.as_ptr(), + false, + )); + + let new_metric = CString::new("METRIC(name revenue, agg sum, sql net_amount)").unwrap(); + assert_success(sidemantic_add_definition( + new_metric.as_ptr(), + db_path.as_ptr(), + true, + )); + + let content = fs::read_to_string(&definitions_path).unwrap(); + assert_eq!( + content.matches("METRIC(name revenue").count(), + 1, + "{content}" + ); + assert!(!content.contains("gross_amount"), "{content}"); + assert!(content.contains("net_amount"), "{content}"); + + sidemantic_clear(); + assert_success(sidemantic_autoload(db_path.as_ptr())); + + let rewritten = take_rewrite_sql(sidemantic_rewrite( + CString::new("SELECT orders.revenue FROM orders") + .unwrap() + .as_ptr(), + )); + assert!(rewritten.contains("net_amount"), "{rewritten}"); + assert!(!rewritten.contains("gross_amount"), "{rewritten}"); + + remove_definitions_file(&db_path); + } + #[test] fn test_prefixed_definition_persists_under_target_model_block() { let _guard = test_lock();