From 1e0fac337edcb8d6e6bc68e3c2a333a913c74e03 Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Mon, 25 May 2026 03:07:43 -0700 Subject: [PATCH 1/9] refactor(mcp): two-database engine model + platform-default persistent path Begins migrating hyperdb-mcp toward an ephemeral-primary, persistent-attached session model. Each engine now holds two .hyper files at all times (when not in --ephemeral-only mode): - Ephemeral primary: $TMPDIR/hyperdb-mcp--/scratch.hyper Created fresh per-engine, deleted on Drop. The connection is bound here; unqualified SQL routes here. - Persistent attachment: at the platform-default location (or user-supplied path), attached under the reserved alias "persistent" during Engine::new. Survives across sessions. Per-engine sequence number lets multiple Engines coexist in the same PID (parallel test runners, restart-after-ConnectionLost) without colliding. New module: - paths: cross-platform resolution for the persistent-db default location using `dirs::data_dir()`. Override via HYPERDB_PERSISTENT_DB env var. Engine API changes: - workspace_path() -> ephemeral_path() and new persistent_path() - is_persistent() -> has_persistent() + persistent_was_just_created() - new resolve_target_db() resolves an optional database alias for tools - PERSISTENT_ALIAS = "persistent" const exposed - Drop always cleans up ephemeral; persistent stays where the user put it attach::reset_search_path now respects the always-on persistent attachment: re-pins to the primary's name instead of issuing RESET (which would leave the persistent attachment without a working unqualified-name resolver). Tests are all passing except for two clusters being addressed in later iterations: - saved_queries_tests (2 failures): WorkspaceStore still targets primary; iteration 7 will move it to the persistent attachment. - table_catalog_tests (2 failures): catalog ensure_exists still targets primary; iteration 5 will make it presence-conditional and per-DB. This is the second commit in a multi-step migration; --bare and --workspace CLI flags still work (deprecation comes in iteration 3). --- Cargo.lock | 105 +++++- hyperdb-mcp/Cargo.toml | 1 + hyperdb-mcp/examples/demo.rs | 2 +- hyperdb-mcp/src/attach.rs | 24 +- hyperdb-mcp/src/engine.rs | 405 +++++++++++++++++++----- hyperdb-mcp/src/lib.rs | 1 + hyperdb-mcp/src/paths.rs | 174 ++++++++++ hyperdb-mcp/src/server.rs | 8 +- hyperdb-mcp/src/watcher.rs | 5 +- hyperdb-mcp/tests/attach_tests.rs | 48 +-- hyperdb-mcp/tests/daemon_tests.rs | 22 +- hyperdb-mcp/tests/ingest_arrow_tests.rs | 2 +- hyperdb-mcp/tests/resource_tests.rs | 27 +- 13 files changed, 690 insertions(+), 134 deletions(-) create mode 100644 hyperdb-mcp/src/paths.rs diff --git a/Cargo.lock b/Cargo.lock index 15c8a7d..5e10a79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1058,13 +1058,34 @@ dependencies = [ "ctutils", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + [[package]] name = "dirs" version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" dependencies = [ - "dirs-sys", + "dirs-sys 0.5.0", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.4.6", + "windows-sys 0.48.0", ] [[package]] @@ -1075,7 +1096,7 @@ checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" dependencies = [ "libc", "option-ext", - "redox_users", + "redox_users 0.5.2", "windows-sys 0.61.2", ] @@ -1307,7 +1328,7 @@ dependencies = [ "core-foundation 0.9.4", "core-graphics", "core-text", - "dirs", + "dirs 6.0.0", "dwrote", "float-ord", "freetype-sys", @@ -1930,6 +1951,7 @@ dependencies = [ "chrono", "clap", "csv", + "dirs 5.0.1", "hyperdb-api", "libc", "notify", @@ -3381,6 +3403,17 @@ dependencies = [ "bitflags 2.11.1", ] +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 1.0.69", +] + [[package]] name = "redox_users" version = "0.5.2" @@ -4950,6 +4983,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -4977,6 +5019,21 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -5019,6 +5076,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -5031,6 +5094,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -5043,6 +5112,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -5067,6 +5142,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -5079,6 +5160,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -5091,6 +5178,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -5103,6 +5196,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" diff --git a/hyperdb-mcp/Cargo.toml b/hyperdb-mcp/Cargo.toml index ae36e8f..965b654 100644 --- a/hyperdb-mcp/Cargo.toml +++ b/hyperdb-mcp/Cargo.toml @@ -41,6 +41,7 @@ notify = "8" tokio-util = { version = "0.7", features = ["rt"] } tempfile = { workspace = true } sqlformat = "0.5.0" +dirs = "5" [target.'cfg(unix)'.dependencies] libc = "0.2" diff --git a/hyperdb-mcp/examples/demo.rs b/hyperdb-mcp/examples/demo.rs index e4c8880..863efe2 100644 --- a/hyperdb-mcp/examples/demo.rs +++ b/hyperdb-mcp/examples/demo.rs @@ -230,7 +230,7 @@ fn main() -> Result<(), Box> { // ── Step 1: spin up engine ───────────────────────────────────────── section("Step 1 · Launch the engine (ephemeral workspace)"); let engine = Engine::new(None)?; - println!(" Workspace: {}", engine.workspace_path().display()); + println!(" Ephemeral DB: {}", engine.ephemeral_path().display()); println!(" Log dir: {}", engine.log_dir().display()); println!(" hyperd is running: {}", engine.is_running()); diff --git a/hyperdb-mcp/src/attach.rs b/hyperdb-mcp/src/attach.rs index 4152bfb..4671998 100644 --- a/hyperdb-mcp/src/attach.rs +++ b/hyperdb-mcp/src/attach.rs @@ -67,13 +67,25 @@ fn set_primary_search_path(engine: &Engine) -> Result<(), McpError> { Ok(()) } -/// Inverse of [`set_primary_search_path`] — restores Hyper's default -/// `"$single"` search mode. Called when the registry has just -/// transitioned back to zero attachments so the connection behaves -/// exactly like a fresh single-database session. +/// Restore the connection's search-path posture when the user-visible +/// attachment registry transitions back to zero attachments. +/// +/// - When the engine has the default persistent attachment, leave the +/// pin in place: even with no user attachments, the persistent DB is +/// *still attached*, so Hyper's `"$single"` resolution would fail. +/// We pin explicitly to the ephemeral primary's name. +/// - When `--ephemeral-only` (no persistent attachment), restore Hyper's +/// default `"$single"` mode so the connection behaves exactly like a +/// fresh single-database session. fn reset_search_path(engine: &Engine) -> Result<(), McpError> { - engine.execute_command("RESET schema_search_path")?; - Ok(()) + if engine.has_persistent() { + // Re-pin to the primary's name so unqualified resolution keeps + // working alongside the ever-present persistent attachment. + set_primary_search_path(engine) + } else { + engine.execute_command("RESET schema_search_path")?; + Ok(()) + } } /// Policy for what [`AttachRegistry::attach`] should do when the diff --git a/hyperdb-mcp/src/engine.rs b/hyperdb-mcp/src/engine.rs index 29eef58..c0488a1 100644 --- a/hyperdb-mcp/src/engine.rs +++ b/hyperdb-mcp/src/engine.rs @@ -21,13 +21,24 @@ //! path covers both transport-level failures and the `"desynchronized"` state //! surfaced by the `hyper-client` layer's bounded drain. //! -//! # Workspace Modes +//! # Workspace Model //! -//! - **Persistent** — caller supplies a path via `--workspace`; the `.hyper` -//! file survives across sessions. Tables can be built up incrementally and -//! exported to Tableau Desktop. -//! - **Ephemeral** — a temp directory is created per process; everything is -//! discarded when the server exits. No configuration needed. +//! Every session has an **ephemeral primary database** at +//! `$TMPDIR/hyperdb-mcp-/scratch.hyper`. This is where unqualified +//! tool calls land — exploratory loads, ad-hoc queries, scratch tables. +//! It is created fresh on engine start and deleted (DETACH + remove) when +//! the engine drops. +//! +//! When a persistent path is supplied (CLI `--persistent-db`, env var +//! `HYPERDB_PERSISTENT_DB`, or the platform default), the engine records +//! it; the [`crate::server::HyperMcpServer`] then ATTACHes that file under +//! alias `"persistent"` after construction so the LLM can target it via +//! the `database` parameter on data tools, or via `persist: true` on +//! load tools. The persistent file lives across sessions. +//! +//! Passing `None` (or `--ephemeral-only` at the CLI) skips the persistent +//! attachment; the only available database is the ephemeral primary plus +//! any user-attached DBs. //! //! # Sync Calls in an Async Server //! @@ -41,20 +52,104 @@ use crate::daemon; use crate::error::{ErrorCode, McpError}; use crate::schema::ColumnSchema; -use hyperdb_api::{Catalog, Connection, CreateMode, HyperProcess, Parameters, SqlType}; +use hyperdb_api::{ + escape_sql_path, Catalog, Connection, CreateMode, HyperProcess, Parameters, SqlType, +}; use serde_json::{json, Value}; use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU64, Ordering}; + +/// Per-process counter so multiple `Engine` instances in the same PID get +/// distinct ephemeral directories (parallel test runners, embedded uses). +static EPHEMERAL_SEQ: AtomicU64 = AtomicU64::new(0); + +/// Reserved alias under which the default persistent database is attached. +/// Mirrored as [`Engine::PERSISTENT_ALIAS`] for the public API. +const PERSISTENT_ALIAS: &str = "persistent"; -/// Owns a connection to `hyperd` and the workspace `.hyper` file. All SQL -/// execution flows through this struct. +/// Outcome of [`attach_default_persistent`] — flags whether the file was +/// freshly created so the catalog-seed step can fire (or skip). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PersistentAttachOutcome { + /// `true` when MCP just created the `.hyper` file as part of the + /// attach; `false` when the file already existed and we attached it + /// as-is. + pub file_was_created: bool, +} + +/// Attach the persistent database under the reserved `"persistent"` +/// alias on `connection`, creating the underlying `.hyper` file if it +/// doesn't yet exist. Also pins `schema_search_path` to `primary_db_name` +/// so unqualified SQL keeps routing to the ephemeral primary. +fn attach_default_persistent( + connection: &Connection, + persistent_path: &Path, + primary_db_name: &str, +) -> Result { + let path_str = persistent_path.to_string_lossy(); + let file_was_created = !persistent_path.exists(); + if file_was_created { + let create_sql = format!( + "CREATE DATABASE IF NOT EXISTS {}", + escape_sql_path(&path_str) + ); + connection.execute_command(&create_sql).map_err(|e| { + McpError::new( + ErrorCode::InternalError, + format!("Failed to create persistent database: {e}"), + ) + })?; + } + let attach_sql = format!( + "ATTACH DATABASE {path} AS \"{alias}\"", + path = escape_sql_path(&path_str), + alias = PERSISTENT_ALIAS, + ); + connection.execute_command(&attach_sql).map_err(|e| { + McpError::new( + ErrorCode::InternalError, + format!("Failed to attach persistent database: {e}"), + ) + })?; + // Pin search_path to the primary so unqualified SQL keeps routing + // there even with the persistent attachment present. Mirrors the + // logic AttachRegistry uses for user-attached databases. + let pin_sql = format!( + "SET schema_search_path = '{}'", + primary_db_name.replace('\'', "''") + ); + connection.execute_command(&pin_sql).map_err(|e| { + McpError::new( + ErrorCode::InternalError, + format!("Failed to pin schema_search_path: {e}"), + ) + })?; + Ok(PersistentAttachOutcome { file_was_created }) +} + +/// File-stem of a `.hyper` path as the unqualified database name Hyper +/// uses internally. Falls back to `"scratch"` if the stem can't be read. +fn path_stem(path: &Path) -> String { + path.file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("scratch") + .to_string() +} + +/// Owns a connection to `hyperd`, the ephemeral primary database, and an +/// optional persistent attachment path. All SQL execution flows through +/// this struct. /// /// Two process modes: /// - **Local** — this engine owns the `HyperProcess` subprocess directly. /// - **Daemon** — a shared daemon manages `hyperd`; the engine only holds a connection. /// -/// Two workspace modes: -/// - **Persistent** — caller supplies a path; the `.hyper` file survives across sessions. -/// - **Ephemeral** — a temp directory is created per process; discarded on exit. +/// Database layout: +/// - The connection is *bound* to the ephemeral primary at +/// [`Self::ephemeral_path`]. Unqualified SQL routes here. +/// - When [`Self::persistent_path`] is `Some`, the server attaches that +/// file as `"persistent"` after engine construction. When `None`, no +/// persistent storage is available this session (`--ephemeral-only`). #[derive(Debug)] pub struct Engine { /// `None` in daemon mode (the daemon owns the process). @@ -62,20 +157,31 @@ pub struct Engine { /// Stored endpoint for daemon mode (the daemon advertises this). daemon_endpoint: Option, connection: Connection, - workspace_path: PathBuf, + /// The primary database for this session. Lives in a temp dir and is + /// deleted on `Drop`. + ephemeral_path: PathBuf, + /// User-data persistent database. Attached under alias `"persistent"` + /// during [`Engine::new`]. `None` in `--ephemeral-only` mode. + persistent_path: Option, + /// `true` when the persistent `.hyper` file was just created during + /// engine construction (so the catalog-seed step should fire). Reset + /// to `false` after the server consumes it via + /// [`Self::take_persistent_was_created`]. + persistent_was_created: bool, log_dir: PathBuf, - is_persistent: bool, } impl Engine { - /// Create a new Engine. If `workspace_path` is Some, use that path (persistent mode). - /// If None, use a temp file (ephemeral mode). + /// Create a new Engine. The connection is bound to a fresh ephemeral + /// primary in a temp directory. If `persistent_db_path` is `Some`, + /// the path is recorded so the server can ATTACH it post-construction; + /// passing `None` means `--ephemeral-only`. /// /// Connects to the shared daemon if available, falling back to a local `hyperd`. /// /// # Errors /// - /// - Returns [`ErrorCode::PermissionDenied`] if the workspace parent + /// - Returns [`ErrorCode::PermissionDenied`] if the persistent parent /// directory or the log directory cannot be created. /// - Returns [`ErrorCode::InternalError`] if the ephemeral temp /// directory cannot be created, if the `public` schema bootstrap @@ -83,46 +189,62 @@ impl Engine { /// - Returns [`ErrorCode::HyperdNotFound`] when [`HyperProcess::new`] /// reports the `hyperd` executable is missing or unreachable via /// `HYPERD_PATH`. - pub fn new(workspace_path: Option) -> Result { - Self::new_with_mode(workspace_path, false) + pub fn new(persistent_db_path: Option) -> Result { + Self::new_with_mode(persistent_db_path, false) } /// Create an engine that bypasses the shared daemon and spawns a private `hyperd`. /// /// # Errors /// Same as [`Self::new`]. - pub fn new_no_daemon(workspace_path: Option) -> Result { - Self::new_with_mode(workspace_path, true) + pub fn new_no_daemon(persistent_db_path: Option) -> Result { + Self::new_with_mode(persistent_db_path, true) } #[expect( clippy::needless_pass_by_value, - reason = "Option is consumed by the workspace path resolution logic" + reason = "Option is consumed by the path-expansion logic below" )] - fn new_with_mode(workspace_path: Option, no_daemon: bool) -> Result { - let (path, is_persistent) = if let Some(ref p) = workspace_path { - let path = PathBuf::from(shellexpand_tilde(p)); - if let Some(parent) = path.parent() { - std::fs::create_dir_all(parent).map_err(|e| { - McpError::new( - ErrorCode::PermissionDenied, - format!("Cannot create workspace directory: {e}"), - ) - })?; + fn new_with_mode( + persistent_db_path: Option, + no_daemon: bool, + ) -> Result { + // Resolve persistent path (if requested) and pre-create its parent dir. + let persistent_path = match persistent_db_path.as_deref() { + Some(p) => { + let path = PathBuf::from(shellexpand_tilde(p)); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).map_err(|e| { + McpError::new( + ErrorCode::PermissionDenied, + format!("Cannot create persistent-db directory: {e}"), + ) + })?; + } + Some(path) } - (path, true) - } else { - let dir = std::env::temp_dir().join(format!("hyperdb-mcp-{}", std::process::id())); - std::fs::create_dir_all(&dir).map_err(|e| { - McpError::new( - ErrorCode::InternalError, - format!("Cannot create temp directory: {e}"), - ) - })?; - (dir.join("workspace.hyper"), false) + None => None, }; - let log_dir = resolve_log_dir(workspace_path.as_deref()); + // Always allocate a fresh ephemeral primary in a per-engine temp dir. + // The directory name combines the PID and a process-wide counter so + // multiple Engine instances in the same process (parallel tests, + // embedded uses, restart-after-ConnectionLost) never collide. + let seq = EPHEMERAL_SEQ.fetch_add(1, Ordering::Relaxed); + let ephemeral_dir = + std::env::temp_dir().join(format!("hyperdb-mcp-{}-{seq}", std::process::id())); + std::fs::create_dir_all(&ephemeral_dir).map_err(|e| { + McpError::new( + ErrorCode::InternalError, + format!("Cannot create ephemeral directory: {e}"), + ) + })?; + let ephemeral_path = ephemeral_dir.join("scratch.hyper"); + + // Logs live next to the persistent file when one was supplied so + // operators find them in a stable location; otherwise next to the + // ephemeral primary. + let log_dir = resolve_log_dir(persistent_db_path.as_deref()); std::fs::create_dir_all(&log_dir).map_err(|e| { McpError::new( ErrorCode::PermissionDenied, @@ -132,7 +254,9 @@ impl Engine { // Try daemon mode first unless disabled if !no_daemon { - if let Some(engine) = Self::try_daemon_mode(&path, &log_dir, is_persistent)? { + if let Some(engine) = + Self::try_daemon_mode(&ephemeral_path, persistent_path.clone(), &log_dir)? + { return Ok(engine); } } @@ -153,29 +277,56 @@ impl Engine { } })?; - let connection = - Connection::new(&hyper, &path, CreateMode::CreateIfNotExists).map_err(|e| { + // Bind to the ephemeral primary. CreateAndReplace because a stale + // file in the per-pid temp dir from a crashed prior session would + // otherwise leak into this one. + let connection = Connection::new(&hyper, &ephemeral_path, CreateMode::CreateAndReplace) + .map_err(|e| { McpError::new(ErrorCode::InternalError, format!("Failed to connect: {e}")) })?; bootstrap_public_schema(&connection)?; + let primary_db_name = path_stem(&ephemeral_path); + let persistent_was_created = Self::attach_persistent_if_present( + &connection, + persistent_path.as_deref(), + &primary_db_name, + )?; + Ok(Self { hyper: Some(hyper), daemon_endpoint: None, connection, - workspace_path: path, + ephemeral_path, + persistent_path, + persistent_was_created, log_dir, - is_persistent, }) } + /// If `persistent_path` is `Some`, attach the file under the reserved + /// `"persistent"` alias and pin the search path. Returns `true` if + /// the file was just created, `false` if it already existed or if + /// `persistent_path` is `None`. + fn attach_persistent_if_present( + connection: &Connection, + persistent_path: Option<&Path>, + primary_db_name: &str, + ) -> Result { + let Some(path) = persistent_path else { + return Ok(false); + }; + let outcome = attach_default_persistent(connection, path, primary_db_name)?; + Ok(outcome.file_was_created) + } + /// Attempt to connect via the shared daemon. Returns `None` if the daemon /// cannot be reached (falls back to local mode). fn try_daemon_mode( - path: &Path, + ephemeral_path: &Path, + persistent_path: Option, log_dir: &Path, - is_persistent: bool, ) -> Result, McpError> { let port = daemon::discovery::resolve_port(); let info = match daemon::spawn::ensure_daemon(port) { @@ -187,10 +338,12 @@ impl Engine { }; let endpoint = &info.hyperd_endpoint; + // CreateAndReplace: same rationale as the local path — a per-pid + // temp file from a crashed prior session shouldn't leak in. let connection = Connection::connect( endpoint, - &path.to_string_lossy(), - CreateMode::CreateIfNotExists, + &ephemeral_path.to_string_lossy(), + CreateMode::CreateAndReplace, ) .map_err(|e| { // The daemon's discovery file points at this endpoint but we can't @@ -208,13 +361,21 @@ impl Engine { // Send heartbeat so daemon knows we're active let _ = daemon::health::send_command(info.health_port, "HEARTBEAT"); + let primary_db_name = path_stem(ephemeral_path); + let persistent_was_created = Self::attach_persistent_if_present( + &connection, + persistent_path.as_deref(), + &primary_db_name, + )?; + Ok(Some(Self { hyper: None, daemon_endpoint: Some(info.hyperd_endpoint), connection, - workspace_path: path.to_path_buf(), + ephemeral_path: ephemeral_path.to_path_buf(), + persistent_path, + persistent_was_created, log_dir: log_dir.to_path_buf(), - is_persistent, })) } @@ -253,16 +414,27 @@ impl Engine { .map_err(|e| McpError::new(ErrorCode::InternalError, e.to_string())) } - /// Absolute path to the `.hyper` workspace file on disk. - pub fn workspace_path(&self) -> &Path { - &self.workspace_path + /// Absolute path to the ephemeral primary `.hyper` file on disk. + pub fn ephemeral_path(&self) -> &Path { + &self.ephemeral_path + } + + /// Absolute path to the persistent `.hyper` file, or `None` when the + /// session is `--ephemeral-only`. + pub fn persistent_path(&self) -> Option<&Path> { + self.persistent_path.as_deref() } - /// Unqualified database name Hyper uses for the primary workspace — - /// the stem of [`Self::workspace_path`]. Matches what - /// [`hyperdb_api::Connection::new`] registers when it issues its implicit - /// `ATTACH DATABASE`, so fully-qualified SQL built with this value - /// resolves to the primary workspace. + /// Reserved alias under which the persistent database is attached + /// when [`Self::persistent_path`] is set. Visible to the LLM via the + /// `database` parameter and via `list_attached_databases`. + pub const PERSISTENT_ALIAS: &'static str = "persistent"; + + /// Unqualified database name Hyper uses for the ephemeral primary — + /// the stem of [`Self::ephemeral_path`]. Matches what + /// [`hyperdb_api::Connection::new`] registers when it issues its + /// implicit `ATTACH DATABASE`, so fully-qualified SQL built with this + /// value resolves to the primary. /// /// Also the correct value for `SET schema_search_path = '…'` while /// additional databases are attached: Hyper's default search path @@ -270,13 +442,46 @@ impl Engine { /// databases are attached, and starts resolving unqualified names to /// nothing the moment an `ATTACH DATABASE` runs. pub fn primary_db_name(&self) -> String { - self.workspace_path + self.ephemeral_path .file_stem() .and_then(|s| s.to_str()) - .unwrap_or("workspace") + .unwrap_or("scratch") .to_string() } + /// Resolve a tool's optional `database` parameter to a concrete + /// alias suitable for fully-qualifying SQL. `None` and `Some("")` + /// mean "the primary (ephemeral)"; `Some("persistent")` requires the + /// persistent attachment exists; any other value is returned + /// verbatim and assumed to be a user-attached alias. + /// + /// Returns the database alias to qualify against, or `None` to mean + /// "use the primary's name". This lets callers build qualified SQL + /// uniformly: `format!("\"{}\".\"public\".\"{}\"", alias_or_primary, table)`. + /// + /// # Errors + /// + /// Returns [`ErrorCode::InvalidArgument`] when `Some("persistent")` + /// is passed but [`Self::persistent_path`] is `None` + /// (`--ephemeral-only` mode). + pub fn resolve_target_db(&self, requested: Option<&str>) -> Result { + match requested.map(str::trim) { + None | Some("") => Ok(self.primary_db_name()), + Some(Self::PERSISTENT_ALIAS) => { + if self.persistent_path.is_none() { + return Err(McpError::new( + ErrorCode::InvalidArgument, + "no persistent database in this session — \ + hyperdb-mcp was started with --ephemeral-only" + .to_string(), + )); + } + Ok(Self::PERSISTENT_ALIAS.to_string()) + } + Some(other) => Ok(other.to_string()), + } + } + /// Directory where `hyperd` writes its log files. The MCP binary should /// also drop its own client-side log here so debugging starts in one /// place. @@ -313,10 +518,19 @@ impl Engine { candidates.into_iter().next().map(|(_, p)| p) } - /// `true` if the workspace was created from a user-supplied path - /// (data survives across sessions). - pub fn is_persistent(&self) -> bool { - self.is_persistent + /// `true` if a persistent database is attached to this session. + /// Equivalent to [`Self::persistent_path`] being `Some`. + pub fn has_persistent(&self) -> bool { + self.persistent_path.is_some() + } + + /// `true` when this engine just created the persistent `.hyper` file + /// during construction. The server consumes this signal once to + /// decide whether to seed `_table_catalog`; subsequent reads stay + /// `true` (the flag isn't reset — it's a fact about the engine's + /// startup, not a one-shot signal). + pub fn persistent_was_just_created(&self) -> bool { + self.persistent_was_created } /// Direct access to the underlying connection for operations not @@ -829,7 +1043,15 @@ impl Engine { .map(|name| catalog.get_row_count(name.as_str()).unwrap_or(0)) .sum(); - let disk_bytes = std::fs::metadata(&self.workspace_path).map_or(0, |m| m.len()); + // Disk size of the ephemeral primary. The persistent file is + // reported separately when present. + let ephemeral_bytes = std::fs::metadata(&self.ephemeral_path).map_or(0, |m| m.len()); + let persistent_bytes = self + .persistent_path + .as_ref() + .and_then(|p| std::fs::metadata(p).ok()) + .map_or(0u64, |m| m.len()); + let disk_bytes = ephemeral_bytes.saturating_add(persistent_bytes); let hyperd_log = self.hyperd_log_path().map_or(Value::Null, |p| { Value::String(p.to_string_lossy().into_owned()) @@ -841,10 +1063,15 @@ impl Engine { Value::Null }; + let persistent_path_value = self.persistent_path.as_ref().map_or(Value::Null, |p| { + Value::String(p.to_string_lossy().into_owned()) + }); + Ok(json!({ "hyperd_running": self.is_running(), - "workspace_path": self.workspace_path.to_string_lossy(), - "workspace_mode": if self.is_persistent { "persistent" } else { "ephemeral" }, + "ephemeral_path": self.ephemeral_path.to_string_lossy(), + "persistent_path": persistent_path_value, + "has_persistent": self.has_persistent(), "table_count": table_count, "total_rows": total_rows, "disk_usage_bytes": disk_bytes, @@ -1005,14 +1232,16 @@ pub fn is_internal_table(name: &str) -> bool { /// tracing log. Shared by [`Engine::new`] and `main` so both land in the /// same place. /// -/// - Persistent mode: same directory as the workspace file (with `~` -/// expansion applied). This way a project workspace like +/// - When a persistent path is supplied: same directory as that file +/// (with `~` expansion applied). A project DB like /// `~/projects/foo.hyper` gets logs in `~/projects/`. -/// - Ephemeral mode: same temp directory the Engine creates for the -/// workspace (`$TMPDIR/hyperdb-mcp-/`). +/// - When no persistent path is supplied (ephemeral-only sessions): +/// `$TMPDIR/hyperdb-mcp-/`. Multiple engines in the same PID +/// share this log dir, which is fine — `tracing` is process-wide and +/// the `.hyper` files themselves live in distinct per-engine subdirs. #[must_use] -pub fn resolve_log_dir(workspace_path: Option<&str>) -> PathBuf { - match workspace_path { +pub fn resolve_log_dir(persistent_db_path: Option<&str>) -> PathBuf { + match persistent_db_path { Some(p) => { let expanded = PathBuf::from(shellexpand_tilde(p)); expanded @@ -1148,16 +1377,24 @@ fn strip_leading_sql_comments(sql: &str) -> &str { impl Drop for Engine { fn drop(&mut self) { - // In daemon mode with ephemeral databases, DETACH the workspace from hyperd - // (releases the file handle — critical on Windows) then delete the temp file. - if !self.is_persistent && self.daemon_endpoint.is_some() { + // The ephemeral primary is always cleaned up. In daemon mode the + // shared hyperd holds the file handle even after this engine is + // dropped, so we DETACH first (Windows enforces file locks; this + // is a no-op on Unix but keeps behavior identical across platforms). + // The persistent attachment is left in place — its lifetime + // outlives the engine. + if self.daemon_endpoint.is_some() { let db_name = self.primary_db_name(); let detach = format!("DETACH DATABASE \"{db_name}\""); let _ = self.connection.execute_command(&detach); - // Remove the temp directory containing the ephemeral .hyper file - if let Some(parent) = self.workspace_path.parent() { - let _ = std::fs::remove_dir_all(parent); - } + } + // Remove the per-pid temp directory holding the ephemeral file. + // Safe in both daemon and local modes: in local mode the + // HyperProcess Drop tears down hyperd before this fires (Drop + // runs in field-declaration order), so the file is no longer + // open by the time we delete it. + if let Some(parent) = self.ephemeral_path.parent() { + let _ = std::fs::remove_dir_all(parent); } } } diff --git a/hyperdb-mcp/src/lib.rs b/hyperdb-mcp/src/lib.rs index 2d187ca..a7179a9 100644 --- a/hyperdb-mcp/src/lib.rs +++ b/hyperdb-mcp/src/lib.rs @@ -46,6 +46,7 @@ pub mod ingest; pub mod ingest_arrow; pub mod inspect; pub mod lakehouse; +pub mod paths; pub mod readme; pub mod saved_queries; pub mod schema; diff --git a/hyperdb-mcp/src/paths.rs b/hyperdb-mcp/src/paths.rs new file mode 100644 index 0000000..b36b789 --- /dev/null +++ b/hyperdb-mcp/src/paths.rs @@ -0,0 +1,174 @@ +// Copyright (c) 2026, Salesforce, Inc. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Cross-platform resolution for the persistent-database default path. +//! +//! The persistent database lives in the platform-standard data directory: +//! +//! - **macOS:** `~/Library/Application Support/hyperdb/workspace.hyper` +//! - **Linux:** `$XDG_DATA_HOME/hyperdb/workspace.hyper` +//! (defaults to `~/.local/share/hyperdb/workspace.hyper`) +//! - **Windows:** `%APPDATA%\hyperdb\workspace.hyper` +//! +//! Note this is intentionally distinct from `~/.hyperdb/`, which is the +//! daemon's state directory (`daemon.json`, `logs/`). Daemon coordination +//! and user data have different lifecycles, so they live in different +//! places. +//! +//! Resolution precedence: +//! 1. Explicit CLI value (`--persistent-db ` or the deprecated +//! `--workspace `). +//! 2. `HYPERDB_PERSISTENT_DB` environment variable. +//! 3. Platform default via [`dirs::data_dir`]. + +use std::path::PathBuf; + +/// Application directory name used inside the platform data dir. +const APP_DIR_NAME: &str = "hyperdb"; + +/// Filename of the persistent workspace inside the app dir. +const PERSISTENT_DB_FILENAME: &str = "workspace.hyper"; + +/// Environment variable that overrides the platform-default path. +pub const ENV_PERSISTENT_DB: &str = "HYPERDB_PERSISTENT_DB"; + +/// Returns the platform-default path for the persistent database. Returns +/// `None` if the home / data directory cannot be determined (rare; usually +/// indicates a misconfigured environment). +#[must_use] +pub fn default_persistent_db_path() -> Option { + Some( + dirs::data_dir()? + .join(APP_DIR_NAME) + .join(PERSISTENT_DB_FILENAME), + ) +} + +/// Resolve where the persistent database should live, applying the +/// CLI > env-var > platform-default precedence. Returns `None` only when +/// no source supplied a path (the platform default failed *and* nothing +/// was set explicitly), which the caller should treat as an error. +/// +/// `cli_value` is the value of `--persistent-db` (or `--workspace` after +/// deprecation translation). When `Some`, takes precedence over both +/// the env var and the platform default. +#[must_use] +pub fn resolve_persistent_db_path(cli_value: Option<&str>) -> Option { + if let Some(p) = cli_value { + return Some(PathBuf::from(p)); + } + if let Some(p) = std::env::var_os(ENV_PERSISTENT_DB) { + return Some(PathBuf::from(p)); + } + default_persistent_db_path() +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Process-wide lock for env-var tests. `std::env::set_var` is + /// `unsafe` in newer toolchains because it's not thread-safe; we + /// serialize all env-touching tests to keep them sound. + static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + + fn with_env_lock(f: impl FnOnce() -> R) -> R { + let _guard = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + f() + } + + /// Sets an env var. Marked `unsafe` because [`std::env::set_var`] is + /// `unsafe` in newer toolchains; callers hold `ENV_LOCK`. + unsafe fn set_env(key: &str, value: &str) { + // SAFETY: serialized by ENV_LOCK; matches std::env contract. + unsafe { std::env::set_var(key, value) } + } + + /// Removes an env var. Marked `unsafe` for the same reason as + /// [`set_env`]; callers hold `ENV_LOCK`. + unsafe fn remove_env(key: &str) { + // SAFETY: serialized by ENV_LOCK; matches std::env contract. + unsafe { std::env::remove_var(key) } + } + + #[test] + fn default_persistent_db_path_returns_some_on_supported_platforms() { + // On macOS, Linux, and Windows the platform helpers always + // resolve to a usable path. CI runs on these three; if this + // fails on a new platform we want a loud signal. + let p = default_persistent_db_path().expect("platform data_dir resolves"); + assert!(p.ends_with("hyperdb/workspace.hyper") || p.ends_with("hyperdb\\workspace.hyper")); + } + + #[cfg(target_os = "macos")] + #[test] + fn default_persistent_db_path_uses_app_support_on_macos() { + let p = default_persistent_db_path().unwrap(); + let s = p.to_string_lossy(); + assert!( + s.contains("Library/Application Support/hyperdb/"), + "expected macOS Application Support path, got {s}" + ); + } + + #[cfg(target_os = "linux")] + #[test] + fn default_persistent_db_path_uses_xdg_share_on_linux() { + let p = default_persistent_db_path().unwrap(); + let s = p.to_string_lossy(); + assert!( + s.contains(".local/share/hyperdb/") || s.contains("share/hyperdb/"), + "expected XDG share path, got {s}" + ); + } + + #[cfg(windows)] + #[test] + fn default_persistent_db_path_uses_appdata_on_windows() { + let p = default_persistent_db_path().unwrap(); + let s = p.to_string_lossy(); + assert!( + s.contains("hyperdb"), + "expected APPDATA path containing hyperdb, got {s}" + ); + } + + #[test] + fn resolve_persistent_db_path_cli_takes_precedence() { + with_env_lock(|| { + // SAFETY: serialized by ENV_LOCK. + unsafe { set_env(ENV_PERSISTENT_DB, "/from/env.hyper") }; + let p = resolve_persistent_db_path(Some("/from/cli.hyper")) + .expect("CLI path always resolves"); + assert_eq!(p, PathBuf::from("/from/cli.hyper")); + // SAFETY: serialized by ENV_LOCK. + unsafe { remove_env(ENV_PERSISTENT_DB) }; + }); + } + + #[test] + fn resolve_persistent_db_path_env_used_when_no_cli() { + with_env_lock(|| { + // SAFETY: serialized by ENV_LOCK. + unsafe { set_env(ENV_PERSISTENT_DB, "/from/env.hyper") }; + let p = resolve_persistent_db_path(None).expect("env path resolves"); + assert_eq!(p, PathBuf::from("/from/env.hyper")); + // SAFETY: serialized by ENV_LOCK. + unsafe { remove_env(ENV_PERSISTENT_DB) }; + }); + } + + #[test] + fn resolve_persistent_db_path_falls_back_to_default() { + with_env_lock(|| { + // SAFETY: serialized by ENV_LOCK. + unsafe { remove_env(ENV_PERSISTENT_DB) }; + let p = resolve_persistent_db_path(None).expect("default resolves"); + // Just check it's under hyperdb/ — exact location varies by + // platform and is covered by the default-path tests above. + assert!(p.to_string_lossy().contains("hyperdb")); + }); + } +} diff --git a/hyperdb-mcp/src/server.rs b/hyperdb-mcp/src/server.rs index 3e24d0b..8029f5b 100644 --- a/hyperdb-mcp/src/server.rs +++ b/hyperdb-mcp/src/server.rs @@ -958,7 +958,8 @@ impl HyperMcpServer { Engine::new(self.workspace_path.clone())? }; tracing::info!( - workspace_path = %engine.workspace_path().display(), + ephemeral_path = %engine.ephemeral_path().display(), + persistent_path = ?engine.persistent_path(), log_dir = %engine.log_dir().display(), "engine ready" ); @@ -1574,7 +1575,10 @@ impl HyperMcpServer { // connection. let (endpoint, workspace) = match self.with_engine(|engine| { let endpoint = engine.hyperd_endpoint()?; - let workspace = engine.workspace_path().to_string_lossy().to_string(); + // The pool connects to the engine's primary database (ephemeral). + // Tools that target the persistent attachment use qualified SQL + // through the same connection. + let workspace = engine.ephemeral_path().to_string_lossy().to_string(); Ok((endpoint, workspace)) }) { Ok(v) => v, diff --git a/hyperdb-mcp/src/watcher.rs b/hyperdb-mcp/src/watcher.rs index 90c8044..226f4fc 100644 --- a/hyperdb-mcp/src/watcher.rs +++ b/hyperdb-mcp/src/watcher.rs @@ -336,7 +336,10 @@ pub fn start_watching( ) })?; let endpoint = eng.hyperd_endpoint()?; - let workspace = eng.workspace_path().to_string_lossy().to_string(); + // Watcher pool connects to the engine's primary (ephemeral). The + // user-supplied table on `watch_directory` always lives there + // unless a future flag routes it to the persistent attachment. + let workspace = eng.ephemeral_path().to_string_lossy().to_string(); let cfg = PoolConfig::new(endpoint, workspace) .create_mode(CreateMode::DoNotCreate) .max_size(concurrency); diff --git a/hyperdb-mcp/tests/attach_tests.rs b/hyperdb-mcp/tests/attach_tests.rs index f4f057c..e1161e3 100644 --- a/hyperdb-mcp/tests/attach_tests.rs +++ b/hyperdb-mcp/tests/attach_tests.rs @@ -40,14 +40,20 @@ fn primary_workspace() -> (Engine, TempDir) { fn build_source_hyper_file(dir: &TempDir, name: &str, rows: &[(i32, &str)]) -> std::path::PathBuf { let path = dir.path().join(name); { + // Spin up a throwaway engine with this path as the persistent + // attachment, write data into it via fully-qualified SQL, then + // drop the engine. The `.hyper` file at `path` survives with the + // populated table; the engine's ephemeral primary is cleaned up. let engine = Engine::new_no_daemon(Some(path.to_string_lossy().into())).unwrap(); engine - .execute_command("CREATE TABLE t (a INT, b TEXT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".\"t\" (a INT, b TEXT)") .unwrap(); for (a, b) in rows { let escaped = b.replace('\'', "''"); engine - .execute_command(&format!("INSERT INTO t VALUES ({a}, '{escaped}')")) + .execute_command(&format!( + "INSERT INTO \"persistent\".\"public\".\"t\" VALUES ({a}, '{escaped}')" + )) .unwrap(); } } @@ -529,12 +535,7 @@ fn copy_create_from_attached_source() { // an unqualified `CREATE TABLE imported` would fail with // "create statement could not resolve the schema (3F000)". These // direct SQL statements mirror what `perform_copy` emits. - let primary_db = engine - .workspace_path() - .file_stem() - .and_then(|s| s.to_str()) - .unwrap() - .to_string(); + let primary_db = engine.primary_db_name(); let target = format!("\"{primary_db}\".\"public\".\"imported\""); // "create" mode — target does not exist yet. @@ -634,12 +635,7 @@ fn copy_create_stubs_table_catalog_on_primary_workspace() { // Seed a source table that lives inside the primary workspace — // no attachment needed for this regression. Matches the shape a // `copy_query` call would produce. - let primary_alias = engine - .workspace_path() - .file_stem() - .and_then(|s| s.to_str()) - .unwrap() - .to_string(); + let primary_alias = engine.primary_db_name(); let qualified_target = format!("\"{primary_alias}\".\"public\".\"derived\""); engine .execute_command(&format!( @@ -802,7 +798,10 @@ fn detach_resets_schema_search_path_and_preserves_primary_access() { assert!(registry.detach(&engine, "src").unwrap()); assert!(registry.list().is_empty()); - // Post-detach: the RESET landed, `"$single"` is back. + // Post-detach: with the default persistent attachment in place, + // search_path stays pinned to the primary's name (we cannot RESET to + // `"$single"` because the persistent DB is still attached). Without + // the pin, unqualified resolution would break. let rows = engine .execute_query_to_json("SHOW schema_search_path") .unwrap(); @@ -811,8 +810,9 @@ fn detach_resets_schema_search_path_and_preserves_primary_access() { .and_then(|v| v.as_str()) .unwrap_or(""); assert_eq!( - setting, "\"$single\"", - "last detach should restore Hyper's default; got {setting:?}" + setting, + engine.primary_db_name(), + "last detach should pin to primary while persistent stays attached; got {setting:?}" ); // Unqualified SELECT still works — the real user contract. @@ -871,12 +871,15 @@ fn replay_restores_schema_search_path_pin() { let dir = TempDir::new().unwrap(); let primary_path = dir.path().join("primary.hyper"); { + // Seed the persistent file directly via fully-qualified SQL. let engine = Engine::new_no_daemon(Some(primary_path.to_string_lossy().into())).unwrap(); engine - .execute_command("CREATE TABLE primary_t (x INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".\"primary_t\" (x INT)") .unwrap(); engine - .execute_command("INSERT INTO primary_t VALUES (1), (2), (3)") + .execute_command( + "INSERT INTO \"persistent\".\"public\".\"primary_t\" VALUES (1), (2), (3)", + ) .unwrap(); } let source = build_source_hyper_file(&dir, "source.hyper", &[(1, "a")]); @@ -902,8 +905,11 @@ fn replay_restores_schema_search_path_pin() { let engine_b = Engine::new_no_daemon(Some(primary_path.to_string_lossy().into())).unwrap(); registry.replay_all(&engine_b).unwrap(); - // Unqualified access to the primary must work after replay. - assert_eq!(row_count(&engine_b, "primary_t"), 3); + // Qualified access to the seeded persistent table. + assert_eq!( + row_count(&engine_b, "\"persistent\".\"public\".\"primary_t\""), + 3 + ); // Qualified access to the replayed attachment must also work. assert_eq!(row_count(&engine_b, "\"src\".public.t"), 1); } diff --git a/hyperdb-mcp/tests/daemon_tests.rs b/hyperdb-mcp/tests/daemon_tests.rs index 2194839..c1d58c1 100644 --- a/hyperdb-mcp/tests/daemon_tests.rs +++ b/hyperdb-mcp/tests/daemon_tests.rs @@ -665,14 +665,18 @@ fn engine_recovers_after_hyperd_killed() { let path = tmp.path().join("recover.hyper"); let path_str = path.to_str().unwrap().to_string(); - // Engine #1: pre-kill + // Engine #1: pre-kill. Write data into the persistent attachment via + // fully-qualified SQL so it survives the engine drop and the + // hyperd restart (the engine's ephemeral primary would not). { let engine = hyperdb_mcp::engine::Engine::new(Some(path_str.clone())).unwrap(); engine - .execute_command("CREATE TABLE keepers (n INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".\"keepers\" (n INT)") .unwrap(); engine - .execute_command("INSERT INTO keepers VALUES (1), (2), (3)") + .execute_command( + "INSERT INTO \"persistent\".\"public\".\"keepers\" VALUES (1), (2), (3)", + ) .unwrap(); } @@ -688,12 +692,10 @@ fn engine_recovers_after_hyperd_killed() { // Engine #2: post-restart. This mirrors what `with_engine` does after a // ConnectionLost — drop the old engine (already done above) and create a // fresh one. The fresh engine re-discovers the daemon and connects to the - // new endpoint. + // new endpoint, then re-attaches the persistent file. let engine = hyperdb_mcp::engine::Engine::new(Some(path_str)).unwrap(); - // The persistent .hyper file is intact on disk. Re-attaching via a new - // engine should let us see the data we wrote pre-kill. let rows = engine - .execute_query_to_json("SELECT n FROM keepers ORDER BY n") + .execute_query_to_json("SELECT n FROM \"persistent\".\"public\".\"keepers\" ORDER BY n") .unwrap(); assert_eq!(rows.len(), 3, "data persisted across hyperd restart"); } @@ -704,9 +706,9 @@ fn daemon_mode_ephemeral_database_cleaned_up_on_drop() { let _daemon = TestDaemon::start(); let engine = hyperdb_mcp::engine::Engine::new(None).unwrap(); - let workspace_path = engine.workspace_path().to_path_buf(); + let ephemeral_path = engine.ephemeral_path().to_path_buf(); - assert!(workspace_path.exists()); + assert!(ephemeral_path.exists()); engine .execute_command("CREATE TABLE ephemeral_test (id INT)") @@ -715,7 +717,7 @@ fn daemon_mode_ephemeral_database_cleaned_up_on_drop() { drop(engine); assert!( - !workspace_path.exists(), + !ephemeral_path.exists(), "ephemeral .hyper file should be deleted after engine drop" ); } diff --git a/hyperdb-mcp/tests/ingest_arrow_tests.rs b/hyperdb-mcp/tests/ingest_arrow_tests.rs index e5029a7..dff5c29 100644 --- a/hyperdb-mcp/tests/ingest_arrow_tests.rs +++ b/hyperdb-mcp/tests/ingest_arrow_tests.rs @@ -521,7 +521,7 @@ async fn load_files_runs_parallel_parquet_ingests() { // Build a pool against the running hyperd — same recipe the MCP's // `load_files` tool uses. let endpoint = te.engine.hyperd_endpoint().unwrap(); - let workspace = te.engine.workspace_path().to_string_lossy().to_string(); + let workspace = te.engine.ephemeral_path().to_string_lossy().to_string(); let pool = Arc::new( create_pool( PoolConfig::new(endpoint, workspace) diff --git a/hyperdb-mcp/tests/resource_tests.rs b/hyperdb-mcp/tests/resource_tests.rs index 666a1f4..d6221b9 100644 --- a/hyperdb-mcp/tests/resource_tests.rs +++ b/hyperdb-mcp/tests/resource_tests.rs @@ -7,8 +7,9 @@ use hyperdb_mcp::server::HyperMcpServer; use tempfile::TempDir; -/// Build a server with a fresh temp workspace, populate it with a test table, -/// and return both the server and the temp dir (held for lifetime). +/// Build a server with a fresh temp workspace, populate the engine's +/// ephemeral primary with a test table, and return both the server and +/// the temp dir. /// /// The server is constructed in `--bare` mode so the MCP-managed /// `_table_catalog` doesn't appear alongside `widgets` and perturb the @@ -17,14 +18,24 @@ use tempfile::TempDir; /// /// Uses `no_daemon` mode to avoid interference from any daemon running /// in parallel (e.g. from daemon_tests in the same `cargo test` run). +/// +/// To seed the table inside the server's own engine (so the data is +/// visible to resource handlers), we read `hyper://workspace` first to +/// trigger lazy engine init, then reach into the engine handle to run +/// the seeding DDL/DML directly. fn server_with_test_table() -> (HyperMcpServer, TempDir) { let dir = TempDir::new().unwrap(); let path = dir.path().join("workspace.hyper"); let server = HyperMcpServer::with_no_daemon(Some(path.to_str().unwrap().into()), false, true, true); + // Trigger lazy engine init. + let _ = server.resource_body_for_uri("hyper://workspace"); + // Seed the test table in the server's ephemeral primary so the + // resource handlers see it through `describe_tables` etc. { - use hyperdb_mcp::engine::Engine; - let engine = Engine::new_no_daemon(Some(path.to_str().unwrap().into())).unwrap(); + let handle = server.engine_handle(); + let guard = handle.lock().expect("engine mutex"); + let engine = guard.as_ref().expect("engine initialized"); engine .execute_command("CREATE TABLE widgets (id INT NOT NULL, name TEXT)") .unwrap(); @@ -64,7 +75,13 @@ fn read_workspace_resource_returns_status() { assert_eq!(body.mime_type(), "application/json"); let json = body.as_json().expect("workspace is JSON"); assert_eq!(json["hyperd_running"], true); - assert!(json["workspace_path"].is_string()); + assert!(json["ephemeral_path"].is_string()); + // `persistent_path` is either a string (when persistent attached) or + // null (--ephemeral-only). The test fixture supplies a path so we + // expect the string form; the orthogonal `has_persistent` flag mirrors + // the same fact and is checked first. + assert_eq!(json["has_persistent"], true); + assert!(json["persistent_path"].is_string()); assert_eq!(json["table_count"], 1); let version = json["hyper_rust_api_version"] .as_str() From 5a77a272dde6bfcd3d9882e89529dd6cd458d7d5 Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Mon, 25 May 2026 03:34:43 -0700 Subject: [PATCH 2/9] feat(mcp): CLI surface for ephemeral-primary model + remove --bare MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CLI changes: - New `--persistent-db `: replaces `--workspace`. Defaults to the platform data dir (or HYPERDB_PERSISTENT_DB env var). The deprecated `--workspace` is hidden but still accepted with a stderr warning; passing both is an error. - New `--ephemeral-only`: skip persistent attachment entirely. Saved queries fall back to in-memory storage. - Removed `--bare`. Catalog creation is now uniform (always seed when MCP creates a fresh .hyper file; never touch an existing file's catalog), so the opt-out flag became redundant. Users wanting a pristine .hyper for export can DROP TABLE _table_catalog after creation; subsequent opens won't recreate it. HyperMcpServer: - new() and with_no_daemon() drop the `bare` parameter. - is_bare() removed. - Saved-query store selection becomes "persistent if available, session otherwise" — same logic, but driven by --ephemeral-only instead of --bare. - AttachRegistry no longer takes a catalog policy; `seed_catalog_on_create` becomes the constant default. `_table_catalog` is now treated as an internal table by `is_internal_table` so it doesn't appear in user-visible `describe_tables` output or `total_rows`. The catalog's own `table_present`/`user_tables` helpers go through the raw Catalog directly to bypass that filter. attach::reset_search_path keeps the primary-name pin when a default persistent attachment is in place — RESET to "$single" would break unqualified resolution while persistent stays attached. Tests updated: - All `HyperMcpServer::new(path, ro, bare)` callsites lose their bare arg. - Two `--bare`-specific tests deleted (bare_server_does_not_create_catalog, is_bare_reflects_constructor_argument). - One detach-search-path test updated to expect the primary-name pin instead of "$single". - Test helpers writing to "the workspace" updated to write into the persistent attachment via fully-qualified SQL. - `table_exists` test helper goes through Catalog directly so it can see internal tables (which the new is_internal_table now filters out). Five tests still failing in saved_queries and table_catalog: - saved_queries: WorkspaceStore still targets primary; iteration 7. - table_catalog: ensure_exists/reconcile/upsert_stub still target primary; iteration 5 will route them to the persistent attachment. --- hyperdb-mcp/src/engine.rs | 19 +++-- hyperdb-mcp/src/main.rs | 74 +++++++++++++++--- hyperdb-mcp/src/server.rs | 99 +++++++++--------------- hyperdb-mcp/src/table_catalog.rs | 35 ++++----- hyperdb-mcp/tests/read_only_tests.rs | 4 +- hyperdb-mcp/tests/resource_tests.rs | 10 +-- hyperdb-mcp/tests/saved_queries_tests.rs | 4 +- hyperdb-mcp/tests/table_catalog_tests.rs | 54 +++---------- hyperdb-mcp/tests/watcher_tests.rs | 2 +- 9 files changed, 150 insertions(+), 151 deletions(-) diff --git a/hyperdb-mcp/src/engine.rs b/hyperdb-mcp/src/engine.rs index c0488a1..fb3a17c 100644 --- a/hyperdb-mcp/src/engine.rs +++ b/hyperdb-mcp/src/engine.rs @@ -1219,13 +1219,22 @@ pub const CLIENT_LOG_FILE_NAME: &str = "hyperdb-mcp.log"; /// automatically — no per-table filter list to keep in sync. pub const HYPERDB_INTERNAL_PREFIX: &str = "_hyperdb_"; -/// Returns true when `name` is one of `HyperDB`'s own internal tables -/// (matches [`HYPERDB_INTERNAL_PREFIX`]). Factored into a helper so -/// every filter site calls the same predicate and a future move to a -/// more nuanced scheme (e.g. per-table allowlist) is a single edit. +/// Names of MCP-managed tables that don't follow the `_hyperdb_` prefix +/// but are also infrastructure rather than user data. Currently just +/// `_table_catalog`. Added because `--bare` (the historical opt-out for +/// catalog creation) was removed, so the catalog table now reliably +/// appears alongside user tables; filtering it from the user-visible +/// list and counts keeps the LLM's mental model clean. +const HYPERDB_NAMED_INTERNAL_TABLES: &[&str] = &[crate::table_catalog::TABLE_CATALOG_TABLE]; + +/// Returns true when `name` is one of `HyperDB`'s own internal tables — +/// either matching [`HYPERDB_INTERNAL_PREFIX`] (saved-queries, watcher +/// state, etc.) or appearing in [`HYPERDB_NAMED_INTERNAL_TABLES`] (the +/// `_table_catalog`, which predates the prefix convention). Factored +/// into a helper so every filter site calls the same predicate. #[must_use] pub fn is_internal_table(name: &str) -> bool { - name.starts_with(HYPERDB_INTERNAL_PREFIX) + name.starts_with(HYPERDB_INTERNAL_PREFIX) || HYPERDB_NAMED_INTERNAL_TABLES.contains(&name) } /// Compute the log directory for both `hyperd` output and the client-side diff --git a/hyperdb-mcp/src/main.rs b/hyperdb-mcp/src/main.rs index d83917f..a617117 100644 --- a/hyperdb-mcp/src/main.rs +++ b/hyperdb-mcp/src/main.rs @@ -26,6 +26,7 @@ use hyperdb_mcp::daemon::discovery; use hyperdb_mcp::daemon::health; use hyperdb_mcp::daemon::run::DaemonConfig; use hyperdb_mcp::engine::{resolve_log_dir, CLIENT_LOG_FILE_NAME}; +use hyperdb_mcp::paths; use hyperdb_mcp::server::HyperMcpServer; use rmcp::ServiceExt; use tracing_subscriber::{fmt, prelude::*, EnvFilter}; @@ -45,25 +46,63 @@ struct Cli { #[command(subcommand)] command: Option, - /// Path to the `.hyper` workspace file for persistent mode (omit for ephemeral mode) + /// Path to the persistent `.hyper` file. Defaults to the platform + /// data dir (e.g. `~/Library/Application Support/hyperdb/workspace.hyper` + /// on macOS) or the `HYPERDB_PERSISTENT_DB` env var if set. #[arg(long, global = true)] + persistent_db: Option, + + /// DEPRECATED alias for `--persistent-db`. Will be removed in a + /// future release. + #[arg(long, global = true, hide = true)] workspace: Option, - /// Run in read-only mode: disables execute, `load_data`, `load_file`, and export to hyper format + /// Skip opening any persistent database. The session has only the + /// ephemeral primary plus any user-attached databases. Disables + /// `save_query` persistence (queries fall back to session storage). #[arg(long, global = true)] - read_only: bool, + ephemeral_only: bool, - /// Bare mode: disable MCP-managed auxiliary tables. Skips creating - /// `_table_catalog` and forces saved queries into in-memory - /// (non-persistent) storage, even with --workspace. + /// Run in read-only mode: disables execute, `load_data`, `load_file`, and export to hyper format #[arg(long, global = true)] - bare: bool, + read_only: bool, /// Disable the shared daemon and spawn a private `hyperd` (legacy behavior) #[arg(long, global = true)] no_daemon: bool, } +impl Cli { + /// Translate the deprecated `--workspace` flag to `--persistent-db`, + /// emitting a one-time deprecation warning, and resolve the final + /// persistent path according to the precedence rules in + /// [`paths::resolve_persistent_db_path`]. Returns `None` only when + /// `--ephemeral-only` is set. + /// + /// Errors out if both `--persistent-db` and `--workspace` are + /// supplied — there's no sensible "winner", so be loud about it. + fn resolve_persistent_path(&self) -> Result, &'static str> { + if self.ephemeral_only { + if self.persistent_db.is_some() || self.workspace.is_some() { + return Err("--ephemeral-only is incompatible with --persistent-db / --workspace"); + } + return Ok(None); + } + if self.persistent_db.is_some() && self.workspace.is_some() { + return Err("Both --persistent-db and --workspace were supplied. \ + --workspace is a deprecated alias; pass only --persistent-db."); + } + if self.workspace.is_some() { + eprintln!( + "warning: --workspace is deprecated; use --persistent-db instead. \ + The old flag will be removed in a future release." + ); + } + let cli_value = self.persistent_db.as_deref().or(self.workspace.as_deref()); + Ok(paths::resolve_persistent_db_path(cli_value)) + } +} + #[derive(Subcommand)] enum Commands { /// Run as a background daemon managing a shared hyperd process @@ -143,7 +182,19 @@ async fn run_daemon_mode( } async fn run_mcp_mode(cli: Cli) -> Result<(), Box> { - let log_dir = resolve_log_dir(cli.workspace.as_deref()); + let persistent_path = match cli.resolve_persistent_path() { + Ok(p) => p, + Err(msg) => { + eprintln!("error: {msg}"); + std::process::exit(2); + } + }; + // Pass the resolved path to log-dir resolution: ephemeral-only + // sessions land in the per-pid temp dir. + let persistent_str = persistent_path + .as_ref() + .map(|p| p.to_string_lossy().into_owned()); + let log_dir = resolve_log_dir(persistent_str.as_deref()); if let Err(e) = std::fs::create_dir_all(&log_dir) { eprintln!( "warning: failed to create log directory {}: {e} — client logs will go to stderr only", @@ -165,15 +216,14 @@ async fn run_mcp_mode(cli: Cli) -> Result<(), Box> { tracing::info!( log_dir = %log_dir.display(), - workspace = cli.workspace.as_deref().unwrap_or(""), + persistent_db = persistent_str.as_deref().unwrap_or(""), read_only = cli.read_only, - bare = cli.bare, + ephemeral_only = cli.ephemeral_only, no_daemon = cli.no_daemon, "hyperdb-mcp starting" ); - let server = - HyperMcpServer::with_no_daemon(cli.workspace, cli.read_only, cli.bare, cli.no_daemon); + let server = HyperMcpServer::with_no_daemon(persistent_str, cli.read_only, cli.no_daemon); let service = server.serve(rmcp::transport::io::stdio()).await?; service.waiting().await?; diff --git a/hyperdb-mcp/src/server.rs b/hyperdb-mcp/src/server.rs index 8029f5b..4cfce52 100644 --- a/hyperdb-mcp/src/server.rs +++ b/hyperdb-mcp/src/server.rs @@ -748,13 +748,11 @@ pub struct HyperMcpServer { /// calls [`AttachRegistry::replay_all`] after building a fresh /// engine. attachments: Arc, + /// Path to the persistent `.hyper` file, or `None` for `--ephemeral-only`. + /// Threaded into `Engine::new` so the engine can attach it under the + /// reserved `"persistent"` alias. workspace_path: Option, read_only: bool, - /// Bare mode: skip all MCP-managed auxiliary tables (catalog *and* - /// saved-queries persistence). Saved queries fall back to - /// [`crate::saved_queries::SessionStore`] and the catalog is never - /// created or updated. - bare: bool, /// Skip the shared daemon and spawn a private `hyperd` (legacy behavior). no_daemon: bool, /// Last time a heartbeat was sent to the daemon (debounced to avoid per-call TCP overhead). @@ -772,9 +770,9 @@ pub struct HyperMcpServer { impl std::fmt::Debug for HyperMcpServer { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("HyperMcpServer") - .field("workspace_path", &self.workspace_path) + .field("persistent_path", &self.workspace_path) .field("read_only", &self.read_only) - .field("bare", &self.bare) + .field("no_daemon", &self.no_daemon) .finish_non_exhaustive() } } @@ -797,50 +795,38 @@ impl HyperMcpServer { /// When `bare` is `true`, the server does not create or maintain the /// `_table_catalog` table, and saved queries fall back to the in-memory /// [`crate::saved_queries::SessionStore`] regardless of `workspace_path` - /// — so no `_hyperdb_*` or `_table_catalog` tables are written to the - /// workspace file. Useful when callers want a pristine `.hyper` file - /// containing only their own data. - pub fn new(workspace_path: Option, read_only: bool, bare: bool) -> Self { - Self::with_options(workspace_path, read_only, bare, false) + /// `persistent_path` is the resolved path to the persistent database + /// (`Some`) or `None` for `--ephemeral-only` mode. + pub fn new(persistent_path: Option, read_only: bool) -> Self { + Self::with_options(persistent_path, read_only, false) } /// Create a server instance with explicit daemon control. pub fn with_no_daemon( - workspace_path: Option, + persistent_path: Option, read_only: bool, - bare: bool, no_daemon: bool, ) -> Self { - Self::with_options(workspace_path, read_only, bare, no_daemon) + Self::with_options(persistent_path, read_only, no_daemon) } - fn with_options( - workspace_path: Option, - read_only: bool, - bare: bool, - no_daemon: bool, - ) -> Self { - // Bare mode forces a SessionStore regardless of workspace so the - // `_hyperdb_saved_queries` meta-table is never created. - let saved_queries: Arc = if bare { - build_store(None) - } else { - build_store(workspace_path.as_deref()) - }; + fn with_options(persistent_path: Option, read_only: bool, no_daemon: bool) -> Self { + // Saved queries persist when a persistent database is available; + // session storage takes over for `--ephemeral-only` sessions. + let saved_queries: Arc = build_store(persistent_path.as_deref()); Self { engine: Arc::new(Mutex::new(None)), catalog_ready: Arc::new(Mutex::new(false)), watchers: Arc::new(crate::watcher::WatcherRegistry::new()), saved_queries, subscriptions: Arc::new(SubscriptionRegistry::new()), - // Bare servers never seed `_table_catalog` into any - // attached database — pristine workspaces stay pristine - // even when used as scratch targets for `attach_database` - // with `on_missing: create`. - attachments: Arc::new(AttachRegistry::with_catalog_policy(!bare)), - workspace_path, + // The catalog policy is now uniform: seed `_table_catalog` + // whenever MCP creates a fresh `.hyper` file. The opt-out + // `--bare` path was removed; users wanting a pristine file + // can `DROP TABLE _table_catalog` after creation. + attachments: Arc::new(AttachRegistry::new()), + workspace_path: persistent_path, read_only, - bare, no_daemon, last_heartbeat: std::sync::Mutex::new(std::time::Instant::now()), tool_router: Self::tool_router(), @@ -848,14 +834,6 @@ impl HyperMcpServer { } } - /// `true` when `--bare` was passed. Exposed so handlers (e.g. - /// `set_table_metadata`) can short-circuit with a clear error rather - /// than silently no-op. - #[must_use] - pub fn is_bare(&self) -> bool { - self.bare - } - /// Return a clone of the subscription registry so background tasks /// (notably the directory watcher) can fire resource updates after /// their own ingest completes. @@ -948,8 +926,8 @@ impl HyperMcpServer { .map_err(|_| McpError::new(ErrorCode::InternalError, "Lock poisoned"))?; if guard.is_none() { tracing::info!( - workspace = self.workspace_path.as_deref().unwrap_or(""), - bare = self.bare, + persistent_db = self.workspace_path.as_deref().unwrap_or(""), + no_daemon = self.no_daemon, "initializing hyper engine" ); let engine = if self.no_daemon { @@ -994,7 +972,7 @@ impl HyperMcpServer { /// legitimate query. The `catalog_ready` flag still flips to `true` /// so we don't retry the same failing bootstrap on every call. fn ensure_catalog_ready(&self, engine: &Engine) { - if self.bare || self.read_only { + if self.read_only { return; } let Ok(mut ready) = self.catalog_ready.lock() else { @@ -1014,7 +992,14 @@ impl HyperMcpServer { /// Best-effort catalog upsert after a successful ingest. Logs and /// swallows errors — a bookkeeping failure should never fail an - /// otherwise-successful load. No-op in bare mode. + /// otherwise-successful load. + /// + /// `&self` is kept for consistency with sibling helpers and because + /// upcoming work (per-DB catalog presence cache) will need it. + #[expect( + clippy::unused_self, + reason = "kept for forthcoming per-DB catalog presence cache" + )] fn after_ingest_catalog_update( &self, engine: &Engine, @@ -1023,9 +1008,6 @@ impl HyperMcpServer { load_params: Option<&str>, row_count: Option, ) { - if self.bare { - return; - } if let Err(e) = crate::table_catalog::upsert_stub( engine, table_name, @@ -1044,10 +1026,11 @@ impl HyperMcpServer { /// Best-effort catalog reconcile after a DDL/DML `execute`. Same /// error-swallowing rationale as [`Self::after_ingest_catalog_update`]. + #[expect( + clippy::unused_self, + reason = "kept for forthcoming per-DB catalog presence cache" + )] fn after_execute_catalog_update(&self, engine: &Engine) { - if self.bare { - return; - } if let Err(e) = crate::table_catalog::reconcile(engine) { tracing::warn!( err = %e.message, @@ -2468,7 +2451,7 @@ impl HyperMcpServer { /// Update prose metadata for a table in the `_table_catalog`. #[tool( - description = "Update prose metadata for a table in the `_table_catalog`: source_url, source_description, purpose, license, notes. Fields you omit stay unchanged; pass an explicit empty string (\"\") to clear a field. Mechanical fields (load_tool, load_params, loaded_at, last_refreshed_at, row_count) are managed by the server. Requires an existing catalog entry — load the table first (load_file / load_data / execute CREATE TABLE) so the stub row is created automatically. Disabled in read-only and --bare mode." + description = "Update prose metadata for a table in the `_table_catalog`: source_url, source_description, purpose, license, notes. Fields you omit stay unchanged; pass an explicit empty string (\"\") to clear a field. Mechanical fields (load_tool, load_params, loaded_at, last_refreshed_at, row_count) are managed by the server. Requires an existing catalog entry — load the table first (load_file / load_data / execute CREATE TABLE) so the stub row is created automatically. Disabled in read-only mode and when the catalog table doesn't exist on the target database." )] fn set_table_metadata( &self, @@ -2477,14 +2460,6 @@ impl HyperMcpServer { if let Err(e) = self.check_writable("set_table_metadata") { return Self::err_content(e); } - if self.bare { - return Self::err_content(McpError::new( - ErrorCode::ReadOnlyViolation, - "set_table_metadata is disabled in --bare mode because the \ - _table_catalog is never created. Restart without --bare if \ - you want per-table provenance tracking.", - )); - } let fields = crate::table_catalog::MetadataFields { source_url: params.source_url, source_description: params.source_description, diff --git a/hyperdb-mcp/src/table_catalog.rs b/hyperdb-mcp/src/table_catalog.rs index 5e5306f..bb074da 100644 --- a/hyperdb-mcp/src/table_catalog.rs +++ b/hyperdb-mcp/src/table_catalog.rs @@ -460,31 +460,28 @@ pub fn reconcile(engine: &Engine) -> Result<(), McpError> { // --- Internals -------------------------------------------------------------- /// List user-facing tables (excludes `_hyperdb_*` internals and the -/// catalog itself). +/// catalog itself). Reads the raw table list from `Catalog` directly +/// rather than going through `describe_tables`, which already filters +/// out internal tables (including `_table_catalog` itself) and would +/// hide the rows we want to compare against. fn user_tables(engine: &Engine) -> Result, McpError> { - let describe = engine.describe_tables()?; - let mut names = Vec::new(); - for table in describe { - if let Some(name) = table.get("name").and_then(|v| v.as_str()) { - if name == TABLE_CATALOG_TABLE || is_internal_table(name) { - continue; - } - names.push(name.to_string()); - } - } - Ok(names) + let catalog = hyperdb_api::Catalog::new(engine.connection()); + let names = catalog.get_table_names("public").map_err(McpError::from)?; + Ok(names + .into_iter() + .filter(|name| name != TABLE_CATALOG_TABLE && !is_internal_table(name)) + .collect()) } /// `true` when `_table_catalog` is already present in the workspace. /// Used by read paths to return an empty result instead of erroring on a -/// brand-new workspace where the table hasn't been created yet. +/// brand-new workspace where the table hasn't been created yet. Reads +/// the raw catalog directly so the result isn't affected by the +/// internal-table filter applied to user-facing `describe_tables`. fn table_present(engine: &Engine) -> Result { - let describe = engine.describe_tables()?; - Ok(describe.iter().any(|t| { - t.get("name") - .and_then(|v| v.as_str()) - .is_some_and(|n| n == TABLE_CATALOG_TABLE) - })) + let catalog = hyperdb_api::Catalog::new(engine.connection()); + let names = catalog.get_table_names("public").map_err(McpError::from)?; + Ok(names.iter().any(|n| n == TABLE_CATALOG_TABLE)) } /// Return `COUNT(*)` for a user table. Quoted to handle mixed-case or diff --git a/hyperdb-mcp/tests/read_only_tests.rs b/hyperdb-mcp/tests/read_only_tests.rs index 4efc5fd..0e11577 100644 --- a/hyperdb-mcp/tests/read_only_tests.rs +++ b/hyperdb-mcp/tests/read_only_tests.rs @@ -73,9 +73,9 @@ fn strips_leading_comments() { /// server does not. #[test] fn server_read_only_flag_is_respected() { - let ro = HyperMcpServer::with_no_daemon(None, true, false, true); + let ro = HyperMcpServer::with_no_daemon(None, true, true); assert!(ro.is_read_only()); - let rw = HyperMcpServer::with_no_daemon(None, false, false, true); + let rw = HyperMcpServer::with_no_daemon(None, false, true); assert!(!rw.is_read_only()); } diff --git a/hyperdb-mcp/tests/resource_tests.rs b/hyperdb-mcp/tests/resource_tests.rs index d6221b9..8eca170 100644 --- a/hyperdb-mcp/tests/resource_tests.rs +++ b/hyperdb-mcp/tests/resource_tests.rs @@ -11,7 +11,7 @@ use tempfile::TempDir; /// ephemeral primary with a test table, and return both the server and /// the temp dir. /// -/// The server is constructed in `--bare` mode so the MCP-managed +/// The server is constructed without a persistent path so the MCP-managed /// `_table_catalog` doesn't appear alongside `widgets` and perturb the /// exact table counts these tests assert against. Catalog behavior is /// covered in `tests/table_catalog_tests.rs`. @@ -26,8 +26,7 @@ use tempfile::TempDir; fn server_with_test_table() -> (HyperMcpServer, TempDir) { let dir = TempDir::new().unwrap(); let path = dir.path().join("workspace.hyper"); - let server = - HyperMcpServer::with_no_daemon(Some(path.to_str().unwrap().into()), false, true, true); + let server = HyperMcpServer::with_no_daemon(Some(path.to_str().unwrap().into()), false, true); // Trigger lazy engine init. let _ = server.resource_body_for_uri("hyper://workspace"); // Seed the test table in the server's ephemeral primary so the @@ -190,9 +189,8 @@ fn read_readme_resource_lists_tables_in_markdown() { fn read_readme_resource_handles_empty_workspace() { let dir = TempDir::new().unwrap(); let path = dir.path().join("empty.hyper"); - // `bare` so `_table_catalog` doesn't make the workspace non-empty. - let server = - HyperMcpServer::with_no_daemon(Some(path.to_str().unwrap().into()), false, true, true); + // No persistent path so `_table_catalog` doesn't make the workspace non-empty. + let server = HyperMcpServer::with_no_daemon(Some(path.to_str().unwrap().into()), false, true); let body = server .resource_body_for_uri("hyper://readme") .unwrap() diff --git a/hyperdb-mcp/tests/saved_queries_tests.rs b/hyperdb-mcp/tests/saved_queries_tests.rs index f5e304c..24e2850 100644 --- a/hyperdb-mcp/tests/saved_queries_tests.rs +++ b/hyperdb-mcp/tests/saved_queries_tests.rs @@ -202,7 +202,7 @@ fn workspace_persists_across_restarts() { /// resource list → resource read chain. #[test] fn server_ephemeral_session_store_exposes_query_resources() { - let server = HyperMcpServer::with_no_daemon(None, false, false, true); + let server = HyperMcpServer::with_no_daemon(None, false, true); // Reach into the store directly via its resource helper by saving a // query through the store (wiring of the tool itself is covered // indirectly — the public testable surface is the store + resource). @@ -250,7 +250,7 @@ fn server_workspace_store_exposes_saved_queries_via_resources() { // with_no_daemon ensures the server spawns its own hyperd rather than // connecting to a daemon left over from daemon_tests. - let server = HyperMcpServer::with_no_daemon(Some(path_str), false, false, true); + let server = HyperMcpServer::with_no_daemon(Some(path_str), false, true); // The URI catalog lists both the definition and result resources. let uris = server.list_resource_uris(); diff --git a/hyperdb-mcp/tests/table_catalog_tests.rs b/hyperdb-mcp/tests/table_catalog_tests.rs index ca8aea3..2271047 100644 --- a/hyperdb-mcp/tests/table_catalog_tests.rs +++ b/hyperdb-mcp/tests/table_catalog_tests.rs @@ -29,12 +29,16 @@ fn workspace_engine() -> (Engine, TempDir) { } /// `true` if the workspace has a public-schema table named `name`. +/// Goes through the underlying Catalog directly so it reflects raw +/// presence — `Engine::describe_tables` filters out internal tables +/// (including `_table_catalog`), and we want to assert on those too. fn table_exists(engine: &Engine, name: &str) -> bool { - engine - .describe_tables() - .unwrap() + let catalog = hyperdb_api::Catalog::new(engine.connection()); + catalog + .get_table_names("public") + .unwrap_or_default() .iter() - .any(|t| t.get("name").and_then(|v| v.as_str()) == Some(name)) + .any(|n| n == name) } // --- Catalog module --------------------------------------------------------- @@ -344,7 +348,8 @@ fn default_server_auto_creates_catalog_on_first_engine_use() { let path_str = path.to_str().unwrap().to_string(); { - let server = HyperMcpServer::with_no_daemon(Some(path_str.clone()), false, false, true); + let server = HyperMcpServer::with_no_daemon(Some(path_str.clone()), false, true); + // Any tool that takes the engine lazily triggers bootstrap. let _ = server.resource_body_for_uri("hyper://workspace").unwrap(); } @@ -355,31 +360,6 @@ fn default_server_auto_creates_catalog_on_first_engine_use() { ); } -/// `--bare`: the catalog is never created, even after the engine has run -/// tool calls. The workspace file stays free of MCP bookkeeping. -#[test] -fn bare_server_does_not_create_catalog() { - let dir = TempDir::new().unwrap(); - let path = dir.path().join("ws.hyper"); - let path_str = path.to_str().unwrap().to_string(); - - { - let server = HyperMcpServer::with_no_daemon(Some(path_str.clone()), false, true, true); - assert!(server.is_bare()); - let _ = server.resource_body_for_uri("hyper://workspace").unwrap(); - } - - let engine = Engine::new_no_daemon(Some(path_str)).unwrap(); - assert!( - !table_exists(&engine, TABLE_CATALOG_TABLE), - "_table_catalog must NOT be present when the server was started with --bare" - ); - assert!( - !table_exists(&engine, "_hyperdb_saved_queries"), - "saved-queries meta-table must NOT be present in bare mode either" - ); -} - /// Read-only mode must not attempt to create the catalog either — the /// first tool call on a pristine workspace shouldn't turn around and /// issue a `CREATE TABLE`. @@ -400,7 +380,7 @@ fn read_only_server_does_not_create_catalog() { } { - let server = HyperMcpServer::with_no_daemon(Some(path_str.clone()), true, false, true); + let server = HyperMcpServer::with_no_daemon(Some(path_str.clone()), true, true); let _ = server.resource_body_for_uri("hyper://workspace").unwrap(); } @@ -435,7 +415,7 @@ fn backfill_stubs_preexisting_tables_on_reopen() { } { - let server = HyperMcpServer::with_no_daemon(Some(path_str.clone()), false, false, true); + let server = HyperMcpServer::with_no_daemon(Some(path_str.clone()), false, true); let _ = server.resource_body_for_uri("hyper://workspace").unwrap(); } @@ -452,13 +432,3 @@ fn backfill_stubs_preexisting_tables_on_reopen() { "backfilled rows are tagged as unknown origin" ); } - -/// `is_bare()` reflects the constructor argument (trivial, but guards -/// against regression of the accessor wiring). -#[test] -fn is_bare_reflects_constructor_argument() { - let bare = HyperMcpServer::with_no_daemon(None, false, true, true); - let normal = HyperMcpServer::with_no_daemon(None, false, false, true); - assert!(bare.is_bare()); - assert!(!normal.is_bare()); -} diff --git a/hyperdb-mcp/tests/watcher_tests.rs b/hyperdb-mcp/tests/watcher_tests.rs index 5ecaa0e..3f881dd 100644 --- a/hyperdb-mcp/tests/watcher_tests.rs +++ b/hyperdb-mcp/tests/watcher_tests.rs @@ -252,7 +252,7 @@ fn unwatch_unknown_dir_errors() { /// we can verify the gate that it relies on. #[test] fn read_only_server_blocks_writes() { - let ro = hyperdb_mcp::server::HyperMcpServer::with_no_daemon(None, true, false, true); + let ro = hyperdb_mcp::server::HyperMcpServer::with_no_daemon(None, true, true); assert!(ro.is_read_only()); } From 0548d438a00f2896b7a76f757e910c3ee6581b10 Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Mon, 25 May 2026 03:37:50 -0700 Subject: [PATCH 3/9] refactor(mcp): drop seed_catalog_on_create from AttachRegistry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that --bare is gone, AttachRegistry's seed_catalog_on_create flag is always true. Remove the field, the with_catalog_policy() constructor, and the conditional in attach() — seeding now depends only on whether MCP just created the file (file_was_created), matching the same uniform policy the engine uses for the default persistent attachment. The registry continues to hold *user-attached* databases only. The default persistent attachment is owned by Engine itself and isn't tracked in the registry; replay-on-reconnect re-issues only the user attaches. Tests: - on_missing_create_does_not_seed_under_bare_policy deleted (the bare policy no longer exists; the matching positive test for non-bare policy stays as the canonical create-+-seed regression). --- hyperdb-mcp/src/attach.rs | 51 +++++++++---------------------- hyperdb-mcp/tests/attach_tests.rs | 31 ------------------- 2 files changed, 15 insertions(+), 67 deletions(-) diff --git a/hyperdb-mcp/src/attach.rs b/hyperdb-mcp/src/attach.rs index 4671998..8ca615e 100644 --- a/hyperdb-mcp/src/attach.rs +++ b/hyperdb-mcp/src/attach.rs @@ -205,50 +205,30 @@ pub struct AttachRequest { /// Live set of attachments keyed by alias. Thread-safe via an internal /// `Mutex`; all operations are serial, which matches the rest of the /// engine's single-connection model. +/// +/// The registry holds *user-attached* databases — the default persistent +/// database is attached directly by [`crate::engine::Engine`] and isn't +/// tracked here. Replay-on-reconnect only re-issues the user attaches; +/// the engine re-attaches persistent itself when it's reconstructed. #[derive(Debug)] pub struct AttachRegistry { // Insertion-ordered so replay happens in the same order the user // originally attached — matters if attachment B references objects // that rely on attachment A (not today, but cheap to preserve). inner: Mutex>, - /// When `true`, a successful `attach` that just created a fresh - /// `.hyper` file via `on_missing: create` also stamps an empty - /// `_table_catalog` into the new database (fully qualified as - /// `"{alias}"."public"."_table_catalog"`). Exactly mirrors the - /// primary-workspace policy in - /// [`crate::server::HyperMcpServer::ensure_catalog_ready`]: - /// non-bare servers get a catalog on newly-created databases, - /// bare servers never touch the file beyond the `ATTACH` itself. - /// Attaching an *existing* database never seeds regardless of - /// this flag. - seed_catalog_on_create: bool, } impl Default for AttachRegistry { fn default() -> Self { - Self::with_catalog_policy(true) + Self::new() } } impl AttachRegistry { - /// Convenience for tests and the default `HyperMcpServer` flow. - /// Seeds `_table_catalog` on newly-created databases (non-bare - /// policy). Bare servers must use - /// [`AttachRegistry::with_catalog_policy`] with `false` instead. #[must_use] pub fn new() -> Self { - Self::default() - } - - /// Explicit-policy constructor. `seed_catalog_on_create = false` - /// matches `--bare`: the registry will never stamp - /// `_table_catalog` into any attached database (neither freshly - /// created nor pre-existing). - #[must_use] - pub fn with_catalog_policy(seed_catalog_on_create: bool) -> Self { Self { inner: Mutex::new(Vec::new()), - seed_catalog_on_create, } } @@ -374,19 +354,18 @@ impl AttachRegistry { // Seed `_table_catalog` into a freshly-created attached // database so opening that file as a primary workspace later // (on a fresh MCP instance) finds the catalog ready and skips - // the backfill sweep. Intentionally gated on *both*: - // - // 1. `file_was_created` — attaching an existing database - // must never mutate its schema, regardless of contents. - // 2. `self.seed_catalog_on_create` — bare servers use - // `with_catalog_policy(false)` so the workspace stays - // pristine end-to-end. + // the backfill sweep. Gated on `file_was_created` only: + // attaching an *existing* database must never mutate its + // schema, regardless of contents. (`--bare` used to add a + // second gate via `seed_catalog_on_create`; that flag was + // removed when `--bare` was retired in favor of the uniform + // "always seed on create" policy.) // // On failure we roll back the attach to preserve the // all-or-nothing contract: the user asked for "create a new - // DB (non-bare)" which implicitly promises a catalog; leaving - // an attached-but-unseeded file would silently violate that. - if file_was_created && self.seed_catalog_on_create { + // DB" which implicitly promises a catalog; leaving an + // attached-but-unseeded file would silently violate that. + if file_was_created { if let Err(e) = crate::table_catalog::ensure_exists_in_database(engine, &req.alias) { let detach_sql = format!("DETACH DATABASE \"{}\"", req.alias.replace('"', "\"\"")); if let Err(de) = engine.execute_command(&detach_sql) { diff --git a/hyperdb-mcp/tests/attach_tests.rs b/hyperdb-mcp/tests/attach_tests.rs index e1161e3..d6c5eff 100644 --- a/hyperdb-mcp/tests/attach_tests.rs +++ b/hyperdb-mcp/tests/attach_tests.rs @@ -374,37 +374,6 @@ fn on_missing_create_seeds_table_catalog_by_default() { ); } -/// Bare policy (`with_catalog_policy(false)`): the registry never -/// seeds `_table_catalog` into an attached database — even one we -/// just created. Mirrors `--bare` at the server level, where no -/// workspace file is allowed to accumulate MCP bookkeeping. -#[test] -fn on_missing_create_does_not_seed_under_bare_policy() { - let (engine, dir) = primary_workspace(); - let target = dir.path().join("bare.hyper"); - assert!(!target.exists()); - - let registry = AttachRegistry::with_catalog_policy(false); - registry - .attach( - &engine, - AttachRequest { - alias: "bare".into(), - source: AttachSource::LocalFile { - path: target.clone(), - }, - writable: true, - on_missing: OnMissing::Create, - }, - ) - .unwrap(); - - assert!( - !attached_db_has_table_catalog(&engine, "bare"), - "bare policy must not seed _table_catalog, even on a freshly created file" - ); -} - /// Attaching an *existing* database in read/write mode never adds /// `_table_catalog`, regardless of policy — the attached file only /// gets seeded at creation time. This keeps the "treat existing DBs From f755ac9b4832f99deef46f728b3ebcf4c310dafe Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Mon, 25 May 2026 03:50:56 -0700 Subject: [PATCH 4/9] feat(mcp): route _table_catalog to persistent attachment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The catalog tracks tables the user wants to keep around — i.e. tables in the persistent attachment. Ephemeral scratch tables aren't worth catalogging because the database is replaced every session. So: - ensure_exists / list / get / upsert_stub / set_metadata / delete_for / reconcile / refresh_row_count all qualify SQL with "persistent"."public"."_table_catalog". - When no persistent attachment is present (--ephemeral-only), all catalog operations no-op gracefully (Ok with empty/None) instead of erroring; set_metadata is the only path that surfaces a clear ReadOnlyViolation since the user's intent there is mutation. - user_tables / table_present / row_count_of probe persistent's pg_catalog.pg_tables directly via fully-qualified SQL. - Removed HYPERDB_NAMED_INTERNAL_TABLES from is_internal_table — the catalog never appears in describe_tables now (which only enumerates the connection's primary, i.e. ephemeral) so no filter is needed. Tests: catalog tests update their fixtures to seed user tables in "persistent"."public". (sed-driven batch update — every CREATE TABLE / INSERT INTO in those tests now qualifies the target db). attach_tests.copy_create_stubs_table_catalog_on_primary_workspace asserts catalog presence via persistent's pg_tables. Two saved_queries tests still failing pending iteration 7. --- hyperdb-mcp/src/engine.rs | 23 ++-- hyperdb-mcp/src/table_catalog.rs | 154 +++++++++++++++++------ hyperdb-mcp/tests/attach_tests.rs | 19 ++- hyperdb-mcp/tests/table_catalog_tests.rs | 54 ++++---- 4 files changed, 167 insertions(+), 83 deletions(-) diff --git a/hyperdb-mcp/src/engine.rs b/hyperdb-mcp/src/engine.rs index fb3a17c..2a62b85 100644 --- a/hyperdb-mcp/src/engine.rs +++ b/hyperdb-mcp/src/engine.rs @@ -1219,22 +1219,17 @@ pub const CLIENT_LOG_FILE_NAME: &str = "hyperdb-mcp.log"; /// automatically — no per-table filter list to keep in sync. pub const HYPERDB_INTERNAL_PREFIX: &str = "_hyperdb_"; -/// Names of MCP-managed tables that don't follow the `_hyperdb_` prefix -/// but are also infrastructure rather than user data. Currently just -/// `_table_catalog`. Added because `--bare` (the historical opt-out for -/// catalog creation) was removed, so the catalog table now reliably -/// appears alongside user tables; filtering it from the user-visible -/// list and counts keeps the LLM's mental model clean. -const HYPERDB_NAMED_INTERNAL_TABLES: &[&str] = &[crate::table_catalog::TABLE_CATALOG_TABLE]; - -/// Returns true when `name` is one of `HyperDB`'s own internal tables — -/// either matching [`HYPERDB_INTERNAL_PREFIX`] (saved-queries, watcher -/// state, etc.) or appearing in [`HYPERDB_NAMED_INTERNAL_TABLES`] (the -/// `_table_catalog`, which predates the prefix convention). Factored -/// into a helper so every filter site calls the same predicate. +/// Returns true when `name` is one of `HyperDB`'s own internal tables +/// (matches [`HYPERDB_INTERNAL_PREFIX`]). Factored into a helper so +/// every filter site calls the same predicate and a future move to a +/// more nuanced scheme (e.g. per-table allowlist) is a single edit. +/// +/// Note: `_table_catalog` lives in the persistent attachment, not the +/// ephemeral primary, so it doesn't show up in `describe_tables` even +/// without the filter — `describe_tables` only enumerates the primary. #[must_use] pub fn is_internal_table(name: &str) -> bool { - name.starts_with(HYPERDB_INTERNAL_PREFIX) || HYPERDB_NAMED_INTERNAL_TABLES.contains(&name) + name.starts_with(HYPERDB_INTERNAL_PREFIX) } /// Compute the log directory for both `hyperd` output and the client-side diff --git a/hyperdb-mcp/src/table_catalog.rs b/hyperdb-mcp/src/table_catalog.rs index bb074da..30b3846 100644 --- a/hyperdb-mcp/src/table_catalog.rs +++ b/hyperdb-mcp/src/table_catalog.rs @@ -19,15 +19,20 @@ //! | `license` | User / LLM | //! | `notes` | User / LLM | //! -//! The backing table is `_table_catalog` (single underscore, *not* -//! `_hyperdb_`) so it shows up in `describe` and the resource catalog — the -//! catalog is meant to be read by humans and LLMs, it isn't internal -//! bookkeeping. The `_` prefix signals "workspace meta" without triggering -//! [`crate::engine::is_internal_table`]'s hidden-table filter. +//! The backing table is `_table_catalog`. After the ephemeral-primary +//! migration, the catalog tracks tables in the **persistent** database +//! only — ephemeral scratch tables aren't worth cataloguing because the +//! database is replaced every session. All operations therefore target +//! the persistent attachment via fully-qualified SQL +//! (`"persistent"."public"."_table_catalog"`); when the engine has no +//! persistent attachment (`--ephemeral-only`), catalog operations are +//! no-ops. //! -//! All operations no-op quietly when [`HyperMcpServer`] was constructed with -//! `--bare`: the module never gets called in that mode, so the table is -//! never created and the workspace file stays pristine. +//! `_table_catalog` is hidden from the user-visible +//! [`Engine::describe_tables`] output (see +//! [`crate::engine::is_internal_table`]) so the LLM doesn't see it +//! alongside its data tables; users who want to inspect it directly +//! can run `SELECT * FROM "persistent"."public"."_table_catalog"`. //! //! [`HyperMcpServer`]: crate::server::HyperMcpServer @@ -37,10 +42,29 @@ use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use serde_json::Value; -/// Backing table name. Visible in `describe`; users can `SELECT * FROM -/// _table_catalog` directly. +/// Backing table name. Lives in the persistent attachment under +/// `"persistent"."public"."_table_catalog"`. Hidden from +/// `Engine::describe_tables`; users who want to inspect it directly can +/// run a fully-qualified SELECT. pub const TABLE_CATALOG_TABLE: &str = "_table_catalog"; +/// Returns the fully-qualified SQL reference for `_table_catalog` in the +/// engine's persistent attachment, or `None` when the engine has no +/// persistent attachment (the `--ephemeral-only` case). All catalog +/// read/write helpers consult this; `None` means catalog operations are +/// no-ops because there's nowhere durable to put bookkeeping. +fn qualified_catalog(engine: &Engine) -> Option { + if engine.has_persistent() { + Some(format!( + "\"{}\".\"public\".\"{}\"", + Engine::PERSISTENT_ALIAS, + TABLE_CATALOG_TABLE + )) + } else { + None + } +} + /// One row in [`TABLE_CATALOG_TABLE`]. /// /// The `Option` fields are `NULL` in the backing table when not set. Prose @@ -144,7 +168,13 @@ const CATALOG_COLUMNS: &str = "(\ /// `CREATE TABLE IF NOT EXISTS` statement — typically connection loss /// or permission failures. pub fn ensure_exists(engine: &Engine) -> Result<(), McpError> { - let ddl = format!("CREATE TABLE IF NOT EXISTS \"{TABLE_CATALOG_TABLE}\" {CATALOG_COLUMNS}"); + let Some(qualified) = qualified_catalog(engine) else { + // No persistent attachment — there's nowhere durable to put the + // catalog. Treat as success so callers don't have to special-case + // the ephemeral-only mode. + return Ok(()); + }; + let ddl = format!("CREATE TABLE IF NOT EXISTS {qualified} {CATALOG_COLUMNS}"); engine.execute_command(&ddl)?; Ok(()) } @@ -188,6 +218,9 @@ pub fn ensure_exists_in_database(engine: &Engine, db_alias: &str) -> Result<(), /// - Propagates [`ErrorCode::SchemaMismatch`] from `row_to_entry` if /// a persisted row cannot be decoded into a [`CatalogEntry`]. pub fn list(engine: &Engine) -> Result, McpError> { + let Some(qualified) = qualified_catalog(engine) else { + return Ok(Vec::new()); + }; if !table_present(engine)? { return Ok(Vec::new()); } @@ -195,7 +228,7 @@ pub fn list(engine: &Engine) -> Result, McpError> { "SELECT table_name, source_url, source_description, purpose, \ load_tool, load_params, license, loaded_at, last_refreshed_at, \ row_count, notes \ - FROM \"{TABLE_CATALOG_TABLE}\" ORDER BY table_name" + FROM {qualified} ORDER BY table_name" ); let rows = engine.execute_query_to_json(&sql)?; rows.iter().map(row_to_entry).collect() @@ -208,6 +241,9 @@ pub fn list(engine: &Engine) -> Result, McpError> { /// Same as [`list`]: propagates errors from `table_present`, the /// Hyper `SELECT`, or row decoding. pub fn get(engine: &Engine, table_name: &str) -> Result, McpError> { + let Some(qualified) = qualified_catalog(engine) else { + return Ok(None); + }; if !table_present(engine)? { return Ok(None); } @@ -215,7 +251,7 @@ pub fn get(engine: &Engine, table_name: &str) -> Result, Mc "SELECT table_name, source_url, source_description, purpose, \ load_tool, load_params, license, loaded_at, last_refreshed_at, \ row_count, notes \ - FROM \"{TABLE_CATALOG_TABLE}\" WHERE table_name = {}", + FROM {qualified} WHERE table_name = {}", sql_literal(table_name) ); let rows = engine.execute_query_to_json(&sql)?; @@ -258,6 +294,10 @@ pub fn upsert_stub( bump_refresh: bool, ) -> Result<(), McpError> { ensure_exists(engine)?; + let Some(qualified) = qualified_catalog(engine) else { + // No persistent attachment; nothing to write to. + return Ok(()); + }; let existing = get(engine, table_name)?; let now = Utc::now(); @@ -281,13 +321,13 @@ pub fn upsert_stub( engine.execute_in_transaction(|engine| { let delete_sql = format!( - "DELETE FROM \"{TABLE_CATALOG_TABLE}\" WHERE table_name = {}", + "DELETE FROM {qualified} WHERE table_name = {}", sql_literal(table_name) ); engine.execute_command(&delete_sql)?; let insert_sql = format!( - "INSERT INTO \"{TABLE_CATALOG_TABLE}\" \ + "INSERT INTO {qualified} \ (table_name, source_url, source_description, purpose, load_tool, \ load_params, license, loaded_at, last_refreshed_at, row_count, notes) \ VALUES ({name}, {source_url}, {source_description}, {purpose}, {load_tool}, \ @@ -375,8 +415,15 @@ pub fn set_metadata( assignments.push(format!("notes = {}", sql_literal_or_null_if_empty(v))); } + let qualified = qualified_catalog(engine).ok_or_else(|| { + McpError::new( + ErrorCode::ReadOnlyViolation, + "set_table_metadata is unavailable in --ephemeral-only mode \ + because the catalog has nowhere durable to live.", + ) + })?; let update_sql = format!( - "UPDATE \"{TABLE_CATALOG_TABLE}\" SET {assigns} WHERE table_name = {name}", + "UPDATE {qualified} SET {assigns} WHERE table_name = {name}", assigns = assignments.join(", "), name = sql_literal(table_name), ); @@ -397,11 +444,14 @@ pub fn set_metadata( /// Propagates any error from `table_present` or from the `DELETE` /// statement against the catalog table. pub fn delete_for(engine: &Engine, table_name: &str) -> Result { + let Some(qualified) = qualified_catalog(engine) else { + return Ok(false); + }; if !table_present(engine)? { return Ok(false); } let sql = format!( - "DELETE FROM \"{TABLE_CATALOG_TABLE}\" WHERE table_name = {}", + "DELETE FROM {qualified} WHERE table_name = {}", sql_literal(table_name) ); let affected = engine.execute_command(&sql)?; @@ -459,38 +509,61 @@ pub fn reconcile(engine: &Engine) -> Result<(), McpError> { // --- Internals -------------------------------------------------------------- -/// List user-facing tables (excludes `_hyperdb_*` internals and the -/// catalog itself). Reads the raw table list from `Catalog` directly -/// rather than going through `describe_tables`, which already filters -/// out internal tables (including `_table_catalog` itself) and would -/// hide the rows we want to compare against. +/// List user-facing tables in the **persistent** attachment (excludes +/// `_hyperdb_*` internals and the catalog itself). Returns an empty Vec +/// when no persistent attachment is present. Uses a fully-qualified +/// `pg_catalog.pg_tables` probe so it sees inside the attachment; +/// `Catalog::get_table_names` would target the connection's primary +/// (ephemeral) by default. fn user_tables(engine: &Engine) -> Result, McpError> { - let catalog = hyperdb_api::Catalog::new(engine.connection()); - let names = catalog.get_table_names("public").map_err(McpError::from)?; - Ok(names + if !engine.has_persistent() { + return Ok(Vec::new()); + } + let sql = format!( + "SELECT tablename FROM \"{}\".pg_catalog.pg_tables \ + WHERE schemaname = 'public'", + Engine::PERSISTENT_ALIAS + ); + let rows = engine.execute_query_to_json(&sql)?; + Ok(rows .into_iter() + .filter_map(|r| { + r.get("tablename") + .and_then(|v| v.as_str()) + .map(str::to_string) + }) .filter(|name| name != TABLE_CATALOG_TABLE && !is_internal_table(name)) .collect()) } -/// `true` when `_table_catalog` is already present in the workspace. -/// Used by read paths to return an empty result instead of erroring on a -/// brand-new workspace where the table hasn't been created yet. Reads -/// the raw catalog directly so the result isn't affected by the -/// internal-table filter applied to user-facing `describe_tables`. +/// `true` when `_table_catalog` is present inside the persistent +/// attachment. Returns `false` if there is no persistent attachment. fn table_present(engine: &Engine) -> Result { - let catalog = hyperdb_api::Catalog::new(engine.connection()); - let names = catalog.get_table_names("public").map_err(McpError::from)?; - Ok(names.iter().any(|n| n == TABLE_CATALOG_TABLE)) + if !engine.has_persistent() { + return Ok(false); + } + let sql = format!( + "SELECT tablename FROM \"{}\".pg_catalog.pg_tables \ + WHERE schemaname = 'public' AND tablename = {}", + Engine::PERSISTENT_ALIAS, + sql_literal(TABLE_CATALOG_TABLE) + ); + let rows = engine.execute_query_to_json(&sql)?; + Ok(!rows.is_empty()) } -/// Return `COUNT(*)` for a user table. Quoted to handle mixed-case or -/// keyword-like names. The `describe_tables` path already gives us a row -/// count, but only after a full schema read; this dedicated query is -/// cheaper when we only need the number. +/// Return `COUNT(*)` for a user table inside the persistent attachment. +/// Quoted to handle mixed-case or keyword-like names. Returns 0 if no +/// persistent attachment is present. fn row_count_of(engine: &Engine, table_name: &str) -> Result { + if !engine.has_persistent() { + return Ok(0); + } let quoted = table_name.replace('"', "\"\""); - let sql = format!("SELECT COUNT(*) AS cnt FROM \"{quoted}\""); + let sql = format!( + "SELECT COUNT(*) AS cnt FROM \"{}\".\"public\".\"{quoted}\"", + Engine::PERSISTENT_ALIAS + ); let rows = engine.execute_query_to_json(&sql)?; Ok(rows .first() @@ -506,8 +579,11 @@ fn refresh_row_count( table_name: &str, row_count: Option, ) -> Result<(), McpError> { + let Some(qualified) = qualified_catalog(engine) else { + return Ok(()); + }; let sql = format!( - "UPDATE \"{TABLE_CATALOG_TABLE}\" SET row_count = {count} WHERE table_name = {name}", + "UPDATE {qualified} SET row_count = {count} WHERE table_name = {name}", count = row_count.map_or_else(|| "NULL".into(), |n| n.to_string()), name = sql_literal(table_name), ); diff --git a/hyperdb-mcp/tests/attach_tests.rs b/hyperdb-mcp/tests/attach_tests.rs index d6c5eff..19636aa 100644 --- a/hyperdb-mcp/tests/attach_tests.rs +++ b/hyperdb-mcp/tests/attach_tests.rs @@ -636,17 +636,28 @@ fn copy_create_stubs_table_catalog_on_primary_workspace() { "load_params should echo target_table: {params}" ); - // Catalog table must be present (this test seeded it explicitly - // but the real code path goes through `after_ingest_catalog_update` - // which also calls `ensure_exists` via `upsert_stub`). + // The catalog lives in the persistent attachment now; verify by + // probing `pg_tables` there directly. + let catalog_present = engine + .execute_query_to_json( + "SELECT tablename FROM \"persistent\".pg_catalog.pg_tables \ + WHERE schemaname = 'public' AND tablename = '_table_catalog'", + ) + .unwrap(); + assert!( + !catalog_present.is_empty(), + "_table_catalog must be present in the persistent attachment" + ); + // `derived` was seeded into the ephemeral primary, so it appears in + // the engine's regular table listing. let names: Vec = engine .describe_tables() .unwrap() .iter() .filter_map(|t| t.get("name").and_then(|v| v.as_str()).map(str::to_string)) .collect(); - assert!(names.iter().any(|n| n == TABLE_CATALOG_TABLE)); assert!(names.iter().any(|n| n == "derived")); + let _ = TABLE_CATALOG_TABLE; // keep import live (referenced via prose-only assertion) } /// A replay where the source file has been deleted should drop that diff --git a/hyperdb-mcp/tests/table_catalog_tests.rs b/hyperdb-mcp/tests/table_catalog_tests.rs index 2271047..176b132 100644 --- a/hyperdb-mcp/tests/table_catalog_tests.rs +++ b/hyperdb-mcp/tests/table_catalog_tests.rs @@ -28,17 +28,19 @@ fn workspace_engine() -> (Engine, TempDir) { (engine, dir) } -/// `true` if the workspace has a public-schema table named `name`. -/// Goes through the underlying Catalog directly so it reflects raw -/// presence — `Engine::describe_tables` filters out internal tables -/// (including `_table_catalog`), and we want to assert on those too. +/// `true` if `name` exists in the persistent attachment's `public` +/// schema. The catalog tests target the persistent DB because that's +/// where MCP-managed bookkeeping lives in the new model; user-data +/// tables only matter to these tests in the same scope. fn table_exists(engine: &Engine, name: &str) -> bool { - let catalog = hyperdb_api::Catalog::new(engine.connection()); - catalog - .get_table_names("public") - .unwrap_or_default() - .iter() - .any(|n| n == name) + let sql = format!( + "SELECT tablename FROM \"persistent\".pg_catalog.pg_tables \ + WHERE schemaname = 'public' AND tablename = '{}'", + name.replace('\'', "''") + ); + engine + .execute_query_to_json(&sql) + .is_ok_and(|rows| !rows.is_empty()) } // --- Catalog module --------------------------------------------------------- @@ -60,7 +62,7 @@ fn ensure_exists_is_idempotent() { fn upsert_stub_creates_row_with_null_prose_fields() { let (engine, _dir) = workspace_engine(); engine - .execute_command("CREATE TABLE widgets (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".widgets (id INT)") .unwrap(); table_catalog::upsert_stub( &engine, @@ -92,7 +94,7 @@ fn upsert_stub_creates_row_with_null_prose_fields() { fn upsert_stub_preserves_prose_on_reload() { let (engine, _dir) = workspace_engine(); engine - .execute_command("CREATE TABLE widgets (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".widgets (id INT)") .unwrap(); table_catalog::upsert_stub(&engine, "widgets", "load_file", None, Some(10), true).unwrap(); table_catalog::set_metadata( @@ -129,7 +131,7 @@ fn upsert_stub_preserves_prose_on_reload() { fn upsert_stub_bump_refresh_updates_last_refreshed_not_loaded_at() { let (engine, _dir) = workspace_engine(); engine - .execute_command("CREATE TABLE widgets (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".widgets (id INT)") .unwrap(); table_catalog::upsert_stub(&engine, "widgets", "load_file", None, Some(1), true).unwrap(); let first = table_catalog::get(&engine, "widgets").unwrap().unwrap(); @@ -155,7 +157,7 @@ fn upsert_stub_bump_refresh_updates_last_refreshed_not_loaded_at() { fn set_metadata_updates_prose_without_touching_mechanical_fields() { let (engine, _dir) = workspace_engine(); engine - .execute_command("CREATE TABLE widgets (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".widgets (id INT)") .unwrap(); table_catalog::upsert_stub(&engine, "widgets", "load_file", None, Some(10), true).unwrap(); let before = table_catalog::get(&engine, "widgets").unwrap().unwrap(); @@ -183,7 +185,7 @@ fn set_metadata_updates_prose_without_touching_mechanical_fields() { fn set_metadata_empty_string_clears_field() { let (engine, _dir) = workspace_engine(); engine - .execute_command("CREATE TABLE widgets (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".widgets (id INT)") .unwrap(); table_catalog::upsert_stub(&engine, "widgets", "load_file", None, Some(1), true).unwrap(); table_catalog::set_metadata( @@ -215,7 +217,7 @@ fn set_metadata_empty_string_clears_field() { fn set_metadata_rejects_empty_payload() { let (engine, _dir) = workspace_engine(); engine - .execute_command("CREATE TABLE widgets (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".widgets (id INT)") .unwrap(); table_catalog::upsert_stub(&engine, "widgets", "load_file", None, Some(1), true).unwrap(); let err = @@ -252,16 +254,16 @@ fn reconcile_inserts_stubs_drops_orphans_refreshes_counts() { // Three user tables exist; the catalog starts with an entry for a // table that was dropped, and only one of the three live tables. engine - .execute_command("CREATE TABLE alpha (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".alpha (id INT)") .unwrap(); engine - .execute_command("INSERT INTO alpha VALUES (1), (2)") + .execute_command("INSERT INTO \"persistent\".\"public\".alpha VALUES (1), (2)") .unwrap(); engine - .execute_command("CREATE TABLE beta (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".beta (id INT)") .unwrap(); engine - .execute_command("CREATE TABLE gamma (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".gamma (id INT)") .unwrap(); // Pre-populate catalog: known `alpha` (stale count) + orphan `zeta`. @@ -303,7 +305,7 @@ fn reconcile_inserts_stubs_drops_orphans_refreshes_counts() { fn reconcile_skips_internal_tables() { let (engine, _dir) = workspace_engine(); engine - .execute_command("CREATE TABLE alpha (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".alpha (id INT)") .unwrap(); // Force-create a `_hyperdb_*` table the way `saved_queries` would. engine @@ -324,7 +326,7 @@ fn reconcile_skips_internal_tables() { fn delete_for_is_idempotent() { let (engine, _dir) = workspace_engine(); engine - .execute_command("CREATE TABLE widgets (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".widgets (id INT)") .unwrap(); table_catalog::upsert_stub(&engine, "widgets", "load_file", None, Some(1), true).unwrap(); assert!(table_catalog::delete_for(&engine, "widgets").unwrap()); @@ -375,7 +377,7 @@ fn read_only_server_does_not_create_catalog() { { let engine = Engine::new_no_daemon(Some(path_str.clone())).unwrap(); engine - .execute_command("CREATE TABLE widgets (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".widgets (id INT)") .unwrap(); } @@ -404,13 +406,13 @@ fn backfill_stubs_preexisting_tables_on_reopen() { { let engine = Engine::new_no_daemon(Some(path_str.clone())).unwrap(); engine - .execute_command("CREATE TABLE alpha (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".alpha (id INT)") .unwrap(); engine - .execute_command("INSERT INTO alpha VALUES (1), (2), (3)") + .execute_command("INSERT INTO \"persistent\".\"public\".alpha VALUES (1), (2), (3)") .unwrap(); engine - .execute_command("CREATE TABLE beta (id INT)") + .execute_command("CREATE TABLE \"persistent\".\"public\".beta (id INT)") .unwrap(); } From c6d1bf83ea389a1eb8a21cb7df05ad63d65b84fb Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Mon, 25 May 2026 03:57:59 -0700 Subject: [PATCH 5/9] feat(mcp): saved queries persist in the persistent attachment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WorkspaceStore now writes _hyperdb_saved_queries into the persistent attachment ("persistent"."public"."_hyperdb_saved_queries") instead of the connection's primary database. This matches user expectations: saved queries are reference material that should outlive a single session, and the persistent attachment is where curated, long-lived data lives in the new model. build_store remains driven by 'persistent path was supplied' — the behavior is unchanged from the user's perspective. --ephemeral-only sessions still get SessionStore (in-memory, dies with the process). --- hyperdb-mcp/src/saved_queries.rs | 49 +++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/hyperdb-mcp/src/saved_queries.rs b/hyperdb-mcp/src/saved_queries.rs index 01a36a1..251eb5b 100644 --- a/hyperdb-mcp/src/saved_queries.rs +++ b/hyperdb-mcp/src/saved_queries.rs @@ -211,17 +211,27 @@ impl WorkspaceStore { Self::default() } - /// Idempotently create the meta-table. Called at the top of every - /// public method to keep each entry point self-contained. + /// Fully-qualified table reference inside the persistent attachment. + /// Saved queries are user reference material — they belong with + /// curated, long-lived data, which lives in the persistent DB. + fn qualified_table() -> String { + format!( + "\"{}\".\"public\".\"{}\"", + Engine::PERSISTENT_ALIAS, + SAVED_QUERIES_TABLE + ) + } + + /// Idempotently create the meta-table inside the persistent + /// attachment. Called at the top of every public method to keep + /// each entry point self-contained. /// /// The `initialized` flag is intentionally **not** reset on a /// `ConnectionLost` reconnect. That's safe because `WorkspaceStore` - /// only ever backs persistent workspaces (ephemeral workspaces use - /// `SessionStore`), and the meta-table lives in the `.hyper` file - /// itself — a reconnect opens the same file and finds the table - /// already there. If this class is ever reused with an in-memory or - /// recreate-on-reconnect engine the flag will need to be tied to the - /// engine's lifetime. + /// only ever backs persistent attachments (ephemeral-only sessions + /// use `SessionStore`), and the meta-table lives in the `.hyper` + /// file itself — a reconnect opens the same file and finds the table + /// already there. fn ensure_table(&self, engine: &Engine) -> Result<(), McpError> { let mut flag = self .initialized @@ -235,12 +245,13 @@ impl WorkspaceStore { // `PRIMARY KEY` because Hyper does not support indexes; name // uniqueness is enforced application-side in [`Self::save`]. let ddl = format!( - "CREATE TABLE IF NOT EXISTS \"{SAVED_QUERIES_TABLE}\" (\ + "CREATE TABLE IF NOT EXISTS {table} (\ name TEXT NOT NULL, \ sql TEXT NOT NULL, \ description TEXT, \ created_at TIMESTAMP NOT NULL\ - )" + )", + table = Self::qualified_table() ); engine.execute_command(&ddl)?; *flag = true; @@ -321,11 +332,12 @@ impl SavedQueryStore for WorkspaceStore { ) })?; self.ensure_table(engine)?; + let table = Self::qualified_table(); // Up-front existence check — clearer error than Hyper's raw PK // violation message, and matches SessionStore's behaviour. let existing_sql = format!( - "SELECT name FROM \"{SAVED_QUERIES_TABLE}\" WHERE name = {}", + "SELECT name FROM {table} WHERE name = {}", sql_literal(&query.name) ); let rows = engine.execute_query_to_json(&existing_sql)?; @@ -345,7 +357,7 @@ impl SavedQueryStore for WorkspaceStore { None => "NULL".into(), }; let insert_sql = format!( - "INSERT INTO \"{SAVED_QUERIES_TABLE}\" (name, sql, description, created_at) \ + "INSERT INTO {table} (name, sql, description, created_at) \ VALUES ({name}, {sql}, {desc}, TIMESTAMP {ts})", name = sql_literal(&query.name), sql = sql_literal(&query.sql), @@ -368,8 +380,9 @@ impl SavedQueryStore for WorkspaceStore { self.ensure_table(engine)?; let sql = format!( "SELECT name, sql, description, created_at \ - FROM \"{SAVED_QUERIES_TABLE}\" WHERE name = {}", - sql_literal(name) + FROM {table} WHERE name = {}", + sql_literal(name), + table = Self::qualified_table(), ); let rows = engine.execute_query_to_json(&sql)?; match rows.first() { @@ -388,7 +401,8 @@ impl SavedQueryStore for WorkspaceStore { self.ensure_table(engine)?; let sql = format!( "SELECT name, sql, description, created_at \ - FROM \"{SAVED_QUERIES_TABLE}\" ORDER BY name" + FROM {table} ORDER BY name", + table = Self::qualified_table(), ); let rows = engine.execute_query_to_json(&sql)?; rows.iter().map(row_to_saved_query).collect() @@ -403,8 +417,9 @@ impl SavedQueryStore for WorkspaceStore { })?; self.ensure_table(engine)?; let sql = format!( - "DELETE FROM \"{SAVED_QUERIES_TABLE}\" WHERE name = {}", - sql_literal(name) + "DELETE FROM {table} WHERE name = {}", + sql_literal(name), + table = Self::qualified_table(), ); let affected = engine.execute_command(&sql)?; Ok(affected > 0) From cda8ec4003f7fd2fe49d1bcbc16891836cc3191a Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Mon, 25 May 2026 04:00:50 -0700 Subject: [PATCH 6/9] test(mcp): regression tests for ephemeral-primary two-DB model Eight new tests in tests/two_db_model_tests.rs covering the core contracts of the new model: - Engine::new(Some(path)) attaches the file as 'persistent' and has_persistent() reports true. - Engine::new(None) produces an ephemeral-only engine; the 'persistent' alias is genuinely absent. - Each Engine gets a distinct ephemeral path even when multiple Engines coexist in the same PID (parallel test runners, embedded uses). - Persistent writes survive engine drop and are visible on recreate. - Ephemeral writes are discarded on drop (the entire point). - resolve_target_db routes None -> primary, 'persistent' -> persistent when present, errors with InvalidArgument when --ephemeral-only. - Engine::status() exposes both database paths and the has_persistent flag. ephemeral_only mode reports persistent_path = null. --- hyperdb-mcp/tests/two_db_model_tests.rs | 179 ++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100644 hyperdb-mcp/tests/two_db_model_tests.rs diff --git a/hyperdb-mcp/tests/two_db_model_tests.rs b/hyperdb-mcp/tests/two_db_model_tests.rs new file mode 100644 index 0000000..0d886d2 --- /dev/null +++ b/hyperdb-mcp/tests/two_db_model_tests.rs @@ -0,0 +1,179 @@ +// Copyright (c) 2026, Salesforce, Inc. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Regression tests for the ephemeral-primary + persistent-attached +//! engine model. + +use hyperdb_mcp::engine::Engine; +use hyperdb_mcp::error::ErrorCode; +use tempfile::TempDir; + +/// `Engine::new(Some(path))` attaches the file at `path` as `"persistent"` +/// and reports `has_persistent() = true`. +#[test] +fn engine_with_persistent_path_attaches_under_persistent_alias() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("ws.hyper"); + let engine = Engine::new(Some(path.to_str().unwrap().into())).unwrap(); + assert!(engine.has_persistent()); + // The persistent attachment is reachable via fully-qualified SQL — + // a CREATE TABLE there should succeed and the table visible via + // a qualified probe. + engine + .execute_command("CREATE TABLE \"persistent\".\"public\".\"smoke\" (x INT)") + .unwrap(); + let rows = engine + .execute_query_to_json( + "SELECT tablename FROM \"persistent\".pg_catalog.pg_tables \ + WHERE schemaname = 'public' AND tablename = 'smoke'", + ) + .unwrap(); + assert_eq!(rows.len(), 1); +} + +/// `Engine::new(None)` creates an ephemeral-only engine — no persistent +/// attachment, no `"persistent"` alias. +#[test] +fn engine_without_persistent_path_is_ephemeral_only() { + let engine = Engine::new(None).unwrap(); + assert!(!engine.has_persistent()); + // Querying the persistent alias must fail because nothing is attached + // under that name. + let result = + engine.execute_query_to_json("SELECT 1 FROM \"persistent\".pg_catalog.pg_database"); + assert!( + result.is_err(), + "no persistent alias must reject queries naming it" + ); +} + +/// The ephemeral path is unique per Engine — concurrent engines don't +/// collide on the same temp file. +#[test] +fn ephemeral_paths_are_unique_per_engine() { + let e1 = Engine::new(None).unwrap(); + let e2 = Engine::new(None).unwrap(); + assert_ne!( + e1.ephemeral_path(), + e2.ephemeral_path(), + "each engine must own a distinct ephemeral file" + ); +} + +/// Data written to the persistent attachment via fully-qualified SQL +/// survives the engine drop and is visible to a fresh engine on the +/// same path. +#[test] +fn persistent_writes_survive_engine_recreate() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("survives.hyper"); + let path_str = path.to_str().unwrap().to_string(); + + { + let engine = Engine::new(Some(path_str.clone())).unwrap(); + engine + .execute_command( + "CREATE TABLE \"persistent\".\"public\".\"keepers\" (n INT)", + ) + .unwrap(); + engine + .execute_command( + "INSERT INTO \"persistent\".\"public\".\"keepers\" VALUES (1), (2), (3)", + ) + .unwrap(); + } + + let engine = Engine::new(Some(path_str)).unwrap(); + let rows = engine + .execute_query_to_json( + "SELECT n FROM \"persistent\".\"public\".\"keepers\" ORDER BY n", + ) + .unwrap(); + assert_eq!(rows.len(), 3); + assert_eq!(rows[0]["n"], 1); + assert_eq!(rows[2]["n"], 3); +} + +/// Data written to the ephemeral primary disappears when the engine is +/// dropped — that's the point of "ephemeral". +#[test] +fn ephemeral_writes_are_discarded_on_drop() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("ws.hyper"); + let path_str = path.to_str().unwrap().to_string(); + + { + let engine = Engine::new(Some(path_str.clone())).unwrap(); + // Default-target: writes go to ephemeral. + engine + .execute_command("CREATE TABLE scratch (id INT)") + .unwrap(); + engine + .execute_command("INSERT INTO scratch VALUES (1), (2), (3)") + .unwrap(); + } + + // New engine, same persistent file. The persistent attachment exists + // but `scratch` was in the previous engine's ephemeral primary. + let engine = Engine::new(Some(path_str)).unwrap(); + // Probe: scratch should not be visible. We query through the engine's + // own connection (ephemeral) and through persistent — neither should + // see it. + let rows = engine + .execute_query_to_json("SELECT tablename FROM pg_catalog.pg_tables WHERE tablename = 'scratch'") + .unwrap_or_default(); + assert!( + rows.is_empty(), + "scratch must not survive engine drop (it lived in ephemeral)" + ); +} + +/// `resolve_target_db("persistent")` returns the alias when persistent +/// is present, errors when it isn't. +#[test] +fn resolve_target_db_handles_persistent_presence() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("ws.hyper"); + let with_persistent = Engine::new(Some(path.to_str().unwrap().into())).unwrap(); + assert_eq!( + with_persistent.resolve_target_db(Some("persistent")).unwrap(), + "persistent" + ); + assert_eq!( + with_persistent.resolve_target_db(None).unwrap(), + with_persistent.primary_db_name() + ); + + let ephemeral_only = Engine::new(None).unwrap(); + assert_eq!( + ephemeral_only.resolve_target_db(None).unwrap(), + ephemeral_only.primary_db_name() + ); + let err = ephemeral_only + .resolve_target_db(Some("persistent")) + .unwrap_err(); + assert_eq!(err.code, ErrorCode::InvalidArgument); + assert!(err.message.contains("--ephemeral-only")); +} + +/// Status JSON exposes both database paths and the `has_persistent` flag. +#[test] +fn engine_status_reports_both_paths() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("ws.hyper"); + let engine = Engine::new(Some(path.to_str().unwrap().into())).unwrap(); + let status = engine.status().unwrap(); + assert!(status["has_persistent"].as_bool().unwrap()); + assert!(status["ephemeral_path"].is_string()); + assert!(status["persistent_path"].is_string()); +} + +/// In ephemeral-only mode, status reflects no persistent attachment. +#[test] +fn engine_status_ephemeral_only_reports_no_persistent() { + let engine = Engine::new(None).unwrap(); + let status = engine.status().unwrap(); + assert!(!status["has_persistent"].as_bool().unwrap()); + assert!(status["ephemeral_path"].is_string()); + assert!(status["persistent_path"].is_null()); +} From 40d971b4ae4b8cb5484fe781f7afb7e21c4b41ee Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Mon, 25 May 2026 04:11:02 -0700 Subject: [PATCH 7/9] docs(mcp): describe ephemeral-primary + persistent-attached model README: - Operating Modes section now leads with the two-database concept and explains how to target either DB from SQL via fully-qualified names. - New 'Database storage' table documents --persistent-db default, --ephemeral-only, and the deprecated --workspace alias. - Saved-queries persistence note updated (queries now land in the persistent attachment automatically). - CLI Reference rewritten to match the actual flag set; --bare removed. - Examples migrated from --workspace to --persistent-db. DEVELOPMENT.md: - 'Workspace Modes Internals' replaced with 'Two-Database Engine Model' covering the per-engine ephemeral path naming, the schema_search_path pin to the primary, and the catalog/saved-queries routing helpers. CHANGELOG.md: - Added entries for the two-database engine model, platform-default persistent path, --persistent-db / --ephemeral-only flags, and the catalog / saved-queries persistence move. - Added 'Removed' section documenting --bare retirement. --- hyperdb-mcp/CHANGELOG.md | 27 ++++++++++ hyperdb-mcp/DEVELOPMENT.md | 12 +++-- hyperdb-mcp/README.md | 104 ++++++++++++++++++++++++++++--------- 3 files changed, 114 insertions(+), 29 deletions(-) diff --git a/hyperdb-mcp/CHANGELOG.md b/hyperdb-mcp/CHANGELOG.md index ed411b1..d4f833a 100644 --- a/hyperdb-mcp/CHANGELOG.md +++ b/hyperdb-mcp/CHANGELOG.md @@ -31,6 +31,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/). attempts are rate-limited to 3 per 60 seconds; exceeding the limit triggers daemon shutdown so the user sees the failure clearly rather than spinning silently. +- **Two-database engine model.** Every session now has both an + ephemeral primary (created fresh per-session, deleted on exit) AND + a persistent attachment under the alias `"persistent"`. Unqualified + SQL routes to the ephemeral primary (the LLM's scratch space); + fully-qualified SQL like + `INSERT INTO "persistent"."public"."customers" ...` writes to + storage that survives across sessions. +- **Platform-default persistent path.** When `--persistent-db` is + unset, the persistent file lives at the platform data directory: + `~/Library/Application Support/hyperdb/workspace.hyper` on macOS, + `~/.local/share/hyperdb/workspace.hyper` on Linux, + `%APPDATA%\hyperdb\workspace.hyper` on Windows. Override with + `HYPERDB_PERSISTENT_DB`. +- **New CLI flags:** `--persistent-db ` (replaces `--workspace`, + which is kept as a deprecated alias with a stderr warning); and + `--ephemeral-only` to skip the persistent attachment entirely. +- The `_table_catalog` and `_hyperdb_saved_queries` meta-tables now + live in the persistent attachment instead of the connection's + primary, so saved queries automatically persist across sessions + without any flag toggling. + +### Removed + +- **`--bare` flag.** Catalog seeding is now uniform: created when MCP + creates a fresh `.hyper`, never touched on existing files. Users + who want a pristine `.hyper` for export can `DROP TABLE _table_catalog` + once after creation; subsequent opens won't recreate it. ## [0.1.1] - 2026-05-13 diff --git a/hyperdb-mcp/DEVELOPMENT.md b/hyperdb-mcp/DEVELOPMENT.md index 8204f97..7f1f142 100644 --- a/hyperdb-mcp/DEVELOPMENT.md +++ b/hyperdb-mcp/DEVELOPMENT.md @@ -196,12 +196,16 @@ Derived fields are merged into the serialized JSON so callers get a self-contain --- -## Workspace Modes Internals +## Two-Database Engine Model -- **Ephemeral** — `Engine::new(None)` creates a temp directory (`$TMPDIR/hyperdb-mcp-/`) with a `workspace.hyper` file. Cleaned up on process exit. In daemon mode, `Engine::drop` issues `DETACH DATABASE` (releasing Hyper's file lock — required on Windows) before `remove_dir_all` deletes the temp directory. -- **Persistent** — `Engine::new(Some(path))` uses the caller-supplied path. Parent directories are created automatically. `~` is expanded via `$HOME` (no shell crate dependency). +Every `Engine` holds: -Logs (both `hyperd` server logs and the MCP client log) land in the same directory as the workspace file. The `status` tool reports log paths so operators know where to look. +- **Ephemeral primary** at `$TMPDIR/hyperdb-mcp--/scratch.hyper`. Created fresh on each `Engine::new` (`` is a process-wide atomic so parallel test runners and restart-after-ConnectionLost reconnects get distinct files). The connection is *bound* to this database — unqualified SQL routes here. `Engine::drop` always deletes the per-engine temp directory. +- **Persistent attachment** under the alias `"persistent"`. When `Engine::new(Some(path))` is called, the engine runs `CREATE DATABASE IF NOT EXISTS ''` (creating the file if missing) and `ATTACH DATABASE '' AS "persistent"`. Then `SET schema_search_path = ''` keeps unqualified resolution routing into the ephemeral primary. `Engine::new(None)` skips this step (the `--ephemeral-only` mode); `engine.has_persistent()` reflects which mode was selected. + +Catalog and saved-queries meta-tables live in the persistent attachment, qualified via `"persistent"."public"."_table_catalog"` etc. When the engine has no persistent attachment, those operations no-op. See `crate::table_catalog::qualified_catalog` and `crate::saved_queries::WorkspaceStore::qualified_table` for the routing helpers. + +Logs land next to the persistent file when one is supplied (so users find them in a stable per-project location); ephemeral-only sessions log to `$TMPDIR/hyperdb-mcp-/`. The `status` tool reports `ephemeral_path`, `persistent_path`, and `has_persistent` so operators can confirm where data lives. ## Daemon Mode Internals diff --git a/hyperdb-mcp/README.md b/hyperdb-mcp/README.md index 6552289..e866d98 100644 --- a/hyperdb-mcp/README.md +++ b/hyperdb-mcp/README.md @@ -31,7 +31,7 @@ LLMs are powerful at reasoning but cannot natively crunch millions of rows. This - **Pre-ingest file inspection** — `inspect_file` dry-runs the same inference without touching Hyper so LLMs can build safe schema overrides in one shot - **Partial schema overrides** — supply just the columns you want to correct (e.g. `{"population":"BIGINT"}`) — the rest keep their inferred type - **Rich resource surface** — workspace readme, per-table JSON and CSV samples, and one JSON + one CSV resource per table so LLMs can orient themselves via `resources/list` without any tool calls -- **Saved queries** — register named read-only SQL with `save_query`; each query becomes `hyper://queries/{name}/definition` (metadata) + `hyper://queries/{name}/result` (live re-run). Persisted in `--workspace` mode, session-only otherwise +- **Saved queries** — register named read-only SQL with `save_query`; each query becomes `hyper://queries/{name}/definition` (metadata) + `hyper://queries/{name}/result` (live re-run). Persisted in the persistent attachment, session-only when `--ephemeral-only` - **Live resource-update notifications** — MCP clients can `resources/subscribe` to any `hyper://...` URI; the server fires `notifications/resources/updated` after every ingest, DDL, watcher event, or saved-query mutation --- @@ -132,12 +132,12 @@ Or if you built from source: } ``` -For a **persistent workspace** (tables survive across sessions), add `"args"`: +By default, persistent storage lives at the platform data dir (`~/Library/Application Support/hyperdb/workspace.hyper` on macOS, `~/.local/share/hyperdb/workspace.hyper` on Linux, `%APPDATA%\hyperdb\workspace.hyper` on Windows). To use a custom path: ```json -"args": ["--workspace", "/path/to/my-project.hyper"] +"args": ["--persistent-db", "/path/to/my-project.hyper"] ``` -Multiple MCP clients can point at the **same** persistent workspace simultaneously — they all connect through the shared `hyperd` daemon and use Hyper's MVCC transaction isolation. See [Operating Modes](#operating-modes) below. +Multiple MCP clients can point at the **same** persistent file simultaneously — they all connect through the shared `hyperd` daemon and use Hyper's MVCC transaction isolation. See [Operating Modes](#operating-modes) below. #### Claude Code / AI Suite @@ -163,7 +163,7 @@ Any tool that supports the MCP stdio transport can use this server. Point it at ## Operating Modes -The server has two independent mode dimensions: **how the Hyper engine is run** and **where the database is stored**. +Each session has **two databases**: an ephemeral primary (always created fresh per session, deleted on exit) and a persistent attachment (stored at the platform-default location or a path you supply). Unqualified SQL targets the ephemeral primary; the persistent attachment is reachable as the `"persistent"` alias and survives across sessions. ### Hyper engine @@ -174,14 +174,58 @@ The server has two independent mode dimensions: **how the Hyper engine is run** The shared daemon is the bigger win for users running multiple AI clients (Claude Code + Cursor + VS Code) — they all share one Hyper engine instead of spawning three. -### Database persistence +### Database storage | Mode | Flag | Behavior | |---|---|---| -| **Ephemeral** *(default)* | *(none)* | A temp `.hyper` file is created per session and deleted on exit (DETACH + delete in daemon mode, Windows-safe). | -| **Persistent** | `--workspace ` | Uses the supplied `.hyper` file; survives across sessions. Multiple clients can point at the same path simultaneously. | +| **Default** | *(none)* | Ephemeral primary in `$TMPDIR/hyperdb-mcp--/scratch.hyper` + persistent attachment at the platform data dir (e.g. `~/Library/Application Support/hyperdb/workspace.hyper` on macOS). | +| **Custom persistent path** | `--persistent-db ` | Same as default but the persistent file lives at ``. The deprecated `--workspace ` is accepted as an alias with a stderr warning. | +| **Ephemeral-only** | `--ephemeral-only` | No persistent attachment; the session has only the ephemeral primary plus any user-attached databases via `attach_database`. Saved queries fall back to in-memory storage and disappear when the session ends. | -The two dimensions are orthogonal — any combination works. With the default (shared daemon + ephemeral), every client gets its own scratch database living inside the same shared engine; with `--workspace` added, multiple clients can collaborate on the same persistent dataset. +`HYPERDB_PERSISTENT_DB` overrides the default persistent path the same way `--persistent-db` does. + +### Working with both databases + +Tool calls default to the ephemeral primary — that's the LLM's scratch space for exploratory work. To target the persistent attachment from SQL, use a fully-qualified table reference: + +```sql +-- Read from persistent +SELECT * FROM "persistent"."public"."customers"; + +-- Write to persistent +CREATE TABLE "persistent"."public"."revenue_2026" AS + SELECT region, SUM(amount) FROM scratch_orders GROUP BY region; +``` + +The `_table_catalog` (which tracks MCP-managed metadata for your tables) lives in the persistent attachment automatically — there's nothing to manage. If you want a pristine `.hyper` file for export with no MCP bookkeeping, run `DROP TABLE "persistent"."public"."_table_catalog"` once and subsequent sessions opening that file will leave it dropped. + +### Daemon management + +The daemon is normally invisible — it auto-spawns and idle-times-out on its own. For diagnostics: + +```bash +hyperdb-mcp daemon status # Show running daemon (PID, endpoint, started_at, version) +hyperdb-mcp daemon stop # Gracefully shut down the daemon +hyperdb-mcp daemon # Run as a daemon explicitly (rarely needed) +``` + +State files live at `~/.hyperdb/` by default (override with `HYPERDB_STATE_DIR`). + +### Recovery from hyperd crashes + +The daemon polls `hyperd` every 5 seconds. If the process has exited (crashed, OOM, killed), the daemon spawns a replacement, atomically updates `~/.hyperdb/daemon.json` with the new endpoint, and continues serving clients. Clients see one failed tool call (the request that was in flight when hyperd died); the next tool call transparently reconnects to the new hyperd via the same recovery path used for normal connection drops. + +If a client itself notices hyperd is unreachable before the next polling tick, it sends a fast-path `REPORT_HYPERD_ERROR` signal to the daemon so the restart kicks off without waiting for the timer. + +If hyperd repeatedly fails to start (3 attempts within 60 seconds — e.g., misconfigured `HYPERD_PATH`, port exhaustion, broken binary), the daemon shuts itself down and removes the discovery file. The next MCP client to start up will then spawn a fresh daemon, surfacing any persistent failure clearly to the user rather than spinning silently. + +**Known limitation:** if hyperd hangs (alive at the OS level but unresponsive to queries), the daemon's polling can't detect it and your tool call may stall indefinitely. The recovery path is `hyperdb-mcp daemon stop` followed by reconnecting from your MCP client. + +### Other behavioral flags + +| Flag | Behavior | +|---|---| +| `--read-only` | Disables `execute`, `load_data`, `load_file`, `watch_directory`, `save_query`, `delete_query`, and Hyper-format export. See [Read-Only Mode](#read-only-mode). | ### Daemon management @@ -210,7 +254,6 @@ If hyperd repeatedly fails to start (3 attempts within 60 seconds — e.g., misc | Flag | Behavior | |---|---| | `--read-only` | Disables `execute`, `load_data`, `load_file`, `watch_directory`, `save_query`, `delete_query`, and Hyper-format export. See [Read-Only Mode](#read-only-mode). | -| `--bare` | Skips MCP-managed auxiliary tables (`_table_catalog`); saved queries are kept in-memory only, even with `--workspace`. | --- @@ -398,10 +441,10 @@ Each saved query produces **two** resources: - `hyper://queries/{name}/result` — re-runs the SQL on every read and returns `{ name, result: [...], stats: {...} }`. -**Persistence:** queries saved while `--workspace ` is set are -stored in the `_hyperdb_saved_queries` meta-table inside the `.hyper` -file and survive server restarts. In ephemeral workspaces they live only -for the lifetime of the server process. +**Persistence:** saved queries land in the persistent attachment's +`_hyperdb_saved_queries` meta-table (`"persistent"."public"."_hyperdb_saved_queries"`) +and survive server restarts. In `--ephemeral-only` sessions they live +only for the lifetime of the server process. #### `save_query` @@ -588,7 +631,7 @@ Four guided analytical workflows registered as MCP **Prompts**. ## Read-Only Mode ```bash -hyperdb-mcp --workspace ~/analytics.hyper --read-only +hyperdb-mcp --persistent-db ~/analytics.hyper --read-only ``` - **Allowed:** `query`, `query_data`, `query_file`, `describe`, `sample`, `inspect_file`, `status`, `export` @@ -697,23 +740,34 @@ Full reference: [Data Cloud SQL Reference](https://developer.salesforce.com/docs hyperdb-mcp [OPTIONS] [COMMAND] Commands: - daemon Run as a background daemon managing a shared hyperd process + daemon Run as a background daemon managing a shared hyperd process Options: - --workspace Path to the `.hyper` workspace file for persistent mode (omit for ephemeral) - --read-only Disable mutating tools (execute, load_data, load_file, save_query, delete_query, watch_directory) - --bare Skip MCP-managed auxiliary tables (`_table_catalog`) and force saved queries into in-memory storage, even with --workspace - --no-daemon Disable the shared daemon and spawn a private hyperd (legacy per-session behavior) + --persistent-db Path to the persistent .hyper file. Defaults to the platform + data dir (~/Library/Application Support/hyperdb/workspace.hyper + on macOS, ~/.local/share/hyperdb/workspace.hyper on Linux, + %APPDATA%\hyperdb\workspace.hyper on Windows). Override via + the HYPERDB_PERSISTENT_DB env var. + --ephemeral-only Skip the persistent attachment entirely. Disables save_query + persistence (queries fall back to session storage). + --read-only Disable mutating tools (execute, load_data, load_file, + save_query, delete_query, watch_directory) + --no-daemon Disable the shared daemon and spawn a private hyperd + +Deprecated: + --workspace Old name for --persistent-db. Still accepted, emits a + stderr warning, and will be removed in a future release. Daemon subcommand: - hyperdb-mcp daemon Start the daemon (usually auto-spawned) - hyperdb-mcp daemon stop Gracefully stop the running daemon - hyperdb-mcp daemon status Show running daemon info - hyperdb-mcp daemon --port Override the health/lock port (default 7484) - hyperdb-mcp daemon --idle-timeout Override idle timeout (default 1800 = 30 min) + hyperdb-mcp daemon Start the daemon (usually auto-spawned) + hyperdb-mcp daemon stop Gracefully stop the running daemon + hyperdb-mcp daemon status Show running daemon info + hyperdb-mcp daemon --port Override the health/lock port (default 7484) + hyperdb-mcp daemon --idle-timeout Override idle timeout (default 1800 = 30 min) Environment: HYPERD_PATH Path to hyperd binary (auto-detected if on PATH) + HYPERDB_PERSISTENT_DB Override the default persistent-db path HYPERDB_STATE_DIR Override daemon state directory (default ~/.hyperdb/) HYPERDB_DAEMON_PORT Override daemon health/lock port (default 7484) HYPERDB_DAEMON_IDLE_TIMEOUT Override daemon idle timeout in seconds (default 1800) From 3a23ba5acb0fd4c2dc70838cd76726d640d62969 Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Mon, 25 May 2026 04:20:47 -0700 Subject: [PATCH 8/9] perf(mcp): cache _table_catalog presence on the engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit table_catalog::table_present previously ran a fresh "persistent".pg_catalog.pg_tables probe on every catalog read/write (get, list, delete_for, set_metadata, upsert_stub via ensure_exists). For workloads that touch the catalog frequently — every ingest, every DDL — that's one round-trip per call after the catalog has clearly been created. Add a catalog_present_cache: Mutex> on Engine. The cache: - Lives for the engine's lifetime, which is the right TTL: a ConnectionLost reconnect builds a fresh Engine, naturally resetting the cache. - Short-circuits to Ok(false) in --ephemeral-only mode without running the probe at all. - Gets primed to Some(true) by ensure_exists immediately after CREATE TABLE IF NOT EXISTS so the next catalog op skips the probe. - Returns the cached value on every subsequent call until the engine is dropped. Two new tests in two_db_model_tests verify the cache: - catalog_presence_probe_is_cached: probe runs once, mark_catalog_present flips to true without running the probe again. - catalog_presence_short_circuits_in_ephemeral_only: probe never runs when has_persistent() is false. --- hyperdb-mcp/src/engine.rs | 58 ++++++++++++++++++++++++ hyperdb-mcp/src/table_catalog.rs | 29 +++++++----- hyperdb-mcp/tests/two_db_model_tests.rs | 59 +++++++++++++++++++++---- 3 files changed, 127 insertions(+), 19 deletions(-) diff --git a/hyperdb-mcp/src/engine.rs b/hyperdb-mcp/src/engine.rs index 2a62b85..9a0618b 100644 --- a/hyperdb-mcp/src/engine.rs +++ b/hyperdb-mcp/src/engine.rs @@ -168,6 +168,17 @@ pub struct Engine { /// to `false` after the server consumes it via /// [`Self::take_persistent_was_created`]. persistent_was_created: bool, + /// Cached "_table_catalog exists in persistent" probe. Populated on + /// first call to [`Self::catalog_present_in_persistent`] and held + /// for the lifetime of the connection. Avoids repeated + /// `pg_catalog.pg_tables` round-trips on every catalog read/write. + /// Lives on the Engine because the catalog is per-engine-lifetime + /// (a `ConnectionLost` reconnect creates a fresh Engine, so the + /// cache resets at the right boundary). `None` means "not yet + /// probed"; `Some(false)` is cacheable too — once the catalog is + /// confirmed absent on a read-only or `--ephemeral-only` flow it + /// stays absent for the whole engine lifetime. + catalog_present_cache: std::sync::Mutex>, log_dir: PathBuf, } @@ -301,6 +312,7 @@ impl Engine { ephemeral_path, persistent_path, persistent_was_created, + catalog_present_cache: std::sync::Mutex::new(None), log_dir, }) } @@ -375,6 +387,7 @@ impl Engine { ephemeral_path: ephemeral_path.to_path_buf(), persistent_path, persistent_was_created, + catalog_present_cache: std::sync::Mutex::new(None), log_dir: log_dir.to_path_buf(), })) } @@ -533,6 +546,51 @@ impl Engine { self.persistent_was_created } + /// Returns whether `_table_catalog` exists in the persistent + /// attachment, caching the result on first call so subsequent + /// catalog read/write paths skip the `pg_catalog.pg_tables` probe. + /// + /// `prober` is the SQL-side existence check; the cache layer here + /// is intentionally generic so the catalog module can keep its + /// existing probe SQL in one place. + /// + /// Returns `Ok(false)` immediately when the engine has no + /// persistent attachment (no probe needed). + /// + /// # Errors + /// Propagates whatever error `prober` returns on the first call. + /// On subsequent calls, the cached value is returned without + /// re-running the probe. + pub fn catalog_present_in_persistent(&self, prober: F) -> Result + where + F: Fn(&Engine) -> Result, + { + if !self.has_persistent() { + return Ok(false); + } + // Fast path: cache already populated. + if let Ok(guard) = self.catalog_present_cache.lock() { + if let Some(present) = *guard { + return Ok(present); + } + } + // Slow path: run the probe and cache its result. + let present = prober(self)?; + if let Ok(mut guard) = self.catalog_present_cache.lock() { + *guard = Some(present); + } + Ok(present) + } + + /// Synchronously set the catalog-presence cache to `true` — used by + /// `table_catalog::ensure_exists` after a `CREATE TABLE IF NOT + /// EXISTS` so subsequent reads/writes skip the existence probe. + pub fn mark_catalog_present(&self) { + if let Ok(mut guard) = self.catalog_present_cache.lock() { + *guard = Some(true); + } + } + /// Direct access to the underlying connection for operations not /// wrapped by `Engine` (e.g. `export_csv`, `execute_query_to_arrow`). pub fn connection(&self) -> &Connection { diff --git a/hyperdb-mcp/src/table_catalog.rs b/hyperdb-mcp/src/table_catalog.rs index 30b3846..b09e4f9 100644 --- a/hyperdb-mcp/src/table_catalog.rs +++ b/hyperdb-mcp/src/table_catalog.rs @@ -176,6 +176,9 @@ pub fn ensure_exists(engine: &Engine) -> Result<(), McpError> { }; let ddl = format!("CREATE TABLE IF NOT EXISTS {qualified} {CATALOG_COLUMNS}"); engine.execute_command(&ddl)?; + // After CREATE TABLE IF NOT EXISTS the catalog is guaranteed + // present — short-circuit subsequent existence probes. + engine.mark_catalog_present(); Ok(()) } @@ -538,18 +541,22 @@ fn user_tables(engine: &Engine) -> Result, McpError> { /// `true` when `_table_catalog` is present inside the persistent /// attachment. Returns `false` if there is no persistent attachment. +/// +/// Caches the result on the engine after the first probe — subsequent +/// reads/writes don't re-run the existence query. Callers that mutate +/// the catalog (`ensure_exists`) update the cache directly via +/// [`Engine::mark_catalog_present`]. fn table_present(engine: &Engine) -> Result { - if !engine.has_persistent() { - return Ok(false); - } - let sql = format!( - "SELECT tablename FROM \"{}\".pg_catalog.pg_tables \ - WHERE schemaname = 'public' AND tablename = {}", - Engine::PERSISTENT_ALIAS, - sql_literal(TABLE_CATALOG_TABLE) - ); - let rows = engine.execute_query_to_json(&sql)?; - Ok(!rows.is_empty()) + engine.catalog_present_in_persistent(|engine| { + let sql = format!( + "SELECT tablename FROM \"{}\".pg_catalog.pg_tables \ + WHERE schemaname = 'public' AND tablename = {}", + Engine::PERSISTENT_ALIAS, + sql_literal(TABLE_CATALOG_TABLE) + ); + let rows = engine.execute_query_to_json(&sql)?; + Ok(!rows.is_empty()) + }) } /// Return `COUNT(*)` for a user table inside the persistent attachment. diff --git a/hyperdb-mcp/tests/two_db_model_tests.rs b/hyperdb-mcp/tests/two_db_model_tests.rs index 0d886d2..7dec4f7 100644 --- a/hyperdb-mcp/tests/two_db_model_tests.rs +++ b/hyperdb-mcp/tests/two_db_model_tests.rs @@ -72,9 +72,7 @@ fn persistent_writes_survive_engine_recreate() { { let engine = Engine::new(Some(path_str.clone())).unwrap(); engine - .execute_command( - "CREATE TABLE \"persistent\".\"public\".\"keepers\" (n INT)", - ) + .execute_command("CREATE TABLE \"persistent\".\"public\".\"keepers\" (n INT)") .unwrap(); engine .execute_command( @@ -85,9 +83,7 @@ fn persistent_writes_survive_engine_recreate() { let engine = Engine::new(Some(path_str)).unwrap(); let rows = engine - .execute_query_to_json( - "SELECT n FROM \"persistent\".\"public\".\"keepers\" ORDER BY n", - ) + .execute_query_to_json("SELECT n FROM \"persistent\".\"public\".\"keepers\" ORDER BY n") .unwrap(); assert_eq!(rows.len(), 3); assert_eq!(rows[0]["n"], 1); @@ -120,7 +116,9 @@ fn ephemeral_writes_are_discarded_on_drop() { // own connection (ephemeral) and through persistent — neither should // see it. let rows = engine - .execute_query_to_json("SELECT tablename FROM pg_catalog.pg_tables WHERE tablename = 'scratch'") + .execute_query_to_json( + "SELECT tablename FROM pg_catalog.pg_tables WHERE tablename = 'scratch'", + ) .unwrap_or_default(); assert!( rows.is_empty(), @@ -136,7 +134,9 @@ fn resolve_target_db_handles_persistent_presence() { let path = dir.path().join("ws.hyper"); let with_persistent = Engine::new(Some(path.to_str().unwrap().into())).unwrap(); assert_eq!( - with_persistent.resolve_target_db(Some("persistent")).unwrap(), + with_persistent + .resolve_target_db(Some("persistent")) + .unwrap(), "persistent" ); assert_eq!( @@ -177,3 +177,46 @@ fn engine_status_ephemeral_only_reports_no_persistent() { assert!(status["ephemeral_path"].is_string()); assert!(status["persistent_path"].is_null()); } + +/// `catalog_present_in_persistent` caches the probe result so the +/// underlying SQL only runs once per engine lifetime. +#[test] +fn catalog_presence_probe_is_cached() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("ws.hyper"); + let engine = Engine::new(Some(path.to_str().unwrap().into())).unwrap(); + + let probe_count = std::sync::atomic::AtomicUsize::new(0); + let probe = |_e: &Engine| -> Result { + probe_count.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + Ok(false) + }; + + // First call runs the probe. + assert!(!engine.catalog_present_in_persistent(probe).unwrap()); + assert_eq!(probe_count.load(std::sync::atomic::Ordering::SeqCst), 1); + + // Second call uses the cache; probe count stays at 1. + assert!(!engine.catalog_present_in_persistent(probe).unwrap()); + assert_eq!(probe_count.load(std::sync::atomic::Ordering::SeqCst), 1); + + // After mark_catalog_present, the cached value flips to true and + // the probe stays untouched. + engine.mark_catalog_present(); + assert!(engine.catalog_present_in_persistent(probe).unwrap()); + assert_eq!(probe_count.load(std::sync::atomic::Ordering::SeqCst), 1); +} + +/// In ephemeral-only mode, the cache short-circuits to `false` without +/// running the probe at all. +#[test] +fn catalog_presence_short_circuits_in_ephemeral_only() { + let engine = Engine::new(None).unwrap(); + let probe_count = std::sync::atomic::AtomicUsize::new(0); + let probe = |_e: &Engine| -> Result { + probe_count.fetch_add(1, std::sync::atomic::Ordering::SeqCst); + Ok(true) + }; + assert!(!engine.catalog_present_in_persistent(probe).unwrap()); + assert_eq!(probe_count.load(std::sync::atomic::Ordering::SeqCst), 0); +} From e0f5f1dc32d81aeb887e76c3a410422848ef6335 Mon Sep 17 00:00:00 2001 From: Stefan Steiner Date: Mon, 25 May 2026 04:31:34 -0700 Subject: [PATCH 9/9] fix(mcp): watchers auto-recover after hyperd restart When the daemon restarts hyperd, every connection in a watcher's pool becomes invalid. Previously the watcher would route every subsequent file to 'failed/' until the user noticed and re-issued watch_directory. Now: - The watcher's pool lives behind Arc>> so it can be swapped atomically. - Each per-file ingest reads the current pool from the slot, calls ingest_one_ready_file, and inspects the result. - If the result is a connection-lost error (per is_connection_lost), rebuild_watcher_pool builds a fresh Pool from the engine's *current* endpoint and the ingest retries exactly once on the new pool. - Persistent failures (the retry also fails) fall through to the existing 'failed/' move logic so a permanently-broken file doesn't keep the watcher pinned in retry loops. - The initial-sweep path uses the same recovery wrapper so a watcher registered just after a hyperd hiccup still ingests the backlog. Internal refactor: - Pure ingest path extracted into ingest_one_ready_file (returns Result with no file-system side effects). - process_ready_with_recovery wraps it: handles symlink rejection, the retry-on-connection-lost loop, and the success/failure file moves. The old process_ready_async is gone in favor of these two. The DEVELOPMENT.md known-limitation entry for watchers is updated to reflect the new behavior; CHANGELOG gets a Fixed entry. --- hyperdb-mcp/CHANGELOG.md | 16 +++ hyperdb-mcp/DEVELOPMENT.md | 2 +- hyperdb-mcp/src/watcher.rs | 214 ++++++++++++++++++++++++++----------- 3 files changed, 168 insertions(+), 64 deletions(-) diff --git a/hyperdb-mcp/CHANGELOG.md b/hyperdb-mcp/CHANGELOG.md index d4f833a..3df2816 100644 --- a/hyperdb-mcp/CHANGELOG.md +++ b/hyperdb-mcp/CHANGELOG.md @@ -59,6 +59,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/). who want a pristine `.hyper` for export can `DROP TABLE _table_catalog` once after creation; subsequent opens won't recreate it. +### Performance + +- **Per-engine `_table_catalog` presence cache.** Catalog reads/writes + used to round-trip a `pg_catalog.pg_tables` probe on every call; + now the existence check is cached for the engine's lifetime and + primed immediately after `CREATE TABLE IF NOT EXISTS`. + +### Fixed + +- **Watcher recovery after hyperd restart.** The watcher's connection + pool now auto-rebuilds when a per-file ingest hits a connection-lost + error (typically after the daemon restarts hyperd). Each ingest gets + one retry on a fresh pool; persistent failures still flow into the + standard `failed/` move so a single broken file can't pin the + watcher in retry loops. + ## [0.1.1] - 2026-05-13 ### Added diff --git a/hyperdb-mcp/DEVELOPMENT.md b/hyperdb-mcp/DEVELOPMENT.md index 7f1f142..ea1f494 100644 --- a/hyperdb-mcp/DEVELOPMENT.md +++ b/hyperdb-mcp/DEVELOPMENT.md @@ -243,7 +243,7 @@ Two new code paths fire `report_hyperd_error_to_daemon` (best-effort, 200ms time ### Known limitations - **Hung-but-alive `hyperd`** (TCP listening, but unresponsive to queries) is NOT detected. The monitor's `try_wait()` returns `None` for a hung process; client tool calls hang on the read side without producing a `ConnectionLost` error. Operator recovery is `hyperdb-mcp daemon stop` followed by reconnect. -- **Watchers (background tasks holding `AsyncConnection` handles in `WatcherRegistry`)** do not currently auto-reconnect after a hyperd restart. They will go quiet until the user re-issues `watch_directory`. +- **Watchers** auto-recover from hyperd restarts: when an ingest fails with a connection-lost error, the watcher rebuilds its connection pool against the engine's current endpoint and retries the file once. Persistent failures (the second attempt also fails) fall through to the standard `failed/` move so a single broken file can't keep the watcher pinned in retry loops. See `src/daemon/{mod,discovery,health,run,spawn}.rs` for the full implementation. diff --git a/hyperdb-mcp/src/watcher.rs b/hyperdb-mcp/src/watcher.rs index 226f4fc..c8ec31b 100644 --- a/hyperdb-mcp/src/watcher.rs +++ b/hyperdb-mcp/src/watcher.rs @@ -71,6 +71,53 @@ use tokio::task::JoinHandle; /// this to the full data file name (e.g. `orders.csv` → `orders.csv.ready`). pub const READY_SUFFIX: &str = ".ready"; +/// Build a watcher connection pool from the current engine. Pulled out +/// of [`start_watching`] so the recovery path (post hyperd restart) +/// can call it again to swap in a fresh pool. +fn build_watcher_pool( + engine: &Arc>>, + concurrency: usize, +) -> Result, McpError> { + let guard = engine + .lock() + .map_err(|_| McpError::new(ErrorCode::InternalError, "Engine lock poisoned"))?; + let eng = guard.as_ref().ok_or_else(|| { + McpError::new( + ErrorCode::InternalError, + "Engine not initialized when watcher pool requested", + ) + })?; + let endpoint = eng.hyperd_endpoint()?; + // Watcher pool connects to the engine's primary (ephemeral). The + // user-supplied table on `watch_directory` always lives there + // unless a future flag routes it to the persistent attachment. + let workspace = eng.ephemeral_path().to_string_lossy().to_string(); + let cfg = PoolConfig::new(endpoint, workspace) + .create_mode(CreateMode::DoNotCreate) + .max_size(concurrency); + Ok(Arc::new(create_pool(cfg).map_err(|e| { + McpError::new( + ErrorCode::InternalError, + format!("Failed to build watcher pool: {e}"), + ) + })?)) +} + +/// Replace the watcher's pool atomically with a freshly-built one. +/// Called from the ingest path when a connection-lost error suggests +/// the underlying hyperd has been replaced (daemon restart, manual +/// `hyperd kill`). +async fn rebuild_watcher_pool( + pool_slot: &tokio::sync::RwLock>, + engine: &Arc>>, + concurrency: usize, +) -> Result<(), McpError> { + let new_pool = build_watcher_pool(engine, concurrency)?; + let mut guard = pool_slot.write().await; + *guard = new_pool; + Ok(()) +} + /// Default ceiling on parallel ingests per watcher. Chosen conservatively — /// each parallel ingest holds one open TCP connection to hyperd plus a /// transaction, and most workloads have fewer than 4 incoming streams at a @@ -146,9 +193,12 @@ pub struct WatcherHandle { /// tokio mpsc channel. Joined on drop after the notify watcher is /// dropped (which closes the std sender and lets this thread exit). forwarder: Option>, - /// Per-watcher connection pool. Kept here so it's torn down when the - /// handle is dropped — all outstanding connections close. - _pool: Arc, + /// Per-watcher connection pool. Wrapped in a tokio `RwLock` so the + /// recovery path can swap in a fresh pool after a hyperd restart + /// without disturbing in-flight ingests on the old pool. Kept here + /// so the pool is torn down (all connections close) when the handle + /// is dropped. + _pool: Arc>>, } impl Drop for WatcherHandle { @@ -325,31 +375,13 @@ pub fn start_watching( // from the engine under a brief lock, then release it — the pool // itself operates independently of the sync connection the engine // still owns. - let pool = { - let guard = engine - .lock() - .map_err(|_| McpError::new(ErrorCode::InternalError, "Engine lock poisoned"))?; - let eng = guard.as_ref().ok_or_else(|| { - McpError::new( - ErrorCode::InternalError, - "Engine not initialized when watcher started", - ) - })?; - let endpoint = eng.hyperd_endpoint()?; - // Watcher pool connects to the engine's primary (ephemeral). The - // user-supplied table on `watch_directory` always lives there - // unless a future flag routes it to the persistent attachment. - let workspace = eng.ephemeral_path().to_string_lossy().to_string(); - let cfg = PoolConfig::new(endpoint, workspace) - .create_mode(CreateMode::DoNotCreate) - .max_size(concurrency); - Arc::new(create_pool(cfg).map_err(|e| { - McpError::new( - ErrorCode::InternalError, - format!("Failed to build watcher pool: {e}"), - ) - })?) - }; + // Build the pool wrapped in an Arc> so post-construction + // hyperd restarts can swap in a fresh pool without restarting the + // watcher. + let pool = Arc::new(tokio::sync::RwLock::new(build_watcher_pool( + &engine, + concurrency, + )?)); let stats = Arc::new(Mutex::new(WatcherStats { // Concurrency is configured by the user via a `u32`-sized field @@ -388,8 +420,10 @@ pub fn start_watching( tokio::task::block_in_place(|| { rt.block_on(async { for ready_path in scan_ready_files(&canonical) { - process_ready_async( + process_ready_with_recovery( &pool, + &engine, + concurrency, subscriptions.as_deref(), &canonical, &table, @@ -457,6 +491,7 @@ pub fn start_watching( // the consumer's point of view, with stats updated via shared Mutex. let task = { let pool = Arc::clone(&pool); + let engine_for_pool = Arc::clone(&engine); let subs = subscriptions.clone(); let stats = Arc::clone(&stats); let in_flight = Arc::clone(&in_flight); @@ -485,7 +520,8 @@ pub fn start_watching( if !claimed { continue; } - let pool = Arc::clone(&pool); + let pool_slot = Arc::clone(&pool); + let engine_handle = Arc::clone(&engine_for_pool); let subs = subs.clone(); let stats = Arc::clone(&stats); let in_flight = Arc::clone(&in_flight); @@ -494,8 +530,17 @@ pub fn start_watching( let table = table.clone(); tokio::spawn(async move { let _guard = InFlightGuard::new(in_flight); - process_ready_async(&pool, subs.as_deref(), &dir, &table, &path, &stats) - .await; + process_ready_with_recovery( + &pool_slot, + &engine_handle, + concurrency, + subs.as_deref(), + &dir, + &table, + &path, + &stats, + ) + .await; if let Ok(mut set) = in_flight_paths.lock() { set.remove(&path); } @@ -608,8 +653,51 @@ fn strip_ready_suffix(ready_path: &Path) -> Option { /// ingest (including the `BEGIN / COMMIT`) and released on scope exit /// via the `PooledConnection` Drop. Other ingest tasks run in parallel /// on their own connections, up to the pool's `max_size`. -async fn process_ready_async( +/// Ingest one ready file and return `Ok(rows)` on success, or the +/// underlying error. Pure ingest path — no file moves, no stats writes. +/// The wrapper layer ([`process_ready_with_recovery`]) handles those +/// after deciding whether the error is recoverable. +async fn ingest_one_ready_file( pool: &Arc, + table: &str, + ready_path: &Path, + data_path: &Path, +) -> Result { + let conn = pool.get().await.map_err(|e| { + McpError::new( + ErrorCode::InternalError, + format!("Failed to check out connection: {e}"), + ) + })?; + let opts = IngestOptions { + table: table.to_string(), + mode: "append".into(), + schema_override: None, + merge_key: None, + }; + let data_str = data_path + .to_str() + .ok_or_else(|| McpError::new(ErrorCode::InternalError, "Non-UTF-8 path"))?; + let res = match detect_file_format(data_path) { + InferredFileFormat::Parquet => ingest_parquet_file_async(&conn, data_str, &opts).await, + InferredFileFormat::ArrowIpc => ingest_arrow_ipc_file_async(&conn, data_str, &opts).await, + InferredFileFormat::Json => ingest_json_file_async(&conn, data_str, &opts).await, + InferredFileFormat::Csv => ingest_csv_file_async(&conn, data_str, &opts).await, + }?; + let _ = ready_path; // silence the unused-variable lint; the path is used by the caller + Ok(res.rows) +} + +/// Ingest one ready file with one-shot pool-rebuild recovery. If the +/// first attempt fails with a connection-lost error (hyperd was +/// restarted by the daemon, or the pool's connections went stale), the +/// watcher rebuilds its pool from the engine's *current* endpoint and +/// retries the ingest exactly once. Persistent errors fall through to +/// the standard `failed/` move. +async fn process_ready_with_recovery( + pool_slot: &tokio::sync::RwLock>, + engine: &Arc>>, + concurrency: usize, subscriptions: Option<&SubscriptionRegistry>, dir: &Path, table: &str, @@ -624,10 +712,6 @@ async fn process_ready_async( if !ready_path.exists() || !data_path.exists() { return; } - // Reject symlinks for both the sentinel and the data file. A symlink swap - // between this check and the open could redirect ingest to read sensitive - // files (e.g. /etc/passwd) into the table. Producers writing the sentinel - // and data file as regular files (the documented protocol) are unaffected. let is_symlink = |p: &std::path::Path| { p.symlink_metadata() .is_ok_and(|m| m.file_type().is_symlink()) @@ -641,34 +725,38 @@ async fn process_ready_async( return; } - let result = async { - let conn = pool.get().await.map_err(|e| { - McpError::new( - ErrorCode::InternalError, - format!("Failed to check out connection: {e}"), - ) - })?; - - let opts = IngestOptions { - table: table.to_string(), - mode: "append".into(), - schema_override: None, - merge_key: None, - }; - let data_str = data_path - .to_str() - .ok_or_else(|| McpError::new(ErrorCode::InternalError, "Non-UTF-8 path"))?; - let res = match detect_file_format(&data_path) { - InferredFileFormat::Parquet => ingest_parquet_file_async(&conn, data_str, &opts).await, - InferredFileFormat::ArrowIpc => { - ingest_arrow_ipc_file_async(&conn, data_str, &opts).await + // First attempt — uses whatever pool is currently in the slot. + let active_pool = pool_slot.read().await.clone(); + let mut result = ingest_one_ready_file(&active_pool, table, ready_path, &data_path).await; + drop(active_pool); + + // If the first attempt failed with what looks like a connection + // loss, try rebuilding the pool once and retrying. Hyperd restarts + // (daemon-managed) reuse the same endpoint slot but invalidate + // every connection in the pool; without this branch the watcher + // would route every subsequent file to `failed/` until the user + // notices and re-issues `watch_directory`. + if let Err(ref err) = result { + if crate::error::is_connection_lost(&err.message) { + tracing::warn!( + err = %err.message, + "watcher: detected connection-lost error, rebuilding pool and retrying" + ); + match rebuild_watcher_pool(pool_slot, engine, concurrency).await { + Ok(()) => { + let active_pool = pool_slot.read().await.clone(); + result = + ingest_one_ready_file(&active_pool, table, ready_path, &data_path).await; + } + Err(e) => { + tracing::warn!( + err = %e.message, + "watcher: pool rebuild failed; the original ingest error will surface" + ); + } } - InferredFileFormat::Json => ingest_json_file_async(&conn, data_str, &opts).await, - InferredFileFormat::Csv => ingest_csv_file_async(&conn, data_str, &opts).await, - }?; - Ok::(res.rows) + } } - .await; match result { Ok(rows) => {