diff --git a/.changeset/config.json b/.changeset/config.json index fada7ba00..9dd9d62fc 100644 --- a/.changeset/config.json +++ b/.changeset/config.json @@ -50,6 +50,7 @@ "@objectstack/service-cache", "@objectstack/service-cluster", "@objectstack/service-cluster-redis", + "@objectstack/service-external-datasource", "@objectstack/service-feed", "@objectstack/service-i18n", "@objectstack/service-job", diff --git a/.changeset/external-datasource-federation-p1.md b/.changeset/external-datasource-federation-p1.md new file mode 100644 index 000000000..2f094ae56 --- /dev/null +++ b/.changeset/external-datasource-federation-p1.md @@ -0,0 +1,21 @@ +--- +"@objectstack/spec": minor +"@objectstack/driver-sql": minor +--- + +External Datasource Federation (ADR-0015) — Phase 1. + +Adds the spec foundation and the DDL gate for federating mature external +databases without ObjectStack ever mutating their schema: + +- `Datasource.schemaMode` (`managed` | `external` | `validate-only`) and + `Datasource.external` settings, with a cross-field invariant. +- `Object.external` binding (remote table/schema, writability, column map). +- Shared error contract: `ExternalSchemaMismatchError`, + `ExternalWriteForbiddenError`, `ExternalSchemaModeViolationError` + (stable `code`s) + structured `SchemaDiffEntry` rendering. +- `driver-sql` DDL gate: schema-mutating DDL (`initObjects`/`syncSchema`/ + `dropTable`) is rejected when `schemaMode !== 'managed'`. + +All changes are additive and backward-compatible (`schemaMode` defaults to +`'managed'`). diff --git a/.changeset/external-datasource-federation-p2-cli.md b/.changeset/external-datasource-federation-p2-cli.md new file mode 100644 index 000000000..b4fc3a5a3 --- /dev/null +++ b/.changeset/external-datasource-federation-p2-cli.md @@ -0,0 +1,11 @@ +--- +"@objectstack/cli": minor +--- + +External Datasource Federation (ADR-0015) — CLI surface. 
+ +New `os datasource` command group: `list-tables` (list remote tables), +`introspect` (generate a reviewable `*.object.ts` draft from a remote table), +and `validate` (validate federated objects against the remote schema; exits +non-zero on mismatch). Backed by the `/api/v1/datasources/:name/external/*` +REST routes. diff --git a/.changeset/external-datasource-federation-p2-rest.md b/.changeset/external-datasource-federation-p2-rest.md new file mode 100644 index 000000000..68c8aa5a7 --- /dev/null +++ b/.changeset/external-datasource-federation-p2-rest.md @@ -0,0 +1,11 @@ +--- +"@objectstack/rest": minor +--- + +External Datasource Federation (ADR-0015) — REST surface. + +Adds `registerExternalDatasourceRoutes`, mounting `/api/v1/datasources/:name/ +external/*` — `GET tables`, `POST tables/:remote/draft`, `POST refresh-catalog`, +`POST validate` — served by the `external-datasource` service and wired into the +REST API plugin. Routes return `503 external_service_unavailable` when the +service is not registered, so they are safe to mount unconditionally. diff --git a/.changeset/external-datasource-federation-p2.md b/.changeset/external-datasource-federation-p2.md new file mode 100644 index 000000000..88d4a5354 --- /dev/null +++ b/.changeset/external-datasource-federation-p2.md @@ -0,0 +1,27 @@ +--- +"@objectstack/spec": minor +"@objectstack/service-external-datasource": minor +--- + +External Datasource Federation (ADR-0015) — Phase 2 (service core). + +Adds the federation service contract, the type-compatibility matrix, and a +new service package that introspects, drafts, and validates federated +objects: + +- `@objectstack/spec`: + - `data/type-compat.ts` — dialect-aware SQL↔field-type matrix + (`canonicalizeSqlType`, `suggestFieldType`, `isCompatible`) for + postgres/mysql/sqlite/snowflake/bigquery/mongo. 
+ - `contracts/external-datasource-service.ts` — `IExternalDatasourceService` + plus `RemoteTable`, `GenerateDraftOpts`, `ObjectDraft`, + `SchemaValidationResult`/`Report`. +- `@objectstack/service-external-datasource` (new): implements the service — + `listRemoteTables`, `generateObjectDraft` (renders a reviewable + `*.object.ts` with `// REVIEW:` markers), `validateObject`/`validateAll` + (structured `SchemaDiffEntry` diffs), and `refreshCatalog`. Decoupled from + the kernel via injected I/O; kernel plugin registers it as the + `external-datasource` service. + +REST routes and the `os datasource` CLI commands follow in a subsequent +slice. diff --git a/.changeset/external-datasource-federation-p3-boot-gate.md b/.changeset/external-datasource-federation-p3-boot-gate.md new file mode 100644 index 000000000..f8df7564c --- /dev/null +++ b/.changeset/external-datasource-federation-p3-boot-gate.md @@ -0,0 +1,12 @@ +--- +"@objectstack/runtime": minor +--- + +External Datasource Federation (ADR-0015) — boot-validation gate (Gate 2). + +Adds `ExternalValidationPlugin` (`createExternalValidationPlugin`) which, on +`kernel:ready`, validates every federated object against its remote table via +the `external-datasource` service and applies the datasource's +`external.validation.onMismatch` policy: `fail` (throws +`ExternalSchemaMismatchError`, aborting boot — the default), `warn` (logs the +diff), or `ignore`. No-op when federation is unused. diff --git a/.changeset/external-datasource-federation-p3-spec.md b/.changeset/external-datasource-federation-p3-spec.md new file mode 100644 index 000000000..3a5120b8a --- /dev/null +++ b/.changeset/external-datasource-federation-p3-spec.md @@ -0,0 +1,14 @@ +--- +"@objectstack/spec": minor +--- + +External Datasource Federation (ADR-0015) — Phase 3 spec: `external_catalog` +metadata type. 
+ +- Registers `external_catalog` in `MetadataTypeSchema` and + `DEFAULT_METADATA_TYPE_REGISTRY` (system domain, `allowRuntimeCreate: true`, + not org-overridable). +- Adds `data/external-catalog.zod.ts` — `ExternalCatalogSchema` / + `ExternalTableSchema` / `ExternalColumnSchema` for persisting a cached + remote-schema snapshot of a federated datasource (consumed by + `refreshCatalog`, the boot-validation gate, and Studio's schema browser). diff --git a/.changeset/external-datasource-federation-p4-ai.md b/.changeset/external-datasource-federation-p4-ai.md new file mode 100644 index 000000000..178bc178f --- /dev/null +++ b/.changeset/external-datasource-federation-p4-ai.md @@ -0,0 +1,12 @@ +--- +"@objectstack/service-ai": minor +--- + +External Datasource Federation (ADR-0015) — Phase 4: AI awareness. + +`SchemaRetriever.renderSnippet` now annotates federated objects in the +auto-injected schema context, e.g. +`### wh_order — Warehouse Order [external, read-only, datasource=warehouse]`, +so the LLM knows an object comes from a customer's production database and must +not propose schema changes or unsafe writes. `ObjectShape` gains `datasource` +and `external` (read from object metadata). Managed objects are unannotated. diff --git a/.changeset/external-datasource-federation-p6-write-gate.md b/.changeset/external-datasource-federation-p6-write-gate.md new file mode 100644 index 000000000..ea4c7c9ce --- /dev/null +++ b/.changeset/external-datasource-federation-p6-write-gate.md @@ -0,0 +1,15 @@ +--- +"@objectstack/objectql": minor +--- + +External Datasource Federation (ADR-0015) — write gate (Gate 3) + introspection plumbing. + +- Write gate: ObjectQL `insert`/`update`/`delete` now block writes to a + federated datasource (`schemaMode !== 'managed'`) unless BOTH + `datasource.external.allowWrites` and `object.external.writable` are true, + throwing `ExternalWriteForbiddenError` (code `EXTERNAL_WRITE_FORBIDDEN`). 
+ Managed datasources (and objects without a datasource definition) are + unaffected. New `registerDatasourceDef()` records declarative datasource + ownership; manifests carrying `datasources` are indexed during `registerApp`. +- `engine.introspectDatasource(name)` delegates to the named driver's + `introspectSchema()`, wiring the external-datasource service end-to-end. diff --git a/docs/plans/external-datasource-federation-impl.md b/docs/plans/external-datasource-federation-impl.md new file mode 100644 index 000000000..9879a9907 --- /dev/null +++ b/docs/plans/external-datasource-federation-impl.md @@ -0,0 +1,147 @@ +# Plan: External Datasource Federation (ADR-0015) — Implementation + +> Implementation plan and progress tracker for +> [ADR-0015 — External Datasource Federation](../adr/0015-external-datasource-federation.md). +> The ADR is the design source of truth; this document scopes the work +> against the **current** codebase and records what has shipped. + +## Context + +ObjectStack today owns its `default` datasource and freely runs DDL. +ADR-0015 adds the ability to *federate* a mature external database +(Postgres, Snowflake, BigQuery, …) so the AI/REST/View stack can query it +live, **without ObjectStack ever mutating the remote schema**. + +The decisive design choice — a federated object stays a normal `Object`, +its remote-ness expressed by the **datasource it points to** +(`schemaMode !== 'managed'`) plus an optional **`object.external`** +sub-record — means almost the entire downstream stack (ObjectQL, REST, +Views, AI tools, RBAC, audit) works unchanged. Behavioural differences are +enforced by three runtime gates. + +## Current-state assessment (greenfield) + +A repo-wide grep confirmed **zero** prior implementation of `schemaMode`, +`object.external`, `external_catalog`, `IExternalDatasourceService`, +`type-compat`, or the three error classes. 
The supporting infrastructure +already exists and is reused: + +| Already present (reused) | Location | +|:--|:--| +| Driver `introspectSchema()` (dialect-aware) | `packages/plugins/driver-sql/src/sql-driver.ts` | +| Per-object datasource routing | `packages/objectql/src/engine.ts`, `Object.datasource` | +| `kernel:ready` hook pattern for plugins | `packages/runtime/src/*-plugin.ts` | +| Metadata type registry | `packages/spec/src/kernel/metadata-plugin.zod.ts` (`DEFAULT_METADATA_TYPE_REGISTRY`) | +| Error formatting helpers | `packages/spec/src/shared/error-map.zod.ts` | +| oclif CLI command groups (e.g. `data/`) | `packages/cli/src/commands/` | +| Service package template + DI | `packages/services/service-*` | + +## The three runtime gates + +| Gate | Layer | Where | Enforces | +|:--|:--|:--|:--| +| **1. DDL** | driver | `sql-driver` `initObjects`/`dropTable` (+ future `applyMigrations`) | No DDL when `schemaMode !== 'managed'`. | +| **2. Boot validation** | runtime | new `external-validation-plugin` on `kernel:ready` | Federated object must match remote table (fail/warn/ignore). | +| **3. Write** | data engine | `IDataEngine.insert/update/delete` | Writes need `datasource.external.allowWrites` **and** `object.external.writable`. 
| + +## Phased rollout + +| Phase | Scope | Status | +|:-----:|:--|:--| +| **P1** | Spec changes (`schemaMode`, `object.external`, error classes) + DDL gate in `driver-sql` + tests | ✅ **Done** (this branch) | +| **P2** | `IExternalDatasourceService` impl + type-compat matrix + CLI `introspect`/`validate` | 🟡 **Service core done** (matrix + contract + service); REST routes + CLI pending | +| **P3** | Boot-validation plugin in `@objectstack/runtime` + `external_catalog` metadata type + caching | ⬜ Todo | +| **P4** | `SchemaRetriever` annotation + agent prompt + AI safety nets (LIMIT injection, timeout) | ⬜ Todo | +| **P5** | Studio UI in `../objectui` (wizard, schema browser, mapping editor, validation panel) | ⬜ Todo | +| **P6** | Write gate + `allowWrites`/`writable` double opt-in + tests | ⬜ Todo | +| **P7** | Additional drivers (Snowflake / BigQuery / MySQL) | ⬜ Todo | + +**MVP = P1–P4**: connect a read-only Postgres replica, register a few +tables, let AI Data Chat query them safely. + +## P1 — delivered in this change + +Spec is additive and backward-compatible (defaults preserve current +behaviour). + +1. **`packages/spec/src/data/datasource.zod.ts`** + - `SchemaModeSchema` enum (`managed` | `external` | `validate-only`), + exported `SchemaMode` type. + - `ExternalDatasourceSettingsSchema` (label, allowedSchemas, + `allowWrites`, validation policy, `credentialsRef`, `queryTimeoutMs`, + `requirePermission`). + - `Datasource.schemaMode` (default `'managed'`) + `Datasource.external`, + with a `superRefine` enforcing the cross-field invariant (external + settings ⇔ non-managed mode). + +2. **`packages/spec/src/data/object.zod.ts`** + - `ObjectExternalBindingSchema` (remoteName, remoteSchema, `writable`, + columnMap, introspectedAt, ignoreColumns) + `Object.external`. + - The object↔datasource cross-artefact invariant is intentionally + enforced at metadata-load time (P3), not in Zod. + +3. 
**`packages/spec/src/shared/external-errors.ts`** (new) + - `ExternalSchemaMismatchError` / `ExternalWriteForbiddenError` / + `ExternalSchemaModeViolationError`, each with a stable `code`. + - `SchemaDiffEntry` type + pure `renderDiffMessage()` (P2/P3 consume it). + +4. **DDL gate — `packages/plugins/driver-sql/src/sql-driver.ts`** + - `SqlDriverConfig` gains an optional `schemaMode` (stripped before Knex). + - `assertSchemaMutable()` choke-point throws + `ExternalSchemaModeViolationError` when `schemaMode !== 'managed'`; + called from `initObjects` (covers `syncSchema`) and `dropTable`. + +5. **Tests** — Zod refinements (datasource modes, external settings, + object binding), error classes + diff rendering, and the DDL gate + (managed allows DDL; external/validate-only block create/alter/drop; + `schemaMode` not leaked to Knex). + +## P2 — delivered in this change (service core) + +1. **`packages/spec/src/data/type-compat.ts`** — pure, dialect-aware matrix + (`canonicalizeSqlType` → `suggestFieldType` / `isCompatible`) covering + postgres/mysql/sqlite/snowflake/bigquery/mongo. Returns `true` / `'lossy'` + / `false`. Independently unit-tested. + +2. **`packages/spec/src/contracts/external-datasource-service.ts`** — + `IExternalDatasourceService` + `RemoteTable`, `GenerateDraftOpts`, + `ObjectDraft`, `SchemaValidationResult`/`Report`. Reuses the existing + `IntrospectedSchema` from `schema-diff-service.ts` and `SchemaDiffEntry` + from `external-errors.ts`. + +3. 
**`packages/services/service-external-datasource`** (new package) — + `ExternalDatasourceService` implements the contract: + - `listRemoteTables` (schema-qualified, `allowedSchemas`-filtered), + - `generateObjectDraft` (type-compat mapping → reviewable `*.object.ts` + source with `// REVIEW:` markers on lossy/unknown columns), + - `validateObject` / `validateAll` (structured diffs: `missing_table`, + `missing_column`, `type_mismatch`; lossy = warning, hard mismatch = + error; honours `columnMap` + `ignoreColumns`), + - `refreshCatalog` (snapshot shape; persistence lands with P3's + `external_catalog` type). + The service takes injected I/O (`introspect` / `getDatasource` / + `getObject` / `listObjects`) so it is decoupled and fully unit-tested; the + `ExternalDatasourceServicePlugin` wires the live `IDataEngine` + + `IMetadataService` and registers it as the `external-datasource` service. + +### Remaining P2 slice (next) + +- **REST routes** under `/api/v1/datasources/:name/external/*` (ADR §6.2). +- **CLI** `os datasource list-tables | introspect | validate` (ADR §6.3) — + thin oclif commands over the REST routes. +- Driver introspection plumbing: expose + `getDatasourceDriver(name)` / `introspectDatasource(name)` on the data + engine so the plugin's default `introspect` works end-to-end. + +### Follow-up notes / open items for later phases + +- **DDL gate plumbing (P3)**: the runtime must inject `Datasource.schemaMode` + into `SqlDriverConfig` when constructing drivers. P1 wires the driver + side and defaults to `'managed'`; the runtime wiring lands with the + boot-validation plugin. +- **`applyMigrations` gate**: `ISchemaDiffService.applyMigrations` also + needs the gate (per ADR §5.1) when the migration runner ships. +- **Lint rule** preventing plugins from bypassing the gate via raw `knex` + (ADR §12 risk row) — defer to P2/P3 alongside the service. 
+- **error-map envelope**: map the three `code`s into the shared error + envelope when P6 surfaces them over REST. diff --git a/packages/cli/src/commands/datasource/introspect.ts b/packages/cli/src/commands/datasource/introspect.ts new file mode 100644 index 000000000..d8968dbb2 --- /dev/null +++ b/packages/cli/src/commands/datasource/introspect.ts @@ -0,0 +1,85 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +import { Args, Command, Flags } from '@oclif/core'; +import { writeFile } from 'node:fs/promises'; +import { resolve, isAbsolute } from 'node:path'; + +/** Resolve server URL + token from flags then env (mirrors createApiClient). */ +function resolveTarget(flags: { url?: string; token?: string }): { url: string; token?: string } { + const url = flags.url || process.env.OS_CLOUD_URL || 'http://localhost:3000'; + const token = flags.token || process.env.OS_TOKEN; + return { url, token }; +} + +/** + * `os datasource introspect <name> --table <remote>` — generate an Object + * draft (`*.object.ts`) from a remote table (ADR-0015). + * POST /api/v1/datasources/:name/external/tables/:remote/draft. 
+ */ +export default class DatasourceIntrospect extends Command { + static override description = 'Generate an Object draft from a remote table on an external datasource'; + + static override examples = [ + '$ os datasource introspect warehouse --table fact_orders', + '$ os datasource introspect warehouse --table fact_orders --out objects/wh_order.object.ts', + ]; + + static override args = { + name: Args.string({ description: 'Datasource name', required: true }), + }; + + static override flags = { + url: Flags.string({ char: 'u', description: 'Server URL', env: 'OS_CLOUD_URL' }), + token: Flags.string({ char: 't', description: 'Authentication token', env: 'OS_TOKEN' }), + table: Flags.string({ char: 'T', description: 'Remote table name', required: true }), + out: Flags.string({ char: 'o', description: 'Write the generated source to this file (under the current working directory)' }), + }; + + async run(): Promise { + const { args, flags } = await this.parse(DatasourceIntrospect); + const { url, token } = resolveTarget(flags); + + const res = await fetch( + `${url}/api/v1/datasources/${args.name}/external/tables/${encodeURIComponent(flags.table)}/draft`, + { + method: 'POST', + headers: { + 'content-type': 'application/json', + ...(token ? { authorization: `Bearer ${token}` } : {}), + }, + body: '{}', + }, + ); + const body = (await res.json()) as { + draft?: { source?: string; review?: Array<{ column: string; note: string }> }; + error?: string; + }; + if (body.error) this.error(body.error); + + const draft = body.draft; + if (!draft?.source) { + this.error(`Failed to generate draft for '${flags.table}' on '${args.name}'.`); + return; + } + + if (flags.out) { + // Constrain the output path to the current working directory: the body + // is server-generated TypeScript, so refuse to write outside the project + // tree (defends against a malicious/compromised server supplying an + // absolute or traversing `--out` via shell expansion). 
+ const target = resolve(process.cwd(), flags.out); + if (isAbsolute(flags.out) || !target.startsWith(process.cwd() + '/')) { + this.error(`--out must be a relative path within the current directory: ${flags.out}`); + return; + } + await writeFile(target, draft.source, 'utf8'); + this.log(`Wrote ${flags.out}`); + } else { + this.log(draft.source); + } + + for (const r of draft.review ?? []) { + this.warn(`REVIEW: column '${r.column}' — ${r.note}`); + } + } +} diff --git a/packages/cli/src/commands/datasource/list-tables.ts b/packages/cli/src/commands/datasource/list-tables.ts new file mode 100644 index 000000000..e1425867f --- /dev/null +++ b/packages/cli/src/commands/datasource/list-tables.ts @@ -0,0 +1,59 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +import { Args, Command, Flags } from '@oclif/core'; + +/** Resolve server URL + token from flags then env (mirrors createApiClient). */ +function resolveTarget(flags: { url?: string; token?: string }): { url: string; token?: string } { + const url = flags.url || process.env.OS_CLOUD_URL || 'http://localhost:3000'; + const token = flags.token || process.env.OS_TOKEN; + return { url, token }; +} + +/** + * `os datasource list-tables <name>` — list remote tables on a federated + * datasource (ADR-0015). GET /api/v1/datasources/:name/external/tables. 
+ */ +export default class DatasourceListTables extends Command { + static override description = 'List remote tables on an external (federated) datasource'; + + static override examples = [ + '$ os datasource list-tables warehouse', + '$ os datasource list-tables warehouse --schema mart', + ]; + + static override args = { + name: Args.string({ description: 'Datasource name', required: true }), + }; + + static override flags = { + url: Flags.string({ char: 'u', description: 'Server URL', env: 'OS_CLOUD_URL' }), + token: Flags.string({ char: 't', description: 'Authentication token', env: 'OS_TOKEN' }), + schema: Flags.string({ char: 's', description: 'Filter by remote schema' }), + }; + + async run(): Promise { + const { args, flags } = await this.parse(DatasourceListTables); + const { url, token } = resolveTarget(flags); + + const qs = flags.schema ? `?schema=${encodeURIComponent(flags.schema)}` : ''; + const res = await fetch(`${url}/api/v1/datasources/${args.name}/external/tables${qs}`, { + headers: token ? { authorization: `Bearer ${token}` } : {}, + }); + const body = (await res.json()) as { + tables?: Array<{ schema?: string; name: string; columnCount: number; rowCountEstimate?: number }>; + error?: string; + }; + if (body.error) this.error(body.error); + + const tables = body.tables ?? []; + if (tables.length === 0) { + this.log('No remote tables found.'); + return; + } + for (const t of tables) { + const where = t.schema ? `${t.schema}.${t.name}` : t.name; + const rows = t.rowCountEstimate != null ? `, ~${t.rowCountEstimate} rows` : ''; + this.log(` ${where} (${t.columnCount} cols${rows})`); + } + } +} diff --git a/packages/cli/src/commands/datasource/validate.ts b/packages/cli/src/commands/datasource/validate.ts new file mode 100644 index 000000000..3992ce46b --- /dev/null +++ b/packages/cli/src/commands/datasource/validate.ts @@ -0,0 +1,76 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. 
+ +import { Args, Command, Flags } from '@oclif/core'; + +/** Resolve server URL + token from flags then env (mirrors createApiClient). */ +function resolveTarget(flags: { url?: string; token?: string }): { url: string; token?: string } { + const url = flags.url || process.env.OS_CLOUD_URL || 'http://localhost:3000'; + const token = flags.token || process.env.OS_TOKEN; + return { url, token }; +} + +/** + * `os datasource validate <name>` — validate federated objects on a datasource + * against the live remote schema (ADR-0015). Exits non-zero on mismatch. + * POST /api/v1/datasources/:name/external/validate. + */ +export default class DatasourceValidate extends Command { + static override description = 'Validate federated objects against the remote schema of an external datasource'; + + static override examples = ['$ os datasource validate warehouse']; + + static override args = { + name: Args.string({ description: 'Datasource name', required: true }), + }; + + static override flags = { + url: Flags.string({ char: 'u', description: 'Server URL', env: 'OS_CLOUD_URL' }), + token: Flags.string({ char: 't', description: 'Authentication token', env: 'OS_TOKEN' }), + }; + + async run(): Promise { + const { args, flags } = await this.parse(DatasourceValidate); + const { url, token } = resolveTarget(flags); + + const res = await fetch(`${url}/api/v1/datasources/${args.name}/external/validate`, { + method: 'POST', + headers: { + 'content-type': 'application/json', + ...(token ? { authorization: `Bearer ${token}` } : {}), + }, + body: '{}', + }); + const body = (await res.json()) as { + results?: Array<{ + ok: boolean; + object: string; + diffs: Array<{ kind: string; column?: string; expected?: string; actual?: string; severity: string }>; + }>; + error?: string; + }; + if (body.error) this.error(body.error); + + const results = body.results ?? 
[]; + if (results.length === 0) { + this.log('No federated objects to validate.'); + return; + } + + let hasError = false; + for (const r of results) { + if (r.ok && r.diffs.length === 0) { + this.log(`✓ ${r.object} matches`); + continue; + } + for (const d of r.diffs) { + const loc = d.column ? `${r.object}.${d.column}` : r.object; + const detail = d.expected || d.actual ? ` (expected ${d.expected ?? '—'}, actual ${d.actual ?? '—'})` : ''; + const mark = d.severity === 'error' ? '✗' : '⚠'; + this.log(`${mark} ${d.kind}: ${loc}${detail}`); + if (d.severity === 'error') hasError = true; + } + } + + if (hasError) this.error('External schema validation failed.', { exit: 1 }); + } +} diff --git a/packages/objectql/src/engine.ts b/packages/objectql/src/engine.ts index 412be6b4e..5e9232bc7 100644 --- a/packages/objectql/src/engine.ts +++ b/packages/objectql/src/engine.ts @@ -13,7 +13,7 @@ import { ExecutionContext, ExecutionContextSchema } from '@objectstack/spec/kern import { DriverInterface, IDataEngine, Logger, createLogger } from '@objectstack/core'; import { CoreServiceName, StorageNameMapping } from '@objectstack/spec/system'; import { IRealtimeService, RealtimeEventPayload } from '@objectstack/spec/contracts'; -import { pluralToSingular } from '@objectstack/spec/shared'; +import { pluralToSingular, ExternalWriteForbiddenError } from '@objectstack/spec/shared'; import { SchemaRegistry, computeFQN } from './registry.js'; import { ExpressionEngine } from '@objectstack/formula'; import type { Expression } from '@objectstack/spec'; @@ -200,6 +200,12 @@ export class ObjectQL implements IDataEngine { // Package manifests registry (for defaultDatasource lookup) private manifests = new Map(); + // Datasource definitions by name (ADR-0015): carries schemaMode + + // external.allowWrites so the write gate (Gate 3) can enforce federation + // ownership. Populated from manifests in registerApp and via + // registerDatasourceDef. Absent entry ⇒ treated as managed (default DB). 
+ private datasourceDefs = new Map(); + // Per-object hooks with priority support private hooks: Map = new Map([ ['beforeFind', []], ['afterFind', []], @@ -702,6 +708,18 @@ export class ObjectQL implements IDataEngine { this.manifests.set(id, manifest); } + // Index datasource definitions (ADR-0015) so the write gate can read + // schemaMode + external.allowWrites. Manifests may carry `datasources` + // as an array or a name-keyed map. + if (manifest.datasources) { + const dsList = Array.isArray(manifest.datasources) + ? manifest.datasources + : Object.entries(manifest.datasources).map(([name, def]) => ({ name, ...(def as any) })); + for (const ds of dsList) { + if (ds?.name) this.registerDatasourceDef(ds); + } + } + // 1. Register the Package (manifest + lifecycle state) this._registry.installPackage(manifest); this.logger.debug('Installed Package', { id: manifest.id, name: manifest.name, namespace }); @@ -968,6 +986,48 @@ export class ObjectQL implements IDataEngine { } } + /** + * Register a Datasource *definition* (ADR-0015). + * + * Distinct from {@link registerDriver}, which registers a live connection. + * This captures the declarative `schemaMode` + `external.allowWrites` so the + * write gate ({@link assertWriteAllowed}) can enforce external-datasource + * ownership. Safe to call repeatedly; last write wins. + */ + registerDatasourceDef(def: { name: string; schemaMode?: string; external?: { allowWrites?: boolean } }): void { + if (!def?.name) return; + this.datasourceDefs.set(def.name, { schemaMode: def.schemaMode, external: def.external }); + } + + /** + * Write gate — Gate 3 of ADR-0015 §5.3. + * + * Blocks insert/update/delete against a federated datasource + * (`schemaMode !== 'managed'`) unless BOTH the datasource opts in + * (`external.allowWrites`) AND the object opts in (`external.writable`). + * Managed datasources (the common case, including the absence of any + * definition) are unaffected. 
+ */ + private assertWriteAllowed(objectName: string, operation: 'insert' | 'update' | 'delete'): void { + const object = this._registry.getObject(objectName) as any; + const dsName = object?.datasource; + if (!dsName || dsName === 'default') return; + + const ds = this.datasourceDefs.get(dsName); + // No recorded definition, or an explicitly managed one ⇒ allow. + if (!ds || !ds.schemaMode || ds.schemaMode === 'managed') return; + + const dsAllows = ds.external?.allowWrites ?? false; + const objAllows = object?.external?.writable ?? false; + if (!(dsAllows && objAllows)) { + throw new ExternalWriteForbiddenError( + `Write '${operation}' blocked on object '${objectName}': datasource '${dsName}' is external ` + + `(schemaMode=${ds.schemaMode}). Requires datasource.external.allowWrites=true (got ${dsAllows}) ` + + `AND object.external.writable=true (got ${objAllows}).`, + ); + } + } + /** * Set the realtime service for publishing data change events. * Should be called after kernel resolves the realtime service. @@ -1463,6 +1523,7 @@ export class ObjectQL implements IDataEngine { async insert(object: string, data: any | any[], options?: DataEngineInsertOptions): Promise { object = this.resolveObjectName(object); this.logger.debug('Insert operation starting', { object, isBatch: Array.isArray(data) }); + this.assertWriteAllowed(object, 'insert'); const driver = this.getDriver(object); const opCtx: OperationContext = { @@ -1571,6 +1632,7 @@ export class ObjectQL implements IDataEngine { async update(object: string, data: any, options?: EngineUpdateOptions): Promise { object = this.resolveObjectName(object); this.logger.debug('Update operation starting', { object }); + this.assertWriteAllowed(object, 'update'); const driver = this.getDriver(object); // 1. 
Extract ID from data or where if it's a single update by ID @@ -1652,6 +1714,7 @@ export class ObjectQL implements IDataEngine { async delete(object: string, options?: EngineDeleteOptions): Promise { object = this.resolveObjectName(object); this.logger.debug('Delete operation starting', { object }); + this.assertWriteAllowed(object, 'delete'); const driver = this.getDriver(object); // Extract ID logic similar to update @@ -1981,6 +2044,27 @@ export class ObjectQL implements IDataEngine { return this.drivers.get(name); } + /** + * Introspect a datasource's live remote schema (ADR-0015). + * + * Resolves the driver registered under `datasource` and delegates to its + * `introspectSchema()` capability. Used by the external-datasource service + * (and CLI/REST) to list remote tables and validate federated objects. + * + * @throws if the datasource has no registered driver, or the driver does + * not support introspection. + */ + async introspectDatasource(datasource: string): Promise { + const driver = this.drivers.get(datasource) as any; + if (!driver) { + throw new Error(`[ObjectQL] Datasource '${datasource}' has no registered driver to introspect.`); + } + if (typeof driver.introspectSchema !== 'function') { + throw new Error(`[ObjectQL] Driver for datasource '${datasource}' does not support introspectSchema().`); + } + return driver.introspectSchema(); + } + /** * Get the driver responsible for the given object. * diff --git a/packages/objectql/src/external-write-gate.test.ts b/packages/objectql/src/external-write-gate.test.ts new file mode 100644 index 000000000..738669abb --- /dev/null +++ b/packages/objectql/src/external-write-gate.test.ts @@ -0,0 +1,166 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * Write gate (Gate 3) + introspection plumbing tests — ADR-0015 §5.3. 
+ * + * Uses the real ObjectQL engine (no registry mock) with a minimal in-memory + * driver, so the write gate is exercised through the genuine + * registerApp → composeObject → getObject path (which preserves + * `object.external` / `object.datasource`). + */ + +import { describe, it, expect } from 'vitest'; +import { ObjectQL } from './engine'; +import type { IDataDriver } from '@objectstack/spec/contracts'; +import { ExternalWriteForbiddenError } from '@objectstack/spec/shared'; + +function makeDriver(name: string): IDataDriver { + const store = new Map(); + return { + name, + version: '1.0.0', + async connect() {}, + async disconnect() {}, + async find() { return []; }, + async findOne() { return null; }, + async count() { return 0; }, + async create(object: string, data: any) { + const id = data.id ?? String(store.size + 1); + const row = { ...data, id }; + store.set(`${object}:${id}`, row); + return row; + }, + async update(object: string, id: string, data: any) { + const row = { ...(store.get(`${object}:${id}`) ?? {}), ...data, id }; + store.set(`${object}:${id}`, row); + return row; + }, + async delete(object: string, id: string) { + return store.delete(`${object}:${id}`); + }, + async bulkCreate(object: string, rows: any[]) { + return rows.map((r) => { + const id = r.id ?? String(store.size + 1); + const row = { ...r, id }; + store.set(`${object}:${id}`, row); + return row; + }); + }, + async syncSchema() {}, + async dropTable() {}, + } as unknown as IDataDriver; +} + +function makeEngine(opts: { + dsSchemaMode?: 'managed' | 'external' | 'validate-only'; + dsAllowWrites?: boolean; + objWritable?: boolean; +}) { + const engine = new ObjectQL(); + engine.registerDriver(makeDriver('default'), true); + engine.registerDriver(makeDriver('warehouse')); + engine.registerDatasourceDef({ + name: 'warehouse', + schemaMode: opts.dsSchemaMode ?? 'external', + external: { allowWrites: opts.dsAllowWrites ?? 
false }, + }); + engine.registerApp({ + id: 'wh_pkg', + name: 'Warehouse', + objects: [ + { + name: 'wh_order', + datasource: 'warehouse', + external: { remoteName: 'fact_orders', writable: opts.objWritable ?? false }, + fields: { order_id: { type: 'text' }, amount: { type: 'number' } }, + }, + ], + } as any); + return engine; +} + +describe('write gate (ADR-0015 Gate 3)', () => { + it('allows writes to a managed (default) datasource object', async () => { + const engine = new ObjectQL(); + engine.registerDriver(makeDriver('default'), true); + engine.registerApp({ + id: 'local_pkg', + name: 'Local', + objects: [{ name: 'task', fields: { title: { type: 'text' } } }], + } as any); + await expect(engine.insert('task', { title: 'ok' })).resolves.toBeDefined(); + }); + + it('blocks insert on a federated object with no double opt-in', async () => { + const engine = makeEngine({ dsAllowWrites: false, objWritable: false }); + await expect(engine.insert('wh_order', { order_id: 'o1' })).rejects.toBeInstanceOf( + ExternalWriteForbiddenError, + ); + }); + + it('blocks insert when only the datasource opts in', async () => { + const engine = makeEngine({ dsAllowWrites: true, objWritable: false }); + await expect(engine.insert('wh_order', { order_id: 'o1' })).rejects.toBeInstanceOf( + ExternalWriteForbiddenError, + ); + }); + + it('blocks insert when only the object opts in', async () => { + const engine = makeEngine({ dsAllowWrites: false, objWritable: true }); + await expect(engine.insert('wh_order', { order_id: 'o1' })).rejects.toBeInstanceOf( + ExternalWriteForbiddenError, + ); + }); + + it('allows insert with the full double opt-in', async () => { + const engine = makeEngine({ dsAllowWrites: true, objWritable: true }); + await expect(engine.insert('wh_order', { order_id: 'o1' })).resolves.toBeDefined(); + }); + + it('blocks update and delete on a read-only federated object', async () => { + const engine = makeEngine({ dsAllowWrites: false, objWritable: false }); + await 
+ expect(engine.update('wh_order', { id: 'x', amount: 1 })).rejects.toBeInstanceOf( + ExternalWriteForbiddenError, + ); + await expect(engine.delete('wh_order', { where: { id: 'x' } } as any)).rejects.toBeInstanceOf( + ExternalWriteForbiddenError, + ); + }); + + it('allows writes when the datasource is explicitly managed', async () => { + const engine = makeEngine({ dsSchemaMode: 'managed', dsAllowWrites: false, objWritable: false }); + await expect(engine.insert('wh_order', { order_id: 'o1' })).resolves.toBeDefined(); + }); + + it('reports the stable error code on the thrown error', async () => { + const engine = makeEngine({}); + /* `.rejects` fails the test if insert unexpectedly resolves; a bare `.catch()` would pass with zero assertions. */ + await expect(engine.insert('wh_order', { order_id: 'o1' })) + .rejects.toMatchObject({ code: 'EXTERNAL_WRITE_FORBIDDEN' }); + }); +}); + +describe('engine.introspectDatasource (ADR-0015)', () => { + it('delegates to the named driver introspectSchema()', async () => { + const engine = new ObjectQL(); + engine.registerDriver(makeDriver('default'), true); + const snapshot = { dialect: 'postgres', introspectedAt: 'now', tables: {} }; + const wh = makeDriver('warehouse') as any; + wh.introspectSchema = async () => snapshot; + engine.registerDriver(wh); + await expect(engine.introspectDatasource('warehouse')).resolves.toBe(snapshot); + }); + + it('throws when the datasource has no registered driver', async () => { + const engine = new ObjectQL(); + engine.registerDriver(makeDriver('default'), true); + await expect(engine.introspectDatasource('ghost')).rejects.toThrow(/no registered driver/); + }); + + it('throws when the driver lacks introspectSchema', async () => { + const engine = new ObjectQL(); + engine.registerDriver(makeDriver('default'), true); + engine.registerDriver(makeDriver('plain')); + await expect(engine.introspectDatasource('plain')).rejects.toThrow(/does not support introspectSchema/); + }); +}); diff --git a/packages/plugins/driver-sql/src/sql-driver-ddl-gate.test.ts
b/packages/plugins/driver-sql/src/sql-driver-ddl-gate.test.ts new file mode 100644 index 000000000..380e57c59 --- /dev/null +++ b/packages/plugins/driver-sql/src/sql-driver-ddl-gate.test.ts @@ -0,0 +1,85 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * DDL gate tests (ADR-0015 §5.1). + * + * A SqlDriver constructed with `schemaMode !== 'managed'` must reject every + * schema-mutating DDL operation, while a default ('managed') driver behaves + * exactly as before. This guards the "ObjectStack never mutates an external + * database's schema" invariant at the single driver choke-point. + */ + +import { describe, it, expect, afterEach } from 'vitest'; +import { SqlDriver } from '../src/index.js'; +import { ExternalSchemaModeViolationError } from '@objectstack/spec/shared'; + +function makeDriver(schemaMode?: 'managed' | 'external' | 'validate-only') { + return new SqlDriver({ + client: 'better-sqlite3', + connection: { filename: ':memory:' }, + useNullAsDefault: true, + ...(schemaMode ? { schemaMode } : {}), + }); +} + +describe('SqlDriver DDL gate (ADR-0015)', () => { + let driver: SqlDriver; + + afterEach(async () => { + await driver.disconnect(); + }); + + it('defaults to managed mode and allows DDL', async () => { + driver = makeDriver(); + await expect( + driver.initObjects([{ name: 'widgets', fields: { sku: { type: 'text' } } }]), + ).resolves.toBeUndefined(); + // Table actually created. 
+ const k = (driver as any).knex; + expect(await k.schema.hasTable('widgets')).toBe(true); + }); + + it('does not pass schemaMode through to Knex config', () => { + driver = makeDriver('external'); + expect((driver as any).config.schemaMode).toBeUndefined(); + expect((driver as any).config.client).toBe('better-sqlite3'); + }); + + it('blocks initObjects (createTable/alterTable) on external mode', async () => { + driver = makeDriver('external'); + await expect( + driver.initObjects([{ name: 'widgets', fields: { sku: { type: 'text' } } }]), + ).rejects.toBeInstanceOf(ExternalSchemaModeViolationError); + const k = (driver as any).knex; + expect(await k.schema.hasTable('widgets')).toBe(false); + }); + + it('blocks syncSchema on external mode (delegates to initObjects)', async () => { + driver = makeDriver('external'); + await expect( + driver.syncSchema('widgets', { name: 'widgets', fields: {} }), + ).rejects.toBeInstanceOf(ExternalSchemaModeViolationError); + }); + + it('blocks dropTable on external mode', async () => { + driver = makeDriver('external'); + await expect(driver.dropTable('widgets')).rejects.toBeInstanceOf( + ExternalSchemaModeViolationError, + ); + }); + + it('also blocks DDL in validate-only mode', async () => { + driver = makeDriver('validate-only'); + await expect( + driver.initObjects([{ name: 'widgets', fields: {} }]), + ).rejects.toBeInstanceOf(ExternalSchemaModeViolationError); + }); + + it('error carries the stable code and reports the schemaMode', async () => { + driver = makeDriver('external'); + /* `.rejects` fails the test if dropTable unexpectedly resolves; a bare `.catch()` would pass with zero assertions. */ + await expect(driver.dropTable('widgets')).rejects.toMatchObject({ + code: 'EXTERNAL_SCHEMA_MODE_VIOLATION', message: expect.stringContaining('external'), + }); + }); +}); diff --git a/packages/plugins/driver-sql/src/sql-driver.ts b/packages/plugins/driver-sql/src/sql-driver.ts index a81c18136..5045e3b97 100644 --- a/packages/plugins/driver-sql/src/sql-driver.ts +++ b/packages/plugins/driver-sql/src/sql-driver.ts @@ -7,9 +7,10 @@ * Supports
PostgreSQL, MySQL, SQLite, and other SQL databases. */ -import type { QueryAST, DriverOptions } from '@objectstack/spec/data'; +import type { QueryAST, DriverOptions, SchemaMode } from '@objectstack/spec/data'; import type { IDataDriver } from '@objectstack/spec/contracts'; import { StorageNameMapping } from '@objectstack/spec/system'; +import { ExternalSchemaModeViolationError } from '@objectstack/spec/shared'; import knex, { Knex } from 'knex'; import { nanoid } from 'nanoid'; @@ -67,8 +68,13 @@ export interface IntrospectedSchema { /** * SqlDriver configuration — passed directly to Knex. * See https://knexjs.org/guide/#configuration-options + * + * `schemaMode` (ADR-0015) is an ObjectStack-level concern, not a Knex + * option: it is stripped before constructing the Knex instance and gates + * all schema-mutating DDL. Defaults to `'managed'` when omitted, preserving + * legacy behaviour. */ -export type SqlDriverConfig = Knex.Config; +export type SqlDriverConfig = Knex.Config & { schemaMode?: SchemaMode }; // ── SQL Driver ─────────────────────────────────────────────────────────────── @@ -289,9 +295,36 @@ export class SqlDriver implements IDataDriver { return null; } + /** + * Schema ownership mode (ADR-0015). When not `'managed'`, all + * schema-mutating DDL is rejected by {@link assertSchemaMutable}. The + * runtime injects this from `Datasource.schemaMode`; defaults to + * `'managed'` so existing callers are unaffected. + */ + protected readonly schemaMode: SchemaMode; + constructor(config: SqlDriverConfig) { - this.config = config; - this.knex = knex(config); + // `schemaMode` is an ObjectStack concern, not a Knex option — strip it + // before handing the config to Knex. + const { schemaMode, ...knexConfig } = config; + this.schemaMode = schemaMode ?? 'managed'; + this.config = knexConfig; + this.knex = knex(knexConfig); + } + + /** + * DDL gate (ADR-0015 §5.1). 
Single choke-point asserting that + * schema-mutating DDL is only performed on a `managed` datasource. + * Federated datasources (`external` / `validate-only`) are guests in a + * database ObjectStack does not own and must never run DDL against. + */ + protected assertSchemaMutable(operation: string): void { + if (this.schemaMode !== 'managed') { + throw new ExternalSchemaModeViolationError( + `DDL operation '${operation}' is forbidden: datasource schemaMode='${this.schemaMode}'. ` + + `ObjectStack never mutates the schema of an external database.`, + ); + } } // =================================== @@ -978,6 +1011,7 @@ export class SqlDriver implements IDataDriver { } async dropTable(object: string, _options?: DriverOptions): Promise { + this.assertSchemaMutable('dropTable'); await this.knex.schema.dropTableIfExists(object); } @@ -985,6 +1019,9 @@ export class SqlDriver implements IDataDriver { * Batch-initialise tables from an array of object definitions. */ async initObjects(objects: Array<{ name: string; fields?: Record }>): Promise { + // DDL gate (ADR-0015 §5.1): createTable/alterTable below mutate schema. + // Also covers `syncSchema`, which delegates here. 
+ this.assertSchemaMutable('initObjects'); await this.ensureDatabaseExists(); for (const obj of objects) { diff --git a/packages/plugins/driver-sql/vitest.config.ts b/packages/plugins/driver-sql/vitest.config.ts index 488e95820..4b49e52df 100644 --- a/packages/plugins/driver-sql/vitest.config.ts +++ b/packages/plugins/driver-sql/vitest.config.ts @@ -12,6 +12,7 @@ export default defineConfig({ alias: { '@objectstack/spec/contracts': path.resolve(__dirname, '../../spec/src/contracts/index.ts'), '@objectstack/spec/data': path.resolve(__dirname, '../../spec/src/data/index.ts'), + '@objectstack/spec/shared': path.resolve(__dirname, '../../spec/src/shared/index.ts'), '@objectstack/spec/system': path.resolve(__dirname, '../../spec/src/system/index.ts'), '@objectstack/spec': path.resolve(__dirname, '../../spec/src/index.ts'), }, diff --git a/packages/rest/src/external-datasource-routes.ts b/packages/rest/src/external-datasource-routes.ts new file mode 100644 index 000000000..9442401e7 --- /dev/null +++ b/packages/rest/src/external-datasource-routes.ts @@ -0,0 +1,74 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +import type { PluginContext } from '@objectstack/core'; +import type { IHttpServer } from '@objectstack/spec/contracts'; + +/** + * External Datasource Federation REST routes (ADR-0015 §6.2). + * + * Mounted under `/api/v1/datasources/:name/external/*` and served by the + * `external-datasource` service. Every route degrades gracefully + * (`503 external_service_unavailable`) when federation is not wired into the + * host, so the routes are safe to register unconditionally. 
+ * + * GET /datasources/:name/external/tables → listRemoteTables + * POST /datasources/:name/external/tables/:remote/draft → generateObjectDraft + * POST /datasources/:name/external/refresh-catalog → refreshCatalog + * POST /datasources/:name/external/validate → validateAll (this ds) + */ +export function registerExternalDatasourceRoutes( + server: IHttpServer, + ctx: PluginContext, + basePath = '/api/v1', +): void { + const ext = `${basePath}/datasources/:name/external`; + + const externalService = (): any => { + try { + return ctx.getService('external-datasource'); + } catch { + return undefined; + } + }; + + const unavailable = (res: any) => + res.status(503).json({ error: 'external_service_unavailable' }); + + // List remote tables (optionally filtered by ?schema=). + server.get(`${ext}/tables`, async (req: any, res: any) => { + const svc = externalService(); + if (!svc?.listRemoteTables) return unavailable(res); + const schema = typeof req.query?.schema === 'string' ? req.query.schema : undefined; + const tables = await svc.listRemoteTables(req.params.name, { schema }); + res.json({ tables }); + }); + + // Generate an Object draft (structured + *.object.ts source) from a table. + server.post(`${ext}/tables/:remote/draft`, async (req: any, res: any) => { + const svc = externalService(); + if (!svc?.generateObjectDraft) return unavailable(res); + const draft = await svc.generateObjectDraft( + req.params.name, + req.params.remote, + (req.body as Record) ?? {}, + ); + res.json({ draft }); + }); + + // Refresh and return the cached catalog snapshot. + server.post(`${ext}/refresh-catalog`, async (req: any, res: any) => { + const svc = externalService(); + if (!svc?.refreshCatalog) return unavailable(res); + const catalog = await svc.refreshCatalog(req.params.name); + res.json({ catalog }); + }); + + // Validate the federated objects on this datasource. 
+ server.post(`${ext}/validate`, async (req: any, res: any) => { + const svc = externalService(); + if (!svc?.validateAll) return unavailable(res); + const report = await svc.validateAll(); + const results = (report.results ?? []).filter((r: any) => r.datasource === req.params.name); + res.json({ ok: results.every((r: any) => r.ok), results }); + }); +} diff --git a/packages/rest/src/rest-api-plugin.ts b/packages/rest/src/rest-api-plugin.ts index 2cc1cb0e8..473d98953 100644 --- a/packages/rest/src/rest-api-plugin.ts +++ b/packages/rest/src/rest-api-plugin.ts @@ -4,6 +4,7 @@ import { Plugin, PluginContext, IHttpServer } from '@objectstack/core'; import { RestServer, RestKernelManager } from './rest-server.js'; import { ObjectStackProtocol, RestServerConfig } from '@objectstack/spec/api'; import { registerPackageRoutes } from './package-routes.js'; +import { registerExternalDatasourceRoutes } from './external-datasource-routes.js'; import type { PackageService } from '@objectstack/service-package'; export interface RestApiPluginConfig { @@ -192,6 +193,9 @@ export function createRestApiPlugin(config: RestApiPluginConfig = {}): Plugin { }); } else { registerPackageRoutes(server, packageService, versionedBase, { protocol }); + // External Datasource Federation routes (ADR-0015) — + // degrade gracefully when the service is not registered. + registerExternalDatasourceRoutes(server, ctx, versionedBase); if (enableProjectScoping) { registerPackageRoutes(server, packageService, `${versionedBase}/environments/:environmentId`, { protocol, diff --git a/packages/runtime/src/external-validation-plugin.test.ts b/packages/runtime/src/external-validation-plugin.test.ts new file mode 100644 index 000000000..0d82bf529 --- /dev/null +++ b/packages/runtime/src/external-validation-plugin.test.ts @@ -0,0 +1,83 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. 
+ +import { describe, it, expect, vi } from 'vitest'; +import { ExternalValidationPlugin } from './external-validation-plugin'; +import { ExternalSchemaMismatchError, type SchemaDiffEntry } from '@objectstack/spec/shared'; + +function makeCtx(services: Record) { + const warnings: any[] = []; + const infos: any[] = []; + const ctx = { + getService: (name: string): T => { + if (name in services) return services[name] as T; + throw new Error(`service '${name}' not registered`); + }, + registerService: vi.fn(), + hook: vi.fn(), + trigger: vi.fn(), + logger: { + debug: vi.fn(), + info: (...a: any[]) => infos.push(a), + warn: (...a: any[]) => warnings.push(a), + }, + } as any; + return { ctx, warnings, infos }; +} + +const sampleDiffs: SchemaDiffEntry[] = [ + { kind: 'type_mismatch', remoteName: 'fact_orders', column: 'amount', expected: 'number', actual: 'text', severity: 'error' }, +]; + +describe('ExternalValidationPlugin (ADR-0015 Gate 2)', () => { + it('subscribes to kernel:ready in start()', () => { + const { ctx } = makeCtx({}); + new ExternalValidationPlugin().start(ctx); + expect(ctx.hook).toHaveBeenCalledWith('kernel:ready', expect.any(Function)); + }); + + it('is a no-op when the external-datasource service is absent', async () => { + const { ctx } = makeCtx({}); + await expect(new ExternalValidationPlugin().runValidation(ctx)).resolves.toBeUndefined(); + }); + + it('passes silently when all federated objects validate', async () => { + const { ctx, infos } = makeCtx({ + 'external-datasource': { validateAll: async () => ({ ok: true, results: [{ ok: true, datasource: 'warehouse', object: 'wh_order', diffs: [] }] }) }, + }); + await new ExternalValidationPlugin().runValidation(ctx); + expect(infos.length).toBeGreaterThan(0); + }); + + it('throws ExternalSchemaMismatchError on failure with default (fail) policy', async () => { + const { ctx } = makeCtx({ + 'external-datasource': { validateAll: async () => ({ ok: false, results: [{ ok: false, datasource: 
'warehouse', object: 'wh_order', diffs: sampleDiffs }] }) }, + metadata: { get: async () => ({ schemaMode: 'external', external: { validation: { onMismatch: 'fail' } } }) }, + }); + await expect(new ExternalValidationPlugin().runValidation(ctx)).rejects.toBeInstanceOf(ExternalSchemaMismatchError); + }); + + it('warns instead of throwing when onMismatch=warn', async () => { + const { ctx, warnings } = makeCtx({ + 'external-datasource': { validateAll: async () => ({ ok: false, results: [{ ok: false, datasource: 'warehouse', object: 'wh_order', diffs: sampleDiffs }] }) }, + metadata: { get: async () => ({ schemaMode: 'validate-only', external: { validation: { onMismatch: 'warn' } } }) }, + }); + await expect(new ExternalValidationPlugin().runValidation(ctx)).resolves.toBeUndefined(); + expect(warnings.some((w) => String(w[0]).includes('drift'))).toBe(true); + }); + + it('does nothing when onMismatch=ignore', async () => { + const { ctx, warnings } = makeCtx({ + 'external-datasource': { validateAll: async () => ({ ok: false, results: [{ ok: false, datasource: 'warehouse', object: 'wh_order', diffs: sampleDiffs }] }) }, + metadata: { get: async () => ({ schemaMode: 'external', external: { validation: { onMismatch: 'ignore' } } }) }, + }); + await expect(new ExternalValidationPlugin().runValidation(ctx)).resolves.toBeUndefined(); + expect(warnings.length).toBe(0); + }); + + it('defaults to fail when the datasource definition is unavailable', async () => { + const { ctx } = makeCtx({ + 'external-datasource': { validateAll: async () => ({ ok: false, results: [{ ok: false, datasource: 'warehouse', object: 'wh_order', diffs: sampleDiffs }] }) }, + }); + await expect(new ExternalValidationPlugin().runValidation(ctx)).rejects.toBeInstanceOf(ExternalSchemaMismatchError); + }); +}); diff --git a/packages/runtime/src/external-validation-plugin.ts b/packages/runtime/src/external-validation-plugin.ts new file mode 100644 index 000000000..6f6f06cae --- /dev/null +++ 
b/packages/runtime/src/external-validation-plugin.ts @@ -0,0 +1,124 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +import type { Plugin, PluginContext } from '@objectstack/core'; +import { + ExternalSchemaMismatchError, + type SchemaDiffEntry, +} from '@objectstack/spec/shared'; + +/** + * Structural subset of `IExternalDatasourceService` used here, to avoid a hard + * dependency on the service package from runtime. + */ +interface ExternalDatasourceServiceLike { + validateAll(): Promise<{ + ok: boolean; + results: Array<{ ok: boolean; datasource: string; object: string; diffs: SchemaDiffEntry[] }>; + }>; +} + +interface MetadataServiceLike { + get?: (type: string, name: string) => Promise; +} + +interface DatasourceDef { + schemaMode?: string; + external?: { validation?: { onMismatch?: 'fail' | 'warn' | 'ignore' } }; +} + +/** + * Boot-validation plugin — Gate 2 of ADR-0015 §5.2. + * + * On `kernel:ready`, validates every federated object against its remote table + * (via the `external-datasource` service) and applies the datasource's + * `external.validation.onMismatch` policy: + * - `fail` → throws `ExternalSchemaMismatchError` (aborts boot) — default, + * - `warn` → logs the diff and continues, + * - `ignore` → does nothing. + * + * No-op when the `external-datasource` service is not registered (federation + * unused). + */ +export class ExternalValidationPlugin implements Plugin { + name = 'com.objectstack.external-validation'; + type = 'standard'; + version = '1.0.0'; + + init = (_ctx: PluginContext): void => { + // Nothing to register; validation runs on kernel:ready (see start()). + }; + + start = (ctx: PluginContext): void => { + // Subscribe to kernel-ready so validation runs after every plugin (drivers, + // services, manifests) has been registered. + ctx.hook('kernel:ready', async () => { + await this.runValidation(ctx); + }); + }; + + /** Exposed for testing; invoked from the kernel:ready handler. 
*/ + async runValidation(ctx: PluginContext): Promise { + const svc = safeGet(ctx, 'external-datasource'); + if (!svc?.validateAll) { + ctx.logger?.debug?.('[external-validation] service not registered; skipping'); + return; + } + + const metadata = safeGet(ctx, 'metadata'); + let report: Awaited>; + try { + report = await svc.validateAll(); + } catch (err) { + ctx.logger?.warn?.('[external-validation] validateAll failed', { err }); + return; + } + + const failures = report.results.filter((r) => !r.ok); + if (failures.length === 0) { + ctx.logger?.info?.('[external-validation] all federated objects match their remote schema', { + objects: report.results.length, + }); + return; + } + + for (const r of failures) { + const mode = await resolveOnMismatch(metadata, r.datasource); + if (mode === 'ignore') continue; + if (mode === 'warn') { + ctx.logger?.warn?.('[external-validation] external schema drift', { + datasource: r.datasource, + object: r.object, + diffs: r.diffs, + }); + continue; + } + // mode === 'fail' (default) + throw new ExternalSchemaMismatchError(r.datasource, r.object, r.diffs); + } + } +} + +/** Convenience factory mirroring the createXxxPlugin convention. */ +export function createExternalValidationPlugin(): ExternalValidationPlugin { + return new ExternalValidationPlugin(); +} + +async function resolveOnMismatch( + metadata: MetadataServiceLike | undefined, + datasource: string, +): Promise<'fail' | 'warn' | 'ignore'> { + try { + const ds = (await metadata?.get?.('datasource', datasource)) as DatasourceDef | undefined; + return ds?.external?.validation?.onMismatch ?? 
'fail'; + } catch { + return 'fail'; + } +} + +function safeGet(ctx: PluginContext, name: string): T | undefined { + try { + return ctx.getService(name); + } catch { + return undefined; + } +} diff --git a/packages/runtime/src/index.ts b/packages/runtime/src/index.ts index 3a014e562..a7a0e47d1 100644 --- a/packages/runtime/src/index.ts +++ b/packages/runtime/src/index.ts @@ -19,6 +19,8 @@ export type { DefaultHostConfigOptions, DefaultHostConfigResult } from './defaul export { DriverPlugin } from './driver-plugin.js'; export { AppPlugin, collectBundleHooks, collectBundleFunctions, collectBundleActions } from './app-plugin.js'; export { SeedLoaderService } from './seed-loader.js'; +// External Datasource Federation — boot-validation gate (ADR-0015, Gate 2) +export { ExternalValidationPlugin, createExternalValidationPlugin } from './external-validation-plugin.js'; export { createDispatcherPlugin } from './dispatcher-plugin.js'; export type { DispatcherPluginConfig } from './dispatcher-plugin.js'; export { createSystemEnvironmentPlugin, SYSTEM_ENVIRONMENT_ID } from './system-environment-plugin.js'; diff --git a/packages/services/service-ai/src/schema-retriever.test.ts b/packages/services/service-ai/src/schema-retriever.test.ts new file mode 100644 index 000000000..8c89934b7 --- /dev/null +++ b/packages/services/service-ai/src/schema-retriever.test.ts @@ -0,0 +1,46 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. 
+ +import { describe, it, expect } from 'vitest'; +import { SchemaRetriever } from './schema-retriever.js'; + +describe('SchemaRetriever.renderSnippet — federation annotation (ADR-0015)', () => { + it('annotates a read-only federated object with its datasource', () => { + const snippet = SchemaRetriever.renderSnippet([ + { + score: 5, + object: { + name: 'wh_order', + label: 'Warehouse Order', + datasource: 'warehouse', + external: { remoteName: 'fact_orders', writable: false }, + fields: { order_id: { type: 'text' } }, + }, + }, + ] as any); + expect(snippet).toContain('### wh_order'); + expect(snippet).toContain('[external, read-only, datasource=warehouse]'); + }); + + it('marks a writable federated object accordingly', () => { + const snippet = SchemaRetriever.renderSnippet([ + { + score: 5, + object: { + name: 'wh_order', + datasource: 'warehouse', + external: { remoteName: 'fact_orders', writable: true }, + fields: { order_id: { type: 'text' } }, + }, + }, + ] as any); + expect(snippet).toContain('[external, writable, datasource=warehouse]'); + }); + + it('does not annotate a normal managed object', () => { + const snippet = SchemaRetriever.renderSnippet([ + { score: 5, object: { name: 'task', label: 'Task', fields: { title: { type: 'text' } } } }, + ] as any); + expect(snippet).toContain('### task'); + expect(snippet).not.toContain('[external'); + }); +}); diff --git a/packages/services/service-ai/src/schema-retriever.ts b/packages/services/service-ai/src/schema-retriever.ts index fb9b6046e..348bdeeea 100644 --- a/packages/services/service-ai/src/schema-retriever.ts +++ b/packages/services/service-ai/src/schema-retriever.ts @@ -122,7 +122,13 @@ export class SchemaRetriever { if (obj.label) parts.push(obj.label); if (obj.pluralLabel && obj.pluralLabel !== obj.label) parts.push(`(${obj.pluralLabel})`); const header = parts.length > 0 ? 
` — ${parts.join(' ')}` : ''; - lines.push(`### ${obj.name}${header}`); + // ADR-0015: warn the model that federated objects come from a customer's + // production database — it must not propose schema changes or unsafe + // writes, and should bound queries with sensible limits/filters. + const badge = obj.external !== undefined + ? ` [external, ${obj.external?.writable ? 'writable' : 'read-only'}, datasource=${obj.datasource ?? 'default'}]` + : ''; + lines.push(`### ${obj.name}${header}${badge}`); const fields = Object.entries(obj.fields ?? {}).slice(0, maxFieldsPerObject); for (const [name, field] of fields) { lines.push(` - ${name}: ${describeField(field)}`); @@ -159,6 +165,10 @@ export interface ObjectShape { pluralLabel?: string; description?: string; fields?: Record; + /** Datasource the object is routed to (ADR-0015). */ + datasource?: string; + /** External-federation binding, when this is a federated object (ADR-0015). */ + external?: { writable?: boolean; remoteName?: string; remoteSchema?: string }; } /** Minimal shape of a field definition. 
*/ diff --git a/packages/services/service-external-datasource/package.json b/packages/services/service-external-datasource/package.json new file mode 100644 index 000000000..cd75d69cb --- /dev/null +++ b/packages/services/service-external-datasource/package.json @@ -0,0 +1,47 @@ +{ + "name": "@objectstack/service-external-datasource", + "version": "7.3.0", + "license": "Apache-2.0", + "description": "External Datasource Federation service for ObjectStack — implements IExternalDatasourceService (introspect, draft, validate) per ADR-0015", + "type": "module", + "main": "dist/index.js", + "types": "dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js", + "require": "./dist/index.cjs" + } + }, + "scripts": { + "build": "tsup --config ../../../tsup.config.ts", + "test": "vitest run" + }, + "dependencies": { + "@objectstack/core": "workspace:*", + "@objectstack/spec": "workspace:*" + }, + "devDependencies": { + "@types/node": "^25.9.1", + "typescript": "^6.0.3", + "vitest": "^4.1.7" + }, + "keywords": [ + "objectstack", + "service", + "external-datasource", + "federation", + "introspection" + ], + "author": "ObjectStack", + "repository": { + "type": "git", + "url": "https://github.com/objectstack-ai/framework.git", + "directory": "packages/services/service-external-datasource" + }, + "homepage": "https://objectstack.ai/docs", + "bugs": "https://github.com/objectstack-ai/framework/issues", + "publishConfig": { + "access": "public" + } +} diff --git a/packages/services/service-external-datasource/src/__tests__/external-datasource-service.test.ts b/packages/services/service-external-datasource/src/__tests__/external-datasource-service.test.ts new file mode 100644 index 000000000..833553217 --- /dev/null +++ b/packages/services/service-external-datasource/src/__tests__/external-datasource-service.test.ts @@ -0,0 +1,253 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. 
+ +import { describe, it, expect } from 'vitest'; +import type { IntrospectedSchema } from '@objectstack/spec/contracts'; +import { + ExternalDatasourceService, + type DatasourceLike, + type ObjectLike, +} from '../external-datasource-service.js'; + +/** Build a fake introspected schema for the `warehouse` datasource. */ +function warehouseSchema(): IntrospectedSchema { + return { + dialect: 'postgres', + introspectedAt: '2026-05-30T00:00:00.000Z', + tables: { + 'mart.fact_orders': { + name: 'mart.fact_orders', + indexes: [], + columns: [ + { name: 'order_id', type: 'text', nullable: false, primaryKey: true }, + { name: 'customer_id', type: 'text', nullable: false, primaryKey: false }, + { name: 'amount', type: 'numeric(10,2)', nullable: true, primaryKey: false }, + { name: 'ordered_at', type: 'timestamptz', nullable: true, primaryKey: false }, + { name: 'metadata', type: 'jsonb', nullable: true, primaryKey: false }, + { name: 'geo', type: 'geography', nullable: true, primaryKey: false }, + ], + }, + 'public.dim_customer': { + name: 'public.dim_customer', + indexes: [], + columns: [ + { name: 'id', type: 'text', nullable: false, primaryKey: true }, + { name: 'name', type: 'varchar(255)', nullable: true, primaryKey: false }, + ], + }, + }, + }; +} + +function makeService(overrides?: { + datasource?: DatasourceLike; + objects?: ObjectLike[]; +}) { + const ds: DatasourceLike = overrides?.datasource ?? { + name: 'warehouse', + schemaMode: 'external', + external: { allowedSchemas: ['mart', 'public'] }, + }; + const objects = overrides?.objects ?? []; + return new ExternalDatasourceService({ + introspect: async () => warehouseSchema(), + getDatasource: async (n) => (n === ds.name ? 
ds : undefined), + getObject: async (n) => objects.find((o) => o.name === n), + listObjects: async () => objects, + }); +} + +describe('listRemoteTables', () => { + it('lists tables with parsed schema + column counts', async () => { + const svc = makeService(); + const tables = await svc.listRemoteTables('warehouse'); + expect(tables).toHaveLength(2); + const orders = tables.find((t) => t.name === 'fact_orders')!; + expect(orders.schema).toBe('mart'); + expect(orders.columnCount).toBe(6); + }); + + it('filters by requested schema', async () => { + const svc = makeService(); + const tables = await svc.listRemoteTables('warehouse', { schema: 'public' }); + expect(tables.map((t) => t.name)).toEqual(['dim_customer']); + }); + + it('respects allowedSchemas', async () => { + const svc = makeService({ + datasource: { name: 'warehouse', schemaMode: 'external', external: { allowedSchemas: ['mart'] } }, + }); + const tables = await svc.listRemoteTables('warehouse'); + expect(tables.map((t) => t.name)).toEqual(['fact_orders']); + }); +}); + +describe('generateObjectDraft', () => { + it('maps columns to field types and flags lossy/unknown for review', async () => { + const svc = makeService(); + const draft = await svc.generateObjectDraft('warehouse', 'fact_orders'); + + expect(draft.name).toBe('fact_orders'); + expect(draft.datasource).toBe('warehouse'); + const fields = draft.definition.fields as Record; + expect(fields.order_id).toEqual({ type: 'text', primaryKey: true }); + expect(fields.amount.type).toBe('number'); + expect(fields.ordered_at.type).toBe('datetime'); + expect(fields.metadata.type).toBe('json'); + // geography is unknown → defaulted to text + review note. + expect(fields.geo.type).toBe('text'); + expect(draft.review.some((r) => r.column === 'geo')).toBe(true); + + // Source carries the external binding + a REVIEW marker. 
+ expect(draft.source).toContain("remoteName: 'fact_orders'"); + expect(draft.source).toContain("remoteSchema: 'mart'"); + expect(draft.source).toContain('REVIEW:'); + expect(draft.source).toContain("order_id: { type: 'text', primaryKey: true }"); + }); + + it('honours include/exclude/rename/primaryKey options', async () => { + const svc = makeService(); + const draft = await svc.generateObjectDraft('warehouse', 'fact_orders', { + includeColumns: ['order_id', 'amount'], + rename: { amount: 'total' }, + primaryKey: ['order_id'], + }); + const fields = draft.definition.fields as Record; + expect(Object.keys(fields)).toEqual(['order_id', 'total']); + }); + + it('throws when the remote table is missing', async () => { + const svc = makeService(); + await expect(svc.generateObjectDraft('warehouse', 'nope')).rejects.toThrow(/not found/); + }); +}); + +describe('validateObject', () => { + const baseObject: ObjectLike = { + name: 'wh_order', + datasource: 'warehouse', + external: { remoteName: 'fact_orders' }, + fields: { + order_id: { type: 'text' }, + customer_id: { type: 'text' }, + amount: { type: 'number' }, + ordered_at: { type: 'datetime' }, + }, + }; + + it('returns ok for a matching federated object', async () => { + const svc = makeService({ objects: [baseObject] }); + const result = await svc.validateObject('wh_order'); + expect(result.ok).toBe(true); + expect(result.diffs).toHaveLength(0); + }); + + it('reports a type_mismatch error for an incompatible field', async () => { + const svc = makeService({ + objects: [{ ...baseObject, fields: { ...baseObject.fields, amount: { type: 'datetime' } } }], + }); + const result = await svc.validateObject('wh_order'); + expect(result.ok).toBe(false); + expect(result.diffs).toContainEqual( + expect.objectContaining({ kind: 'type_mismatch', column: 'amount', severity: 'error' }), + ); + }); + + it('reports a missing_column error', async () => { + const svc = makeService({ + objects: [{ ...baseObject, fields: { 
...baseObject.fields, nonexistent: { type: 'text' } } }], + }); + const result = await svc.validateObject('wh_order'); + expect(result.ok).toBe(false); + expect(result.diffs).toContainEqual( + expect.objectContaining({ kind: 'missing_column', column: 'nonexistent' }), + ); + }); + + it('reports missing_table when the remote table is absent', async () => { + const svc = makeService({ + objects: [{ ...baseObject, external: { remoteName: 'ghost' } }], + }); + const result = await svc.validateObject('wh_order'); + expect(result.ok).toBe(false); + expect(result.diffs[0].kind).toBe('missing_table'); + }); + + it('treats a managed datasource object as nothing-to-validate', async () => { + const svc = makeService({ + datasource: { name: 'warehouse', schemaMode: 'managed' }, + objects: [baseObject], + }); + const result = await svc.validateObject('wh_order'); + expect(result.ok).toBe(true); + expect(result.diffs).toHaveLength(0); + }); + + it('honours columnMap and ignoreColumns', async () => { + const svc = makeService({ + objects: [ + { + name: 'wh_order', + datasource: 'warehouse', + external: { + remoteName: 'fact_orders', + columnMap: { customer_id: 'cust' }, + ignoreColumns: ['metadata'], + }, + fields: { order_id: { type: 'text' }, cust: { type: 'text' } }, + }, + ], + }); + const result = await svc.validateObject('wh_order'); + expect(result.ok).toBe(true); + }); + + it('flags a lossy mapping as a warning without failing', async () => { + const svc = makeService({ + objects: [ + { + name: 'wh_order', + datasource: 'warehouse', + external: { remoteName: 'fact_orders' }, + fields: { order_id: { type: 'text' }, metadata: { type: 'text' } }, + }, + ], + }); + const result = await svc.validateObject('wh_order'); + expect(result.ok).toBe(true); + expect(result.diffs).toContainEqual( + expect.objectContaining({ kind: 'type_mismatch', column: 'metadata', severity: 'warning' }), + ); + }); +}); + +describe('validateAll', () => { + it('aggregates results across federated 
objects only', async () => { + const svc = makeService({ + objects: [ + { name: 'local_thing', datasource: 'default', fields: { id: { type: 'text' } } }, + { + name: 'wh_order', + datasource: 'warehouse', + external: { remoteName: 'fact_orders' }, + fields: { order_id: { type: 'text' }, amount: { type: 'number' } }, + }, + ], + }); + const report = await svc.validateAll(); + expect(report.ok).toBe(true); + expect(report.results.map((r) => r.object)).toEqual(['wh_order']); + }); +}); + +describe('refreshCatalog', () => { + it('produces a snapshot with suggested field types', async () => { + const svc = makeService(); + const catalog = (await svc.refreshCatalog('warehouse')) as { + datasource: string; + tables: Array<{ remoteName: string; columns: Array<{ name: string; suggestedFieldType?: string }> }>; + }; + expect(catalog.datasource).toBe('warehouse'); + const orders = catalog.tables.find((t) => t.remoteName === 'fact_orders')!; + expect(orders.columns.find((c) => c.name === 'amount')?.suggestedFieldType).toBe('number'); + }); +}); diff --git a/packages/services/service-external-datasource/src/external-datasource-service.ts b/packages/services/service-external-datasource/src/external-datasource-service.ts new file mode 100644 index 000000000..7a7a5ac7a --- /dev/null +++ b/packages/services/service-external-datasource/src/external-datasource-service.ts @@ -0,0 +1,387 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * ExternalDatasourceService — implements {@link IExternalDatasourceService} + * (ADR-0015 §6) on top of driver introspection. + * + * The service is intentionally decoupled from the kernel: all I/O + * (introspection, metadata reads) is injected via + * {@link ExternalDatasourceServiceConfig}, so the introspection/draft/validate + * logic is pure and unit-testable. The kernel plugin wires the real + * `IDataEngine` + `IMetadataService` callbacks in. 
+ */ + +import type { + IExternalDatasourceService, + RemoteTable, + GenerateDraftOpts, + ObjectDraft, + SchemaValidationResult, + SchemaValidationReport, + IntrospectedSchema, + IntrospectedTable, +} from '@objectstack/spec/contracts'; +import type { SchemaDiffEntry } from '@objectstack/spec/shared'; +import { + suggestFieldType, + isCompatible, + type SqlDialect, + type FieldType, +} from '@objectstack/spec/data'; + +/** Minimal datasource shape the service reads (subset of `Datasource`). */ +export interface DatasourceLike { + name: string; + schemaMode?: 'managed' | 'external' | 'validate-only'; + external?: { + allowedSchemas?: string[]; + validation?: { onMismatch?: 'fail' | 'warn' | 'ignore' }; + }; +} + +/** Minimal object shape the service reads (subset of `ServiceObject`). */ +export interface ObjectLike { + name: string; + label?: string; + datasource?: string; + external?: { + remoteName?: string; + remoteSchema?: string; + columnMap?: Record; + ignoreColumns?: string[]; + }; + fields?: Record; +} + +export interface Logger { + warn: (message: string, meta?: unknown) => void; + info?: (message: string, meta?: unknown) => void; +} + +/** + * Injected dependencies. The plugin supplies real implementations backed by + * the driver registry and `IMetadataService`; tests supply fakes. + */ +export interface ExternalDatasourceServiceConfig { + /** Introspect a datasource's live schema via its driver. */ + introspect: (datasource: string) => Promise; + /** Resolve a datasource definition by name. */ + getDatasource: (name: string) => Promise; + /** Resolve one object definition by name. */ + getObject: (name: string) => Promise; + /** List all object definitions (for `validateAll`). */ + listObjects: () => Promise; + logger?: Logger; +} + +/** Columns ObjectStack manages itself — never validated against the remote. */ +const BUILTIN_COLUMNS = new Set(['id', 'created_at', 'updated_at']); + +/** Split a possibly schema-qualified name (`mart.fact_orders`). 
*/ +function parseQualified(raw: string): { schema?: string; name: string } { + const idx = raw.indexOf('.'); + if (idx === -1) return { name: raw }; + return { schema: raw.slice(0, idx), name: raw.slice(idx + 1) }; +} + +/** Normalise a remote table name into a snake_case object name. */ +function toObjectName(remoteName: string): string { + const { name } = parseQualified(remoteName); + return name + .replace(/[^a-zA-Z0-9_]/g, '_') + .replace(/^[^a-z_]/, (c) => `_${c.toLowerCase()}`) + .toLowerCase(); +} + +/** snake_case → Title Case label. */ +function toLabel(name: string): string { + return name + .split('_') + .filter(Boolean) + .map((w) => w.charAt(0).toUpperCase() + w.slice(1)) + .join(' '); +} + +export class ExternalDatasourceService implements IExternalDatasourceService { + constructor(private readonly config: ExternalDatasourceServiceConfig) {} + + private get logger(): Logger | undefined { + return this.config.logger; + } + + private findTable(schema: IntrospectedSchema, remoteName: string): IntrospectedTable | undefined { + const want = parseQualified(remoteName).name; + for (const table of Object.values(schema.tables)) { + if (table.name === remoteName) return table; + if (parseQualified(table.name).name === want) return table; + } + return undefined; + } + + async listRemoteTables( + datasource: string, + opts?: { schema?: string }, + ): Promise { + const [schema, ds] = await Promise.all([ + this.config.introspect(datasource), + this.config.getDatasource(datasource), + ]); + const allowed = ds?.external?.allowedSchemas; + + const tables: RemoteTable[] = []; + for (const table of Object.values(schema.tables)) { + const { schema: tableSchema, name } = parseQualified(table.name); + if (opts?.schema && tableSchema && tableSchema !== opts.schema) continue; + // allowedSchemas only filters tables we can attribute to a schema. 
+ if (allowed && tableSchema && !allowed.includes(tableSchema)) continue; + tables.push({ schema: tableSchema, name, columnCount: table.columns.length }); + } + return tables; + } + + async generateObjectDraft( + datasource: string, + remoteName: string, + opts: GenerateDraftOpts = {}, + ): Promise { + const schema = await this.config.introspect(datasource); + const table = this.findTable(schema, remoteName); + if (!table) { + throw new Error( + `Remote table '${remoteName}' not found on datasource '${datasource}'.`, + ); + } + const dialect = schema.dialect as SqlDialect | undefined; + // Derive the remote schema from the matched table's qualified name (the + // caller may pass an unqualified `remoteName`). + const matched = parseQualified(table.name); + const remoteSchema = opts.remoteSchema ?? matched.schema; + const resolvedRemoteName = matched.name; + + const include = opts.includeColumns ? new Set(opts.includeColumns) : undefined; + const exclude = opts.excludeColumns ? new Set(opts.excludeColumns) : new Set(); + const pkOverride = opts.primaryKey ? new Set(opts.primaryKey) : undefined; + + const fields: Record = {}; + const review: ObjectDraft['review'] = []; + + for (const col of table.columns) { + if (include && !include.has(col.name)) continue; + if (exclude.has(col.name)) continue; + + const fieldName = opts.rename?.[col.name] ?? col.name; + const suggested = suggestFieldType(col.type, dialect); + const fieldType: FieldType = suggested ?? 'text'; + if (!suggested) { + review.push({ + column: col.name, + remoteType: col.type, + note: `unrecognised remote type — defaulted to 'text', verify`, + }); + } else if (isCompatible(col.type, fieldType, dialect) === 'lossy') { + review.push({ + column: col.name, + remoteType: col.type, + note: `mapped lossy to '${fieldType}'`, + }); + } + + const isPk = pkOverride ? pkOverride.has(col.name) : col.primaryKey; + fields[fieldName] = isPk ? 
{ type: fieldType, primaryKey: true } : { type: fieldType }; + } + + const name = toObjectName(resolvedRemoteName); + const definition: Record = { + name, + label: toLabel(name), + datasource, + external: { + ...(remoteSchema ? { remoteSchema } : {}), + remoteName: resolvedRemoteName, + }, + fields, + }; + + return { + name, + datasource, + definition, + source: renderObjectSource(definition, fields, review), + review, + }; + } + + async refreshCatalog(datasource: string): Promise { + const schema = await this.config.introspect(datasource); + return { + name: `${datasource}_catalog`, + datasource, + snapshotAt: new Date().toISOString(), + dialect: schema.dialect, + tables: Object.values(schema.tables).map((t) => { + const { schema: s, name } = parseQualified(t.name); + return { + remoteSchema: s, + remoteName: name, + columns: t.columns.map((c) => ({ + name: c.name, + sqlType: c.type, + nullable: c.nullable, + primaryKey: c.primaryKey, + suggestedFieldType: suggestFieldType(c.type, schema.dialect as SqlDialect), + })), + }; + }), + }; + } + + async validateObject(objectName: string): Promise { + const obj = await this.config.getObject(objectName); + if (!obj) { + throw new Error(`Object '${objectName}' not found.`); + } + const datasource = obj.datasource ?? 'default'; + const ds = await this.config.getDatasource(datasource); + + // Not a federated object → nothing to validate. + if (!ds || !ds.schemaMode || ds.schemaMode === 'managed') { + return { ok: true, datasource, object: objectName, diffs: [] }; + } + + const schema = await this.config.introspect(datasource); + const dialect = schema.dialect as SqlDialect | undefined; + const remoteName = obj.external?.remoteName ?? 
obj.name; + const table = this.findTable(schema, remoteName); + + const diffs: SchemaDiffEntry[] = []; + + if (!table) { + diffs.push({ + kind: 'missing_table', + remoteSchema: obj.external?.remoteSchema, + remoteName, + severity: 'error', + }); + return { ok: false, datasource, object: objectName, diffs }; + } + + const columnsByName = new Map(table.columns.map((c) => [c.name, c])); + const ignore = new Set(obj.external?.ignoreColumns ?? []); + // columnMap is remoteColumn → fieldName; invert for field → remoteColumn. + const fieldToRemote = new Map(); + for (const [remoteCol, fieldName] of Object.entries(obj.external?.columnMap ?? {})) { + fieldToRemote.set(fieldName, remoteCol); + } + + for (const [fieldName, field] of Object.entries(obj.fields ?? {})) { + if (BUILTIN_COLUMNS.has(fieldName)) continue; + const remoteCol = fieldToRemote.get(fieldName) ?? fieldName; + if (ignore.has(remoteCol)) continue; + + const col = columnsByName.get(remoteCol); + if (!col) { + diffs.push({ + kind: 'missing_column', + remoteName, + column: remoteCol, + severity: 'error', + }); + continue; + } + const fieldType = (field.type ?? 
'text') as FieldType; + const compat = isCompatible(col.type, fieldType, dialect); + if (compat === false) { + diffs.push({ + kind: 'type_mismatch', + remoteName, + column: remoteCol, + expected: fieldType, + actual: col.type, + severity: 'error', + }); + } else if (compat === 'lossy') { + diffs.push({ + kind: 'type_mismatch', + remoteName, + column: remoteCol, + expected: fieldType, + actual: col.type, + severity: 'warning', + }); + } + } + + const ok = !diffs.some((d) => d.severity === 'error'); + return { ok, datasource, object: objectName, diffs }; + } + + async validateAll(): Promise { + const objects = await this.config.listObjects(); + const federated = objects.filter( + (o) => o.external !== undefined || (o.datasource && o.datasource !== 'default'), + ); + + const results = await Promise.all( + federated.map((o) => + this.validateObject(o.name).catch((err): SchemaValidationResult => { + this.logger?.warn(`validateObject('${o.name}') failed`, err); + return { + ok: false, + datasource: o.datasource ?? 'default', + object: o.name, + diffs: [ + { + kind: 'missing_table', + remoteName: o.external?.remoteName ?? o.name, + actual: err instanceof Error ? err.message : String(err), + severity: 'error', + }, + ], + }; + }), + ), + ); + + const ok = results.every((r) => r.ok); + return { ok, results }; + } +} + +/** Render a reviewable `*.object.ts` source string for an object draft. */ +function renderObjectSource( + definition: Record, + fields: Record, + review: ObjectDraft['review'], +): string { + const reviewByColumn = new Map(review.map((r) => [r.column, r.note])); + const external = definition.external as { remoteSchema?: string; remoteName?: string }; + + const fieldLines = Object.entries(fields).map(([fieldName, f]) => { + const note = reviewByColumn.get(fieldName); + const pk = f.primaryKey ? ', primaryKey: true' : ''; + const comment = note ? 
` // REVIEW: ${note}` : ''; + return ` ${fieldName}: { type: '${f.type}'${pk} },${comment}`; + }); + + const externalLine = external.remoteSchema + ? ` external: { remoteSchema: '${external.remoteSchema}', remoteName: '${external.remoteName}' },` + : ` external: { remoteName: '${external.remoteName}' },`; + + return [ + `// Generated by \`os datasource introspect\` (ADR-0015). Review before committing.`, + `import type { ServiceObjectInput } from '@objectstack/spec/data';`, + ``, + `const ${definition.name as string}: ServiceObjectInput = {`, + ` name: '${definition.name as string}',`, + ` label: '${definition.label as string}',`, + ` datasource: '${definition.datasource as string}',`, + externalLine, + ` fields: {`, + ...fieldLines, + ` },`, + `};`, + ``, + `export default ${definition.name as string};`, + ``, + ].join('\n'); +} diff --git a/packages/services/service-external-datasource/src/index.ts b/packages/services/service-external-datasource/src/index.ts new file mode 100644 index 000000000..0f38672f4 --- /dev/null +++ b/packages/services/service-external-datasource/src/index.ts @@ -0,0 +1,14 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +// Core service +export { ExternalDatasourceService } from './external-datasource-service.js'; +export type { + ExternalDatasourceServiceConfig, + DatasourceLike, + ObjectLike, + Logger, +} from './external-datasource-service.js'; + +// Kernel plugin +export { ExternalDatasourceServicePlugin } from './plugin.js'; +export type { ExternalDatasourceServicePluginOptions } from './plugin.js'; diff --git a/packages/services/service-external-datasource/src/plugin.ts b/packages/services/service-external-datasource/src/plugin.ts new file mode 100644 index 000000000..54be8f431 --- /dev/null +++ b/packages/services/service-external-datasource/src/plugin.ts @@ -0,0 +1,103 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. 
+ +import type { Plugin, PluginContext } from '@objectstack/core'; +import type { IntrospectedSchema } from '@objectstack/spec/contracts'; +import { + ExternalDatasourceService, + type ExternalDatasourceServiceConfig, + type DatasourceLike, + type ObjectLike, + type Logger, +} from './external-datasource-service.js'; + +/** + * Minimal surfaces the plugin needs from the data engine + metadata service. + * Kept structural so the plugin doesn't hard-depend on concrete classes. + */ +interface DataEngineLike { + /** Resolve a driver by datasource name and introspect its live schema. */ + introspectDatasource?: (datasource: string) => Promise; + getDatasourceDriver?: (datasource: string) => { introspectSchema?: () => Promise } | undefined; +} + +interface MetadataServiceLike { + get: (type: string, name: string) => Promise; + getObject?: (name: string) => Promise; + listObjects?: () => Promise; + list?: (type: string) => Promise; +} + +export interface ExternalDatasourceServicePluginOptions { + /** Override the introspection function (mainly for tests). */ + introspect?: (datasource: string) => Promise; + logger?: Logger; +} + +/** + * ExternalDatasourceServicePlugin — registers `IExternalDatasourceService` + * into the kernel as the `'external-datasource'` service (ADR-0015 §6.1). + * + * It bridges the decoupled {@link ExternalDatasourceService} to the live + * `IDataEngine` (for driver introspection) and `IMetadataService` (for object + * + datasource reads). 
+ */ +export class ExternalDatasourceServicePlugin implements Plugin { + name = 'com.objectstack.service-external-datasource'; + version = '1.0.0'; + type = 'standard' as const; + dependencies: string[] = []; + + private service?: ExternalDatasourceService; + private readonly options: ExternalDatasourceServicePluginOptions; + + constructor(options: ExternalDatasourceServicePluginOptions = {}) { + this.options = options; + } + + async init(ctx: PluginContext): Promise { + const engine = safeGetService(ctx, 'data'); + const metadata = safeGetService(ctx, 'metadata'); + + const introspect: ExternalDatasourceServiceConfig['introspect'] = + this.options.introspect ?? + (async (datasource: string) => { + if (engine?.introspectDatasource) return engine.introspectDatasource(datasource); + const driver = engine?.getDatasourceDriver?.(datasource); + if (driver?.introspectSchema) return driver.introspectSchema(); + throw new Error( + `Cannot introspect datasource '${datasource}': no driver introspection available.`, + ); + }); + + const config: ExternalDatasourceServiceConfig = { + introspect, + getDatasource: async (n) => (await metadata?.get('datasource', n)) as DatasourceLike | undefined, + getObject: async (n) => + (metadata?.getObject ? await metadata.getObject(n) : await metadata?.get('object', n)) as ObjectLike | undefined, + listObjects: async () => + ((metadata?.listObjects + ? await metadata.listObjects() + : await metadata?.list?.('object')) ?? 
[]) as ObjectLike[], + logger: this.options.logger, + }; + + this.service = new ExternalDatasourceService(config); + ctx.registerService('external-datasource', this.service); + } + + async start(ctx: PluginContext): Promise { + if (this.service) await ctx.trigger('external-datasource:ready', this.service); + } + + async destroy(): Promise { + this.service = undefined; + } +} + +function safeGetService(ctx: PluginContext, name: string): T | undefined { + try { + return ctx.getService(name); + } catch { + return undefined; + } +} diff --git a/packages/services/service-external-datasource/tsconfig.json b/packages/services/service-external-datasource/tsconfig.json new file mode 100644 index 000000000..0b8b99d88 --- /dev/null +++ b/packages/services/service-external-datasource/tsconfig.json @@ -0,0 +1,17 @@ +{ + "extends": "../../../tsconfig.json", + "compilerOptions": { + "outDir": "dist", + "rootDir": "src", + "types": [ + "node" + ] + }, + "include": [ + "src" + ], + "exclude": [ + "node_modules", + "dist" + ] +} diff --git a/packages/spec/src/contracts/external-datasource-service.ts b/packages/spec/src/contracts/external-datasource-service.ts new file mode 100644 index 000000000..b964ffdd2 --- /dev/null +++ b/packages/spec/src/contracts/external-datasource-service.ts @@ -0,0 +1,117 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * IExternalDatasourceService — External Datasource Federation contract + * (ADR-0015 §4.5, §6). + * + * The service that turns a federated datasource (`schemaMode !== 'managed'`) + * into something the rest of ObjectStack can use: it lists remote tables, + * drafts `Object` definitions from them, and validates declared objects + * against the live remote schema. Implemented in + * `@objectstack/service-external-datasource` on top of the driver's + * introspection capability; consumed by the CLI, the boot-validation plugin + * (Gate 2), and the REST layer. 
+ */ + +import type { SchemaDiffEntry } from '../shared/external-errors'; + +/** + * A remote table discovered via introspection, filtered by the datasource's + * `external.allowedSchemas`. + */ +export interface RemoteTable { + /** Remote schema/database qualifier, when the dialect has one. */ + schema?: string; + /** Remote table/view name. */ + name: string; + /** Number of columns. */ + columnCount: number; + /** Approximate row count, when the driver can supply it cheaply. */ + rowCountEstimate?: number; +} + +/** + * Options controlling how a remote table is turned into an `Object` draft. + */ +export interface GenerateDraftOpts { + /** Restrict to a single remote schema. */ + remoteSchema?: string; + /** Remote column → local field name overrides. */ + rename?: Record; + /** Override primary-key detection. */ + primaryKey?: string[]; + /** Only include these remote columns. */ + includeColumns?: string[]; + /** Exclude these remote columns. */ + excludeColumns?: string[]; +} + +/** + * A generated `Object` draft: both the structured definition and a ready-to- + * write `*.object.ts` source string (with `// REVIEW:` markers on lossy + * column mappings). + */ +export interface ObjectDraft { + /** Suggested object name (snake_case), derived from the remote table. */ + name: string; + /** The datasource this object is bound to. */ + datasource: string; + /** The structured object definition (parseable by `ObjectSchema`). */ + definition: Record; + /** Rendered TypeScript source for an `*.object.ts` file. */ + source: string; + /** Columns whose mapping is lossy or unknown, surfaced for human review. */ + review: Array<{ column: string; remoteType: string; note: string }>; +} + +/** Per-object validation outcome. */ +export interface SchemaValidationResult { + ok: boolean; + datasource: string; + object: string; + diffs: SchemaDiffEntry[]; +} + +/** Aggregate validation outcome across many federated objects. 
*/ +export interface SchemaValidationReport { + ok: boolean; + results: SchemaValidationResult[]; +} + +/** + * External datasource service contract. + * + * All methods are keyed by ObjectStack identifiers (`datasource` / object + * `name`), never by live connections — credential resolution and driver + * acquisition are the implementation's concern. + */ +export interface IExternalDatasourceService { + /** + * List remote tables on a federated datasource, filtered by + * `external.allowedSchemas`. + */ + listRemoteTables(datasource: string, opts?: { schema?: string }): Promise; + + /** + * Generate an `Object` draft (structured + `*.object.ts` source) from a + * remote table, using the type-compat matrix to map columns to field types. + */ + generateObjectDraft( + datasource: string, + remoteName: string, + opts?: GenerateDraftOpts, + ): Promise; + + /** + * Refresh and persist the cached remote schema snapshot + * (`external_catalog`). Returns the snapshot. (Persistence lands with the + * `external_catalog` metadata type.) + */ + refreshCatalog(datasource: string): Promise; + + /** Validate one federated object against the live remote table. */ + validateObject(objectName: string): Promise; + + /** Validate every federated object, parallelised per datasource. 
*/ + validateAll(): Promise; +} diff --git a/packages/spec/src/contracts/index.ts b/packages/spec/src/contracts/index.ts index e149552db..0448fc76c 100644 --- a/packages/spec/src/contracts/index.ts +++ b/packages/spec/src/contracts/index.ts @@ -48,6 +48,7 @@ export * from './embedder.js'; // Provisioning & Deployment export * from './provisioning-service.js'; export * from './schema-diff-service.js'; +export * from './external-datasource-service.js'; export * from './deploy-pipeline-service.js'; export * from './tenant-router.js'; export * from './app-lifecycle-service.js'; diff --git a/packages/spec/src/data/datasource.test.ts b/packages/spec/src/data/datasource.test.ts index 2993a4b25..72d1265d4 100644 --- a/packages/spec/src/data/datasource.test.ts +++ b/packages/spec/src/data/datasource.test.ts @@ -4,6 +4,8 @@ import { DatasourceCapabilities, DriverDefinitionSchema, DriverType, + SchemaModeSchema, + ExternalDatasourceSettingsSchema, type Datasource, type DatasourceCapabilitiesType, } from './datasource.zod'; @@ -474,3 +476,93 @@ describe('DatasourceSchema - ssl', () => { expect(result.ssl).toBeUndefined(); }); }); + +describe('SchemaMode & External Federation (ADR-0015)', () => { + it('should default schemaMode to "managed" with no external block', () => { + const ds = DatasourceSchema.parse({ + name: 'default', + driver: 'postgres', + config: {}, + }); + expect(ds.schemaMode).toBe('managed'); + expect(ds.external).toBeUndefined(); + }); + + it('should accept the three valid schema modes', () => { + expect(() => SchemaModeSchema.parse('managed')).not.toThrow(); + expect(() => SchemaModeSchema.parse('external')).not.toThrow(); + expect(() => SchemaModeSchema.parse('validate-only')).not.toThrow(); + }); + + it('should reject an unknown schema mode', () => { + expect(() => SchemaModeSchema.parse('replica')).toThrow(); + }); + + it('should accept schemaMode="external" with an external block and apply defaults', () => { + const ds = DatasourceSchema.parse({ + name: 
'warehouse', + driver: 'postgres', + config: { connectionString: 'postgres://...' }, + schemaMode: 'external', + external: { label: 'Analytics Warehouse' }, + }); + expect(ds.schemaMode).toBe('external'); + // ExternalDatasourceSettingsSchema defaults + expect(ds.external?.allowWrites).toBe(false); + expect(ds.external?.queryTimeoutMs).toBe(30_000); + expect(ds.external?.validation.onMismatch).toBe('fail'); + expect(ds.external?.validation.checkOnBoot).toBe(true); + }); + + it('should require external settings when schemaMode != "managed"', () => { + const result = DatasourceSchema.safeParse({ + name: 'warehouse', + driver: 'postgres', + config: {}, + schemaMode: 'external', + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some((i) => i.path.includes('external'))).toBe(true); + } + }); + + it('should forbid external settings when schemaMode === "managed"', () => { + const result = DatasourceSchema.safeParse({ + name: 'default', + driver: 'postgres', + config: {}, + schemaMode: 'managed', + external: { allowWrites: true }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some((i) => i.path.includes('external'))).toBe(true); + } + }); + + it('should require external settings for validate-only mode too', () => { + const result = DatasourceSchema.safeParse({ + name: 'warehouse', + driver: 'postgres', + config: {}, + schemaMode: 'validate-only', + }); + expect(result.success).toBe(false); + }); + + it('should parse a fully-specified external settings block', () => { + const settings = ExternalDatasourceSettingsSchema.parse({ + label: 'Snowflake — ANALYTICS / PROD', + allowedSchemas: ['public', 'mart'], + allowWrites: true, + validation: { onMismatch: 'warn', checkOnBoot: false, checkIntervalMs: 60_000 }, + credentialsRef: 'secret:warehouse/readonly', + queryTimeoutMs: 15_000, + requirePermission: 'analytics_admin', + }); + expect(settings.allowedSchemas).toEqual(['public', 
'mart']); + expect(settings.validation.onMismatch).toBe('warn'); + expect(settings.validation.checkIntervalMs).toBe(60_000); + }); +}); diff --git a/packages/spec/src/data/datasource.zod.ts b/packages/spec/src/data/datasource.zod.ts index 3abfe0d16..b6221a85e 100644 --- a/packages/spec/src/data/datasource.zod.ts +++ b/packages/spec/src/data/datasource.zod.ts @@ -88,6 +88,55 @@ export const DatasourceCapabilities = z.object({ dynamicSchema: z.boolean().default(false), }); +/** + * Schema Ownership Mode (ADR-0015) + * + * Distinguishes "ObjectStack owns this schema" from "this is somebody + * else's production database — never touch DDL". Gates migrations, + * boot-time validation, and writes. + * + * - `managed` — ObjectStack owns the schema: DDL + migrations allowed. + * - `external` — Mature external DB: DDL forbidden; mismatch fails boot. + * - `validate-only` — Like `external`, but mismatches warn instead of fail. + */ +export const SchemaModeSchema = z + .enum(['managed', 'external', 'validate-only']) + .describe('Schema ownership mode'); + +export type SchemaMode = z.infer; + +/** + * External Datasource Settings (ADR-0015) + * + * Present only when `schemaMode !== 'managed'`. Carries the federation + * policy for a mature external database: write gating, schema whitelist, + * boot/drift validation behaviour, credentials reference, and query caps. + */ +export const ExternalDatasourceSettingsSchema = z.object({ + label: z.string().optional() + .describe('Display label, e.g. "Snowflake — ANALYTICS / PROD"'), + allowedSchemas: z.array(z.string()).optional() + .describe('Whitelist of remote schemas/databases that may be exposed.'), + allowWrites: z.boolean().default(false) + .describe('Global write gate. 
Individual objects must also opt in via object.external.writable.'), + validation: z.object({ + onMismatch: z.enum(['fail', 'warn', 'ignore']).default('fail') + .describe('What to do when a federated object diverges from the remote table.'), + checkOnBoot: z.boolean().default(true) + .describe('Validate federated objects against the remote schema at boot.'), + checkIntervalMs: z.number().optional() + .describe('Optional background drift-check interval in milliseconds.'), + }).default({ onMismatch: 'fail', checkOnBoot: true }).describe('Boot/drift validation policy'), + credentialsRef: z.string().optional() + .describe('Reference into the secrets store; never inline credentials.'), + queryTimeoutMs: z.number().default(30_000) + .describe('Hard cap on per-query execution time.'), + requirePermission: z.string().optional() + .describe('Optional convenience: gate the entire datasource behind a single role.'), +}).describe('External datasource federation settings (schemaMode != "managed")'); + +export type ExternalDatasourceSettings = z.infer; + /** * Datasource Schema * Represents a connection to an external data store. @@ -162,6 +211,34 @@ export const DatasourceSchema = lazySchema(() => z.object({ /** Is enabled? */ active: z.boolean().default(true).describe('Is datasource enabled'), + + /** + * Schema Ownership Mode (ADR-0015) + * Declares whether ObjectStack owns this schema (`managed`, default) or + * is a guest in a mature external database (`external` / `validate-only`). + */ + schemaMode: SchemaModeSchema.default('managed'), + + /** + * External Federation Settings (ADR-0015) + * Required when `schemaMode !== 'managed'`; forbidden otherwise. 
+ */ + external: ExternalDatasourceSettingsSchema.optional(), +}).superRefine((ds, ctx) => { + if (ds.schemaMode !== 'managed' && !ds.external) { + ctx.addIssue({ + code: 'custom', + path: ['external'], + message: `schemaMode='${ds.schemaMode}' requires 'external' settings.`, + }); + } + if (ds.schemaMode === 'managed' && ds.external) { + ctx.addIssue({ + code: 'custom', + path: ['external'], + message: `'external' settings only apply when schemaMode != 'managed'.`, + }); + } })); export type Datasource = z.infer; diff --git a/packages/spec/src/data/external-catalog.test.ts b/packages/spec/src/data/external-catalog.test.ts new file mode 100644 index 000000000..aedc8165d --- /dev/null +++ b/packages/spec/src/data/external-catalog.test.ts @@ -0,0 +1,65 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +import { describe, it, expect } from 'vitest'; +import { + ExternalCatalogSchema, + ExternalTableSchema, + ExternalColumnSchema, +} from './external-catalog.zod'; + +describe('ExternalCatalog (ADR-0015 §4.3)', () => { + it('parses a full catalog snapshot', () => { + const catalog = ExternalCatalogSchema.parse({ + name: 'warehouse_catalog', + datasource: 'warehouse', + snapshotAt: '2026-05-30T00:00:00.000Z', + dialect: 'postgres', + tables: [ + { + remoteSchema: 'mart', + remoteName: 'fact_orders', + columns: [ + { name: 'order_id', sqlType: 'text', nullable: false, primaryKey: true, suggestedFieldType: 'text' }, + { name: 'amount', sqlType: 'numeric(10,2)', nullable: true }, + ], + rowCountEstimate: 12_400_000, + }, + ], + }); + expect(catalog.name).toBe('warehouse_catalog'); + expect(catalog.tables[0].columns[0].primaryKey).toBe(true); + // primaryKey defaults to false when omitted. 
+ expect(catalog.tables[0].columns[1].primaryKey).toBe(false); + }); + + it('rejects a non-snake_case catalog name', () => { + expect(() => + ExternalCatalogSchema.parse({ + name: 'Warehouse-Catalog', + datasource: 'warehouse', + snapshotAt: '2026-05-30T00:00:00.000Z', + tables: [], + }), + ).toThrow(); + }); + + it('rejects a non-datetime snapshotAt', () => { + expect(() => + ExternalCatalogSchema.parse({ + name: 'warehouse_catalog', + datasource: 'warehouse', + snapshotAt: 'today', + tables: [], + }), + ).toThrow(); + }); + + it('column + table sub-schemas validate independently', () => { + expect(() => + ExternalColumnSchema.parse({ name: 'x', sqlType: 'text', nullable: true }), + ).not.toThrow(); + expect(() => + ExternalTableSchema.parse({ remoteName: 't', columns: [] }), + ).not.toThrow(); + }); +}); diff --git a/packages/spec/src/data/external-catalog.zod.ts b/packages/spec/src/data/external-catalog.zod.ts new file mode 100644 index 000000000..d7a0711a7 --- /dev/null +++ b/packages/spec/src/data/external-catalog.zod.ts @@ -0,0 +1,57 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * ExternalCatalog — cached remote-schema snapshot for a federated datasource + * (ADR-0015 §4.3). + * + * Introspecting a mature warehouse on every boot is expensive, so the + * `IExternalDatasourceService.refreshCatalog` persists a snapshot of the + * remote tables/columns as an `external_catalog` metadata record. The + * boot-validation gate (Gate 2) and Studio's schema browser read from it; + * drift is detected by diffing a fresh introspection against the snapshot. + */ + +import { z } from 'zod'; +import { lazySchema } from '../shared/lazy-schema'; + +/** A single remote column captured in a catalog snapshot. */ +export const ExternalColumnSchema = z.object({ + name: z.string().describe('Remote column name'), + sqlType: z.string().describe('Raw remote SQL type (e.g. 
"numeric(10,2)")'), + nullable: z.boolean().describe('Whether the remote column is nullable'), + primaryKey: z.boolean().default(false).describe('Part of the remote primary key'), + suggestedFieldType: z.string().optional() + .describe('ObjectStack field type suggested by the type-compat matrix'), +}); + +export type ExternalColumn = z.infer; + +/** A single remote table/view captured in a catalog snapshot. */ +export const ExternalTableSchema = z.object({ + remoteSchema: z.string().optional().describe('Remote schema/database qualifier'), + remoteName: z.string().describe('Remote table/view name'), + columns: z.array(ExternalColumnSchema).describe('Remote columns'), + indexes: z.array(z.object({ + name: z.string(), + columns: z.array(z.string()), + unique: z.boolean(), + })).optional().describe('Remote indexes, when introspectable'), + rowCountEstimate: z.number().optional().describe('Approximate row count'), +}); + +export type ExternalTable = z.infer; + +/** + * The persisted snapshot of a federated datasource's remote schema. + * Conventionally named `_catalog`. 
+ */ +export const ExternalCatalogSchema = lazySchema(() => z.object({ + name: z.string().regex(/^[a-z_][a-z0-9_]*$/) + .describe('Catalog id, conventionally `_catalog`.'), + datasource: z.string().describe('Datasource.name this catalog snapshots.'), + snapshotAt: z.string().datetime().describe('When the snapshot was taken (ISO 8601).'), + dialect: z.string().optional().describe('Remote SQL dialect, when known.'), + tables: z.array(ExternalTableSchema).describe('Snapshotted remote tables.'), +})); + +export type ExternalCatalog = z.infer; diff --git a/packages/spec/src/data/index.ts b/packages/spec/src/data/index.ts index 76ef6629f..76eb8e4e5 100644 --- a/packages/spec/src/data/index.ts +++ b/packages/spec/src/data/index.ts @@ -31,6 +31,10 @@ export * from './document.zod'; export * from './external-lookup.zod'; export * from './datasource.zod'; +// External Datasource Federation — SQL↔field type compatibility (ADR-0015) +export * from './type-compat'; +export * from './external-catalog.zod'; + // Analytics Protocol (Semantic Layer) export * from './analytics.zod'; diff --git a/packages/spec/src/data/object.test.ts b/packages/spec/src/data/object.test.ts index f8ba1cafb..6b3830e5b 100644 --- a/packages/spec/src/data/object.test.ts +++ b/packages/spec/src/data/object.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from 'vitest'; -import { ObjectSchema, ObjectCapabilities, IndexSchema, ObjectFieldGroupSchema, type ServiceObject } from './object.zod'; +import { ObjectSchema, ObjectCapabilities, IndexSchema, ObjectFieldGroupSchema, ObjectExternalBindingSchema, type ServiceObject } from './object.zod'; describe('ObjectCapabilities', () => { it('should apply default values correctly', () => { @@ -896,4 +896,43 @@ describe('ObjectSchema.fieldGroups', () => { { key: 'workflow', label: 'Workflow', icon: 'workflow', defaultExpanded: true }, ]); }); + + describe('External Binding (ADR-0015)', () => { + it('should leave external undefined by default', () => { + const 
obj = ObjectSchema.parse({ name: 'account', fields: {} }); + expect(obj.external).toBeUndefined(); + }); + + it('should accept a minimal external binding and default writable to false', () => { + const obj = ObjectSchema.parse({ + name: 'wh_order', + datasource: 'warehouse', + external: { remoteSchema: 'mart', remoteName: 'fact_orders' }, + fields: { order_id: { type: 'text' } }, + }); + expect(obj.external?.remoteSchema).toBe('mart'); + expect(obj.external?.remoteName).toBe('fact_orders'); + expect(obj.external?.writable).toBe(false); + }); + + it('should accept a full external binding with column map and opt-in write', () => { + const binding = ObjectExternalBindingSchema.parse({ + remoteName: 'fact_orders', + remoteSchema: 'mart', + writable: true, + columnMap: { ORDER_ID: 'order_id', CUST_ID: 'customer_id' }, + introspectedAt: '2026-05-30T00:00:00.000Z', + ignoreColumns: ['_etl_loaded_at'], + }); + expect(binding.writable).toBe(true); + expect(binding.columnMap?.ORDER_ID).toBe('order_id'); + expect(binding.ignoreColumns).toEqual(['_etl_loaded_at']); + }); + + it('should reject a non-datetime introspectedAt', () => { + expect(() => + ObjectExternalBindingSchema.parse({ introspectedAt: 'yesterday' }), + ).toThrow(); + }); + }); }); diff --git a/packages/spec/src/data/object.zod.ts b/packages/spec/src/data/object.zod.ts index 0f5f7d546..b9dca1643 100644 --- a/packages/spec/src/data/object.zod.ts +++ b/packages/spec/src/data/object.zod.ts @@ -301,6 +301,37 @@ export type ObjectFieldGroupInput = z.input; * files: true * ``` */ + +/** + * External Binding (ADR-0015) + * + * Optional per-object descriptor that binds this object to a remote table + * on a federated datasource (one whose `schemaMode !== 'managed'`). When + * present, the object is "external": DDL is forbidden, the table is + * validated against the remote schema at boot, and writes require a double + * opt-in (`datasource.external.allowWrites` **and** this `writable`). 
+ * + * The cross-field invariant ("`external` only when the object's datasource + * has `schemaMode !== 'managed'`") is enforced at metadata-load time, not + * in this schema, because the datasource may live in another artefact. + */ +export const ObjectExternalBindingSchema = z.object({ + remoteName: z.string().optional() + .describe('Remote table/view name. Defaults to object.name.'), + remoteSchema: z.string().optional() + .describe('Remote schema/database qualifier.'), + writable: z.boolean().default(false) + .describe('Per-object write opt-in (also requires datasource.external.allowWrites).'), + columnMap: z.record(z.string(), z.string()).optional() + .describe('Remote column name → local field name.'), + introspectedAt: z.string().datetime().optional() + .describe('Set by `os datasource introspect`; informational.'), + ignoreColumns: z.array(z.string()).optional() + .describe('Remote columns to skip during validation (dev convenience).'), +}).describe('External datasource binding (ADR-0015)'); + +export type ObjectExternalBinding = z.infer; + const ObjectSchemaBase = z.object({ /** * Identity & Metadata @@ -433,9 +464,18 @@ const ObjectSchemaBase = z.object({ */ datasource: z.string().optional().default('default').describe('Target Datasource ID. "default" is the primary DB.'), + /** + * External Binding (ADR-0015) + * Present only for federated objects routed to a datasource whose + * `schemaMode !== 'managed'`. Describes the remote table binding and + * per-object writability. See {@link ObjectExternalBindingSchema}. 
+ */ + external: ObjectExternalBindingSchema.optional() + .describe('Remote table binding for federated (external) objects.'), - /** - * Data Model + + /** + * Data Model */ fields: z.record(z.string().regex(/^[a-z_][a-z0-9_]*$/, { message: 'Field names must be lowercase snake_case (e.g., "first_name", "company", "annual_revenue")', diff --git a/packages/spec/src/data/type-compat.test.ts b/packages/spec/src/data/type-compat.test.ts new file mode 100644 index 000000000..7a0209f45 --- /dev/null +++ b/packages/spec/src/data/type-compat.test.ts @@ -0,0 +1,92 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +import { describe, it, expect } from 'vitest'; +import { + canonicalizeSqlType, + suggestFieldType, + isCompatible, +} from './type-compat'; + +describe('canonicalizeSqlType (ADR-0015 §4.6)', () => { + it('strips length/precision parameters', () => { + expect(canonicalizeSqlType('varchar(255)')).toBe('text'); + expect(canonicalizeSqlType('numeric(10,2)')).toBe('decimal'); + expect(canonicalizeSqlType('char(1)')).toBe('text'); + }); + + it('normalises timezone qualifiers', () => { + expect(canonicalizeSqlType('timestamp without time zone')).toBe('datetime'); + expect(canonicalizeSqlType('timestamp with time zone')).toBe('datetime'); + }); + + it('detects array notation', () => { + expect(canonicalizeSqlType('text[]')).toBe('array'); + expect(canonicalizeSqlType('_int4')).toBe('array'); + }); + + it('applies postgres dialect aliases', () => { + expect(canonicalizeSqlType('jsonb', 'postgres')).toBe('json'); + expect(canonicalizeSqlType('timestamptz', 'postgres')).toBe('datetime'); + expect(canonicalizeSqlType('int8', 'postgres')).toBe('bigint'); + expect(canonicalizeSqlType('bool', 'postgres')).toBe('boolean'); + }); + + it('applies snowflake/bigquery/mongo aliases', () => { + expect(canonicalizeSqlType('NUMBER', 'snowflake')).toBe('decimal'); + expect(canonicalizeSqlType('VARIANT', 'snowflake')).toBe('json'); + 
expect(canonicalizeSqlType('INT64', 'bigquery')).toBe('bigint'); + expect(canonicalizeSqlType('STRING', 'bigquery')).toBe('text'); + expect(canonicalizeSqlType('objectId', 'mongo')).toBe('text'); + }); + + it('falls back to unknown for unrecognised types', () => { + expect(canonicalizeSqlType('geography')).toBe('unknown'); + expect(canonicalizeSqlType('')).toBe('unknown'); + }); +}); + +describe('suggestFieldType', () => { + it('suggests sensible defaults per canonical type', () => { + expect(suggestFieldType('varchar(255)')).toBe('text'); + expect(suggestFieldType('integer')).toBe('number'); + expect(suggestFieldType('numeric(10,2)')).toBe('number'); + expect(suggestFieldType('boolean')).toBe('boolean'); + expect(suggestFieldType('timestamptz', 'postgres')).toBe('datetime'); + expect(suggestFieldType('date')).toBe('date'); + expect(suggestFieldType('jsonb', 'postgres')).toBe('json'); + expect(suggestFieldType('vector', 'postgres')).toBe('vector'); + }); + + it('returns undefined for unknown types', () => { + expect(suggestFieldType('geometry')).toBeUndefined(); + }); +}); + +describe('isCompatible', () => { + it('returns true for exact mappings', () => { + expect(isCompatible('varchar(255)', 'text')).toBe(true); + expect(isCompatible('integer', 'number')).toBe(true); + expect(isCompatible('boolean', 'toggle')).toBe(true); + expect(isCompatible('timestamptz', 'datetime', 'postgres')).toBe(true); + expect(isCompatible('numeric(10,2)', 'currency')).toBe(true); + expect(isCompatible('jsonb', 'json', 'postgres')).toBe(true); + }); + + it('returns "lossy" for usable-but-imperfect mappings', () => { + expect(isCompatible('jsonb', 'text', 'postgres')).toBe('lossy'); + expect(isCompatible('date', 'datetime')).toBe('lossy'); + expect(isCompatible('integer', 'currency')).toBe('lossy'); + }); + + it('returns false for incompatible mappings', () => { + expect(isCompatible('integer', 'datetime')).toBe(false); + expect(isCompatible('boolean', 'json')).toBe(false); + 
expect(isCompatible('varchar(255)', 'number')).toBe(false); + }); + + it('treats unknown remote types as lossy only against text/json', () => { + expect(isCompatible('geometry', 'text')).toBe('lossy'); + expect(isCompatible('geometry', 'json')).toBe('lossy'); + expect(isCompatible('geometry', 'number')).toBe(false); + }); +}); diff --git a/packages/spec/src/data/type-compat.ts b/packages/spec/src/data/type-compat.ts new file mode 100644 index 000000000..be6a2c645 --- /dev/null +++ b/packages/spec/src/data/type-compat.ts @@ -0,0 +1,220 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * SQL ⇆ ObjectStack field-type compatibility matrix (ADR-0015 §4.6). + * + * A pure, dialect-aware module mapping remote SQL column types + * (`text`, `varchar(255)`, `numeric(10,2)`, `timestamptz`, `jsonb`, …) to + * ObjectStack field types, and answering "can this remote column back this + * field type?". Used by `IExternalDatasourceService`: + * + * - `generateObjectDraft` → {@link suggestFieldType} to draft `*.object.ts`. + * - `validateObject` → {@link isCompatible} to diff a declared field + * against the remote column. + * + * No I/O, no driver coupling — operates on raw type strings so it can be + * unit-tested independently and extended per dialect without touching the + * runtime. + */ + +import type { FieldType } from './field.zod'; + +/** SQL dialects whose type vocabularies the matrix understands. */ +export type SqlDialect = + | 'postgres' + | 'mysql' + | 'sqlite' + | 'snowflake' + | 'bigquery' + | 'mongo'; + +/** + * Result of a compatibility check: + * - `true` — exact / safe mapping. + * - `'lossy'`— usable but information may be lost (e.g. `jsonb` → `text`, + * `numeric(38,0)` → JS `number`). Generated drafts flag these `// REVIEW:`. + * - `false` — incompatible; the validator emits a `type_mismatch` diff. 
+ */ +export type Compatibility = boolean | 'lossy'; + +/** + * Canonical "base" SQL type after stripping parameters/qualifiers and + * applying dialect aliases. Internal vocabulary the matrix is keyed on. + */ +type CanonicalSqlType = + | 'text' + | 'integer' + | 'bigint' + | 'decimal' + | 'float' + | 'boolean' + | 'date' + | 'time' + | 'datetime' + | 'json' + | 'uuid' + | 'binary' + | 'enum' + | 'array' + | 'vector' + | 'unknown'; + +/** + * Per-dialect aliases mapping vendor type names to a {@link CanonicalSqlType}. + * Only entries that differ from the shared base map (below) need listing. + */ +const DIALECT_ALIASES: Partial>> = { + postgres: { + int2: 'integer', int4: 'integer', int8: 'bigint', serial: 'integer', bigserial: 'bigint', + float4: 'float', float8: 'float', numeric: 'decimal', bool: 'boolean', + timestamptz: 'datetime', timestamp: 'datetime', timetz: 'time', + jsonb: 'json', json: 'json', uuid: 'uuid', bytea: 'binary', bpchar: 'text', + citext: 'text', vector: 'vector', + }, + mysql: { + tinyint: 'integer', smallint: 'integer', mediumint: 'integer', int: 'integer', + bigint: 'bigint', double: 'float', real: 'float', decimal: 'decimal', + datetime: 'datetime', timestamp: 'datetime', tinytext: 'text', mediumtext: 'text', + longtext: 'text', json: 'json', blob: 'binary', longblob: 'binary', enum: 'enum', + }, + sqlite: { + integer: 'integer', int: 'integer', real: 'float', numeric: 'decimal', + text: 'text', blob: 'binary', + }, + snowflake: { + number: 'decimal', int: 'integer', integer: 'integer', bigint: 'bigint', + float: 'float', double: 'float', string: 'text', varchar: 'text', variant: 'json', + object: 'json', array: 'array', boolean: 'boolean', timestamp_ntz: 'datetime', + timestamp_tz: 'datetime', timestamp_ltz: 'datetime', binary: 'binary', + }, + bigquery: { + int64: 'bigint', float64: 'float', numeric: 'decimal', bignumeric: 'decimal', + string: 'text', bytes: 'binary', bool: 'boolean', boolean: 'boolean', + timestamp: 'datetime', 
datetime: 'datetime', struct: 'json', json: 'json', + array: 'array', record: 'json', + }, + mongo: { + objectid: 'text', string: 'text', double: 'float', int: 'integer', long: 'bigint', + decimal: 'decimal', bool: 'boolean', boolean: 'boolean', date: 'datetime', + object: 'json', array: 'array', bindata: 'binary', + }, +}; + +/** + * Dialect-agnostic base map of common ANSI/SQL type names to canonical types. + * Consulted after dialect aliases. + */ +const BASE_ALIASES: Record = { + text: 'text', varchar: 'text', char: 'text', character: 'text', string: 'text', + 'character varying': 'text', nvarchar: 'text', nchar: 'text', clob: 'text', + integer: 'integer', int: 'integer', smallint: 'integer', tinyint: 'integer', + bigint: 'bigint', + decimal: 'decimal', numeric: 'decimal', number: 'decimal', money: 'decimal', + float: 'float', double: 'float', 'double precision': 'float', real: 'float', + boolean: 'boolean', bool: 'boolean', bit: 'boolean', + date: 'date', + time: 'time', + datetime: 'datetime', timestamp: 'datetime', + json: 'json', jsonb: 'json', + uuid: 'uuid', guid: 'uuid', + binary: 'binary', varbinary: 'binary', blob: 'binary', bytes: 'binary', + enum: 'enum', + array: 'array', + vector: 'vector', +}; + +/** + * For each canonical SQL type: the suggested ObjectStack field type plus the + * set of field types it is exactly / lossily compatible with. 
+ */ +const CANONICAL_TO_FIELD: Record< + CanonicalSqlType, + { suggested: FieldType; exact: FieldType[]; lossy: FieldType[] } +> = { + text: { suggested: 'text', exact: ['text', 'textarea', 'email', 'url', 'phone', 'markdown', 'html', 'richtext', 'code', 'select', 'color'], lossy: [] }, + integer: { suggested: 'number', exact: ['number', 'autonumber', 'rating', 'percent'], lossy: ['currency', 'boolean'] }, + bigint: { suggested: 'number', exact: ['number', 'autonumber'], lossy: ['currency'] }, + decimal: { suggested: 'number', exact: ['number', 'currency', 'percent'], lossy: ['rating'] }, + float: { suggested: 'number', exact: ['number', 'currency', 'percent', 'slider'], lossy: [] }, + boolean: { suggested: 'boolean', exact: ['boolean', 'toggle'], lossy: [] }, + date: { suggested: 'date', exact: ['date'], lossy: ['datetime'] }, + time: { suggested: 'time', exact: ['time'], lossy: [] }, + datetime: { suggested: 'datetime', exact: ['datetime'], lossy: ['date'] }, + json: { suggested: 'json', exact: ['json', 'composite', 'repeater', 'record', 'location', 'address', 'tags', 'multiselect'], lossy: ['text'] }, + uuid: { suggested: 'text', exact: ['text', 'lookup', 'master_detail'], lossy: [] }, + binary: { suggested: 'file', exact: ['file', 'image', 'signature'], lossy: ['text'] }, + enum: { suggested: 'select', exact: ['select', 'radio', 'text'], lossy: [] }, + array: { suggested: 'multiselect', exact: ['multiselect', 'checkboxes', 'tags', 'json'], lossy: ['text'] }, + vector: { suggested: 'vector', exact: ['vector'], lossy: ['json'] }, + unknown: { suggested: 'text', exact: [], lossy: ['text', 'json'] }, +}; + +/** + * Reduce a raw SQL type string to its canonical form. + * + * Strips length/precision (`varchar(255)` → `varchar`), array suffixes + * (`text[]` → array), and qualifiers (`timestamp without time zone` → + * `timestamp`), then applies dialect aliases, then the base map. 
+ */ +export function canonicalizeSqlType(rawType: string, dialect?: SqlDialect): CanonicalSqlType { + if (!rawType) return 'unknown'; + let t = rawType.trim().toLowerCase(); + + // Postgres / generic array notation: `text[]`, `_int4`. + const isArray = t.endsWith('[]') || t.startsWith('_'); + if (isArray) return 'array'; + + // Drop precision/length: `numeric(10,2)` → `numeric`, `varchar(255)` → + // `varchar`. Linear substring slice (no regex backtracking). + const paren = t.indexOf('('); + if (paren !== -1) t = t.slice(0, paren).trim(); + + // Normalise common trailing qualifiers using literal, anchored suffix + // checks — avoids polynomial-backtracking regexes on uncontrolled input. + for (const suffix of [' without time zone', ' with time zone', ' unsigned', ' signed']) { + if (t.endsWith(suffix)) { + t = t.slice(0, t.length - suffix.length).trim(); + break; + } + } + + // `timestamp with time zone` collapsed to `timestamp` above; `timestamptz` + // handled via alias. + const dialectMap = dialect ? DIALECT_ALIASES[dialect] : undefined; + if (dialectMap && dialectMap[t]) return dialectMap[t]; + if (BASE_ALIASES[t]) return BASE_ALIASES[t]; + + // `timestamp with time zone` may survive as `timestamp` already mapped; + // anything else is unknown. + return 'unknown'; +} + +/** + * The ObjectStack field type best suited to back a given remote SQL column. + * Returns `undefined` only when the type is wholly unrecognised (the caller + * may fall back to `text` and flag it for review). + */ +export function suggestFieldType(rawType: string, dialect?: SqlDialect): FieldType | undefined { + const canonical = canonicalizeSqlType(rawType, dialect); + if (canonical === 'unknown') return undefined; + return CANONICAL_TO_FIELD[canonical].suggested; +} + +/** + * Whether a remote SQL column type can back the given ObjectStack field type. + * + * @returns `true` (exact), `'lossy'` (usable with possible loss), or `false`. 
+ */ +export function isCompatible( + rawType: string, + fieldType: FieldType, + dialect?: SqlDialect, +): Compatibility { + const canonical = canonicalizeSqlType(rawType, dialect); + const entry = CANONICAL_TO_FIELD[canonical]; + if (entry.exact.includes(fieldType)) return true; + if (entry.lossy.includes(fieldType)) return 'lossy'; + // Unknown remote types are permissive-but-lossy against text/json only, + // already encoded in CANONICAL_TO_FIELD.unknown. + return false; +} diff --git a/packages/spec/src/kernel/metadata-plugin.zod.ts b/packages/spec/src/kernel/metadata-plugin.zod.ts index 57043b616..df6b194a9 100644 --- a/packages/spec/src/kernel/metadata-plugin.zod.ts +++ b/packages/spec/src/kernel/metadata-plugin.zod.ts @@ -92,6 +92,7 @@ export const MetadataTypeSchema = lazySchema(() => z.enum([ // System Protocol 'datasource', // Data connections (DatasourceSchema) + 'external_catalog', // Cached remote schema snapshot for federated datasources (ADR-0015) 'translation', // i18n resources (TranslationSchema) 'router', // API routes 'function', // Serverless functions @@ -587,6 +588,7 @@ export const DEFAULT_METADATA_TYPE_REGISTRY: MetadataTypeRegistryEntry[] = [ // System Protocol { type: 'datasource', label: 'Datasource', filePatterns: ['**/*.datasource.ts', '**/*.datasource.yml'], supportsOverlay: false, allowOrgOverride: false, allowRuntimeCreate: false, supportsVersioning: false, executionPinned: false, loadOrder: 5, domain: 'system' }, + { type: 'external_catalog', label: 'External Catalog', filePatterns: ['**/*.external-catalog.ts', '**/*.external-catalog.yml', '**/*.external-catalog.json'], supportsOverlay: false, allowOrgOverride: false, allowRuntimeCreate: true, supportsVersioning: false, executionPinned: false, loadOrder: 6, domain: 'system' }, { type: 'translation', label: 'Translation', filePatterns: ['**/*.translation.ts', '**/*.translation.yml', '**/*.translation.json'], supportsOverlay: true, allowOrgOverride: true, allowRuntimeCreate: true, 
supportsVersioning: false, executionPinned: false, loadOrder: 90, domain: 'system' }, { type: 'router', label: 'Router', filePatterns: ['**/*.router.ts'], supportsOverlay: false, allowOrgOverride: false, allowRuntimeCreate: false, supportsVersioning: false, executionPinned: false, loadOrder: 40, domain: 'system' }, { type: 'function', label: 'Function', filePatterns: ['**/*.function.ts'], supportsOverlay: false, allowOrgOverride: false, allowRuntimeCreate: false, supportsVersioning: false, executionPinned: false, loadOrder: 40, domain: 'system' }, diff --git a/packages/spec/src/shared/external-errors.test.ts b/packages/spec/src/shared/external-errors.test.ts new file mode 100644 index 000000000..527a3322e --- /dev/null +++ b/packages/spec/src/shared/external-errors.test.ts @@ -0,0 +1,89 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +import { describe, it, expect } from 'vitest'; +import { + EXTERNAL_ERROR_CODES, + renderDiffMessage, + ExternalSchemaMismatchError, + ExternalWriteForbiddenError, + ExternalSchemaModeViolationError, + type SchemaDiffEntry, +} from './external-errors'; + +describe('External error codes (ADR-0015)', () => { + it('exposes stable codes', () => { + expect(EXTERNAL_ERROR_CODES.schemaMismatch).toBe('EXTERNAL_SCHEMA_MISMATCH'); + expect(EXTERNAL_ERROR_CODES.writeForbidden).toBe('EXTERNAL_WRITE_FORBIDDEN'); + expect(EXTERNAL_ERROR_CODES.schemaModeViolation).toBe('EXTERNAL_SCHEMA_MODE_VIOLATION'); + }); +}); + +describe('renderDiffMessage', () => { + it('renders a header with no entries', () => { + const msg = renderDiffMessage('warehouse', 'wh_order', []); + expect(msg).toContain("Object 'wh_order'"); + expect(msg).toContain("datasource 'warehouse'"); + expect(msg.split('\n')).toHaveLength(1); + }); + + it('renders one line per diff entry with type detail', () => { + const diffs: SchemaDiffEntry[] = [ + { kind: 'missing_column', remoteSchema: 'mart', remoteName: 'fact_orders', column: 'amount', severity: 
'error' }, + { + kind: 'type_mismatch', + remoteSchema: 'mart', + remoteName: 'fact_orders', + column: 'ordered_at', + expected: 'datetime', + actual: 'text', + severity: 'warning', + }, + ]; + const lines = renderDiffMessage('warehouse', 'wh_order', diffs).split('\n'); + expect(lines).toHaveLength(3); // header + 2 entries + expect(lines[1]).toContain('missing_column'); + expect(lines[1]).toContain('mart.fact_orders.amount'); + expect(lines[2]).toContain('type_mismatch'); + expect(lines[2]).toContain('expected datetime'); + expect(lines[2]).toContain('actual text'); + }); +}); + +describe('ExternalSchemaMismatchError', () => { + it('carries code, datasource, object, diffs and a rendered message', () => { + const diffs: SchemaDiffEntry[] = [ + { kind: 'missing_table', remoteName: 'fact_orders', severity: 'error' }, + ]; + const err = new ExternalSchemaMismatchError('warehouse', 'wh_order', diffs); + expect(err).toBeInstanceOf(Error); + expect(err.code).toBe('EXTERNAL_SCHEMA_MISMATCH'); + expect(err.name).toBe('ExternalSchemaMismatchError'); + expect(err.datasource).toBe('warehouse'); + expect(err.object).toBe('wh_order'); + expect(err.diffs).toBe(diffs); + expect(err.message).toContain('missing_table'); + }); +}); + +describe('ExternalWriteForbiddenError', () => { + it('has a stable code and a default message', () => { + const err = new ExternalWriteForbiddenError(); + expect(err.code).toBe('EXTERNAL_WRITE_FORBIDDEN'); + expect(err.name).toBe('ExternalWriteForbiddenError'); + expect(err.message.length).toBeGreaterThan(0); + }); + + it('accepts a custom message', () => { + const err = new ExternalWriteForbiddenError('nope'); + expect(err.message).toBe('nope'); + }); +}); + +describe('ExternalSchemaModeViolationError', () => { + it('has a stable code and a default message', () => { + const err = new ExternalSchemaModeViolationError(); + expect(err.code).toBe('EXTERNAL_SCHEMA_MODE_VIOLATION'); + expect(err.name).toBe('ExternalSchemaModeViolationError'); + 
expect(err.message.length).toBeGreaterThan(0); + }); +}); diff --git a/packages/spec/src/shared/external-errors.ts b/packages/spec/src/shared/external-errors.ts new file mode 100644 index 000000000..9f9174de1 --- /dev/null +++ b/packages/spec/src/shared/external-errors.ts @@ -0,0 +1,136 @@ +// Copyright (c) 2025 ObjectStack. Licensed under the Apache-2.0 license. + +/** + * External Datasource Federation — shared error contract. + * + * Implements the error surface of ADR-0015 (External Datasource + * Federation). These are protocol-level error *types* and stable `code` + * strings shared by every layer that enforces the three runtime gates + * (DDL gate, boot-validation gate, write gate). No runtime/business logic + * lives here beyond pure message rendering — consistent with + * `error-map.zod.ts` in the same directory. + * + * @see docs/adr/0015-external-datasource-federation.md §4.4, §5 + */ + +/** + * Stable error codes for the federation gates. Mirrored on each error + * class's `code` field so callers can branch without `instanceof` across + * package boundaries. + */ +export const EXTERNAL_ERROR_CODES = { + schemaMismatch: 'EXTERNAL_SCHEMA_MISMATCH', + writeForbidden: 'EXTERNAL_WRITE_FORBIDDEN', + schemaModeViolation: 'EXTERNAL_SCHEMA_MODE_VIOLATION', +} as const; + +export type ExternalErrorCode = + (typeof EXTERNAL_ERROR_CODES)[keyof typeof EXTERNAL_ERROR_CODES]; + +/** + * The kinds of divergence the schema validator can report between a + * federated `Object` definition and the remote table it binds to. + */ +export type SchemaDiffEntryKind = + | 'missing_table' + | 'missing_column' + | 'type_mismatch' + | 'nullability_mismatch' + | 'unmapped_column' + | 'pk_mismatch'; + +/** + * A single divergence entry. Produced by the validation gate (ADR §5.2) + * and carried by {@link ExternalSchemaMismatchError}. + */ +export interface SchemaDiffEntry { + kind: SchemaDiffEntryKind; + /** Remote schema/database qualifier, when known. 
*/ + remoteSchema?: string; + /** Remote table/view name, when known. */ + remoteName?: string; + /** Affected column/field name, for column-scoped diffs. */ + column?: string; + /** What the local object declared. */ + expected?: string; + /** What the remote table actually has. */ + actual?: string; + severity: 'error' | 'warning'; +} + +/** Human-readable one-line summary of a single diff entry. */ +function renderDiffEntry(entry: SchemaDiffEntry): string { + const where = [entry.remoteSchema, entry.remoteName] + .filter(Boolean) + .join('.'); + const col = entry.column ? `.${entry.column}` : ''; + const detail = + entry.expected !== undefined || entry.actual !== undefined + ? ` (expected ${entry.expected ?? '—'}, actual ${entry.actual ?? '—'})` + : ''; + const mark = entry.severity === 'error' ? '✗' : '⚠'; + return ` ${mark} ${entry.kind}: ${where}${col}${detail}`; +} + +/** + * Render a multi-line, actionable diff message. Kept pure so it can be + * unit-tested independently of the runtime gates. + */ +export function renderDiffMessage( + datasource: string, + object: string, + diffs: SchemaDiffEntry[], +): string { + const header = `Object '${object}' does not match its remote table on datasource '${datasource}':`; + if (diffs.length === 0) return header; + return [header, ...diffs.map(renderDiffEntry)].join('\n'); +} + +/** + * Thrown by the boot-validation gate when a federated object diverges + * from the remote table (ADR §5.2). Carries the structured diff so + * callers (CLI, Studio, audit) can render it however they like. 
+ */ +export class ExternalSchemaMismatchError extends Error { + readonly code = EXTERNAL_ERROR_CODES.schemaMismatch; + + constructor( + readonly datasource: string, + readonly object: string, + readonly diffs: SchemaDiffEntry[], + ) { + super(renderDiffMessage(datasource, object, diffs)); + this.name = 'ExternalSchemaMismatchError'; + } +} + +/** + * Thrown by the write gate when a write is attempted against an external + * datasource without the required double opt-in + * (`datasource.external.allowWrites` **and** `object.external.writable`) + * — ADR §5.3. + */ +export class ExternalWriteForbiddenError extends Error { + readonly code = EXTERNAL_ERROR_CODES.writeForbidden; + + constructor(message = 'Writes are forbidden on this external datasource.') { + super(message); + this.name = 'ExternalWriteForbiddenError'; + } +} + +/** + * Thrown by the DDL gate when schema-mutating DDL (createTable, + * alterTable, dropTable, applyMigrations) is attempted against a + * datasource whose `schemaMode !== 'managed'` — ADR §5.1. 
+ */ +export class ExternalSchemaModeViolationError extends Error { + readonly code = EXTERNAL_ERROR_CODES.schemaModeViolation; + + constructor( + message = 'DDL is forbidden on a non-managed datasource (schemaMode != "managed").', + ) { + super(message); + this.name = 'ExternalSchemaModeViolationError'; + } +} diff --git a/packages/spec/src/shared/index.ts b/packages/spec/src/shared/index.ts index 765229c31..8f01f57da 100644 --- a/packages/spec/src/shared/index.ts +++ b/packages/spec/src/shared/index.ts @@ -13,6 +13,7 @@ export * from './metadata-types.zod'; export * from './branded-types.zod'; export * from './suggestions.zod'; export * from './error-map.zod'; +export * from './external-errors'; export * from './metadata-collection.zod'; export * from './lazy-schema'; export * from './expression.zod'; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4b658d356..a09a3261a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1650,6 +1650,25 @@ importers: specifier: ^4.1.7 version: 4.1.7(@opentelemetry/api@1.9.1)(@types/node@25.9.1)(@vitest/coverage-v8@4.1.7)(happy-dom@20.9.0)(msw@2.14.6(@types/node@25.9.1)(typescript@6.0.3))(vite@8.0.14(@types/node@25.9.1)(esbuild@0.28.0)(jiti@2.7.0)(tsx@4.22.3)(yaml@2.9.0)) + packages/services/service-external-datasource: + dependencies: + '@objectstack/core': + specifier: workspace:* + version: link:../../core + '@objectstack/spec': + specifier: workspace:* + version: link:../../spec + devDependencies: + '@types/node': + specifier: ^25.9.1 + version: 25.9.1 + typescript: + specifier: ^6.0.3 + version: 6.0.3 + vitest: + specifier: ^4.1.7 + version: 4.1.7(@opentelemetry/api@1.9.1)(@types/node@25.9.1)(@vitest/coverage-v8@4.1.7)(happy-dom@20.9.0)(msw@2.14.6(@types/node@25.9.1)(typescript@6.0.3))(vite@8.0.14(@types/node@25.9.1)(esbuild@0.28.0)(jiti@2.7.0)(tsx@4.22.3)(yaml@2.9.0)) + packages/services/service-feed: dependencies: '@objectstack/core':