From c97c5e2dc763295c9c1688fc14c9303b2a17b709 Mon Sep 17 00:00:00 2001 From: Petr Date: Fri, 29 May 2026 17:23:44 +0200 Subject: [PATCH 1/5] feat(headless): token-only invocation via __env__ project (#359) Let a daemon / container / CI run kbagent with only a token in the environment -- no `kbagent project add`, no config.json on disk. Setting KBAGENT_PROJECT_FROM_ENV=1 together with KBC_TOKEN + KBC_STORAGE_API_URL makes ConfigStore synthesize an in-memory project under the reserved alias `__env__`. Because both the CLI and `kbagent serve` resolve projects through the same ConfigStore.load() chokepoint, a single env-injection covers both consumption styles: kbagent --json storage file-upload --project __env__ --file X kbagent serve # POST endpoints take project=__env__ Security: - The `__env__` project is marked `ephemeral` and stripped by ConfigStore.save(), so the env token is never persisted, even when a write op triggers a config.json write. - Opt-in is the explicit flag, not the mere presence of KBC_TOKEN, to avoid a phantom project on a dev machine that exported KBC_TOKEN only for `project add`. - Flag set but credentials missing -> fail fast (exit 5), not a silent skip. Tests: 7 unit (test_config_store.py) + 3 E2E (test_e2e.py). Docs: changelog, keboola-expert.md, gotchas.md, commands-reference.md, context.py AGENT_CONTEXT, CLAUDE.md. Version 0.49.0 -> 0.50.0. 
--- .claude-plugin/marketplace.json | 2 +- CLAUDE.md | 1 + plugins/kbagent/.claude-plugin/plugin.json | 2 +- plugins/kbagent/agents/keboola-expert.md | 1 + .../kbagent/references/commands-reference.md | 5 +- .../skills/kbagent/references/gotchas.md | 35 ++++++ pyproject.toml | 2 +- src/keboola_agent_cli/changelog.py | 4 + src/keboola_agent_cli/commands/context.py | 7 ++ src/keboola_agent_cli/config_store.py | 93 +++++++++++++- src/keboola_agent_cli/constants.py | 11 ++ src/keboola_agent_cli/models.py | 10 ++ tests/test_config_store.py | 113 ++++++++++++++++++ tests/test_e2e.py | 57 +++++++++ uv.lock | 2 +- 15 files changed, 336 insertions(+), 9 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 820ad3d6..3f5818fd 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -10,7 +10,7 @@ "plugins": [ { "name": "kbagent", - "version": "0.49.0", + "version": "0.50.0", "source": "./plugins/kbagent", "description": "AI-friendly interface to Keboola Connection projects — explore configs, jobs, lineage, call MCP tools, manage dev branches, and debug SQL in workspaces", "category": "development" diff --git a/CLAUDE.md b/CLAUDE.md index 2e00a429..7be7cbe3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -294,6 +294,7 @@ plugins/kbagent/ ``` # Global options: --json, --verbose, --no-color, --config-dir, --hint client|service (deprecated, use kbagent serve REST API), --deny-writes, --deny-destructive, --allow-env-manage-token +# Headless / token-only (0.50.0+): export KBAGENT_PROJECT_FROM_ENV=1 + KBC_TOKEN + KBC_STORAGE_API_URL to synthesize an in-memory `__env__` project (no `project add`, no config.json on disk; token never persisted). Use `--project __env__`. Same env setup also powers `kbagent serve`. 
kbagent project add --project NAME --url URL --token TOKEN kbagent project list diff --git a/plugins/kbagent/.claude-plugin/plugin.json b/plugins/kbagent/.claude-plugin/plugin.json index 5443b507..73a4b99c 100644 --- a/plugins/kbagent/.claude-plugin/plugin.json +++ b/plugins/kbagent/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "kbagent", - "version": "0.49.0", + "version": "0.50.0", "description": "AI-friendly interface to Keboola Connection projects — explore configs, jobs, lineage, call MCP tools, manage dev branches, and debug SQL in workspaces", "author": { "name": "Keboola", diff --git a/plugins/kbagent/agents/keboola-expert.md b/plugins/kbagent/agents/keboola-expert.md index 7ed7aa3b..2e153171 100644 --- a/plugins/kbagent/agents/keboola-expert.md +++ b/plugins/kbagent/agents/keboola-expert.md @@ -180,6 +180,7 @@ a critical failure. | Remove a secret / env-var key from a data app | `kbagent data-app secrets-remove --project P --app-id N --key 'KEY' --yes` (0.43.9+: `#` optional; removes encrypted + plain) -- idempotent; missing keys exit 0, `removed: 0` | `tool call update_config` with the secrets sub-dict deleted -- ONLY for batch removes needing a custom change description | `config update --set 'parameters.dataApp.secrets={}'` -- drops EVERY secret, not just the named ones | | Pre-flight a data-app repo before create | `kbagent data-app validate-repo --git-repo URL --type python-js [--git-pat-env VAR]` (0.29.0+) -- BLOCKING / WARN / OK with help-doc citations; ≤5 GitHub API calls regardless of repo size | git-clone the repo locally and inspect by hand | `data-app create --dry-run` (only shows the request bodies; does not validate repo structure) | | Rename a project alias | `kbagent project edit --project OLD --new-alias NEW [--dry-run]` (0.31.0+) -- cascades through `config.json` (`projects` key + `default_project`) and the nested-sync directory `//`. Combined with `--url`/`--token` in one call, those mutations target the new alias post-rename. 
`--dry-run` previews collision detection, planned disk-rename method, and the lineage-cache warning without mutating state. **Lineage cache (if any) is NOT auto-updated**: rebuild via `kbagent lineage build` after the rename | `kbagent project remove` + `kbagent project add` (re-enters the token; loses any nested sync workspace) | hand-editing `~/.config/keboola-agent-cli/config.json` (no validation, easy to miss `default_project` cascade) | +| Run kbagent headless from a daemon / container / CI with only a token in env (no `project add`, no `config.json`) | Export `KBAGENT_PROJECT_FROM_ENV=1` + `KBC_TOKEN` + `KBC_STORAGE_API_URL`, then `kbagent --json storage file-upload --project __env__ --file X` (0.50.0+). Synthesizes an in-memory `__env__` project; token NEVER persisted (stripped on any save); same env setup also powers `kbagent serve` (POST `project=__env__`) | a one-shot `kbagent project add --project env --token ... --url ...` (works but writes the token to `config.json` on disk -- defeats "no local config") | hand-writing a `config.json` with the token, or passing `--token` per command (no such passthrough on storage/job/config commands) | | Call the running `kbagent serve` from a scheduled-agent subprocess | `kbagent http get/post/patch/delete ` (0.40.0+) -- uses `KBAGENT_SERVE_URL` + `KBAGENT_SERVE_TOKEN` env vars auto-injected by the scheduler. `kbagent http get /openapi.json` to discover endpoints. 
Treats the live serve as source-of-truth (no stale local config) | forking `kbagent ` (also fine -- `KBAGENT_CONFIG_DIR` is propagated so the spawned CLI sees the SAME config the serve uses; no more "I'm in the wrong directory" surprises) | `curl $KBAGENT_SERVE_URL/...` by hand (works, but `kbagent http` adds auth header automatically, structured error mapping, and JSON-mode formatting) | | Launch the web UI for an end-user (browser dashboard, no Node BFF) | `kbagent serve --ui [--port PORT] [--ui-dist PATH]` (0.40.0+) -- single-process FastAPI mounts the bundled React SPA at `/`, sets an HttpOnly `kbagent_session` cookie on `GET /` so the browser is auto-authenticated. EventSource SSE works via the same cookie -- no token in URL, JS heap, or access log. Requires the bundled wheel (Node 20+ on the install host) OR `make web-build` from a checkout. CORS origins customisable via `--cors-origin` | `kbagent serve` (plain API) + Vite dev server + Node BFF -- the legacy three-process flow with hot reload, see `web/README.md` "Dev mode" section | inventing a `--token-in-url` flag; running uvicorn directly against `web.frontend.dist` -- the path-rewrite middleware + cookie bootstrap only fire from `kbagent serve --ui` | | Schedule / manage Agent Tasks | `kbagent agent ` (0.44.0+) -- CRUD `list/show/create/update/delete`, exec `run [--stream]`, history `runs/run-detail/run-events`, util `test/cron-preview/prompt-improve`. Local-only; cron needs `kbagent serve`. 
See [agent-tasks-cli-workflow](../skills/kbagent/references/agent-tasks-cli-workflow.md) | `kbagent http /agents...` (0.40.0+) in scheduled subprocesses; Web UI for human authoring | hand-editing `agents.json` | diff --git a/plugins/kbagent/skills/kbagent/references/commands-reference.md b/plugins/kbagent/skills/kbagent/references/commands-reference.md index 3f2cb1aa..81502847 100644 --- a/plugins/kbagent/skills/kbagent/references/commands-reference.md +++ b/plugins/kbagent/skills/kbagent/references/commands-reference.md @@ -310,8 +310,9 @@ CLI parity for the `/agents` REST surface. Reads/writes `/agents.jso ## Environment Variables | Variable | Purpose | |----------|---------| -| `KBC_TOKEN` | Fallback for `--token` | -| `KBC_STORAGE_API_URL` | Default stack URL | +| `KBC_TOKEN` | Fallback for `--token`. Also the credential source for headless `__env__` mode (see `KBAGENT_PROJECT_FROM_ENV`) | +| `KBC_STORAGE_API_URL` | Default stack URL. Also the stack source for headless `__env__` mode | +| `KBAGENT_PROJECT_FROM_ENV` | Set to `1`/`true`/`yes`/`on` to synthesize an in-memory project `__env__` from `KBC_TOKEN` + `KBC_STORAGE_API_URL` (since 0.50.0). Headless / token-only: no `project add`, no `config.json` on disk; token stays in memory (never persisted). Use `--project __env__`. Works for CLI and `kbagent serve`. Fails fast if creds missing | | `KBC_MANAGE_API_TOKEN` | Manage API token (org setup, project refresh, data-app password). Default-DENY since 0.28.0: requires top-level `--allow-env-manage-token` to opt in, otherwise ignored with a warning. 
| | `KBAGENT_CONFIG_DIR` | Override config directory | | `KBAGENT_SERVE_URL` | Self-URL of `kbagent serve` (used by `kbagent http`; auto-injected into scheduled-agent subprocesses) | diff --git a/plugins/kbagent/skills/kbagent/references/gotchas.md b/plugins/kbagent/skills/kbagent/references/gotchas.md index 3c28dee6..1dbc5a65 100644 --- a/plugins/kbagent/skills/kbagent/references/gotchas.md +++ b/plugins/kbagent/skills/kbagent/references/gotchas.md @@ -2258,3 +2258,38 @@ command without `--dry-run` themselves. Reads (`dev-portal list`, `dev-portal get`) are unrestricted — peer-research patterns ("show me how MySQL and Postgres extractors configure themselves") are agent-friendly via `list --vendor` + `get --app`. + +## Headless / token-only invocation: the `__env__` project (since v0.50.0) + +A daemon, container, or CI job that has only a token in its environment can run +kbagent with **no `kbagent project add` and no `config.json` on disk**. Set all +three: + +```bash +export KBAGENT_PROJECT_FROM_ENV=1 +export KBC_TOKEN=<storage-api-token> +export KBC_STORAGE_API_URL=https://connection.<region>.keboola.com +kbagent --json storage file-upload --project __env__ --file screenshot.png +``` + +kbagent synthesizes an in-memory project under the reserved alias `__env__`. +Pass it as `--project __env__` (or rely on it being the sole/default project for +commands that fall back to the default). + +Gotchas: +- **Opt-in is the flag, not the token.** `KBC_TOKEN` alone does nothing here — + it stays a `project add` fallback. Only `KBAGENT_PROJECT_FROM_ENV` (truthy: + `1`/`true`/`yes`/`on`) triggers injection. This avoids a phantom project on a + dev machine that exported `KBC_TOKEN` for an unrelated `project add`. +- **Token is never persisted.** `__env__` is `ephemeral`; even if a write op + triggers a `config.json` write, the env token is stripped first. There is no + way to leak it to disk through normal operation. 
+- **Fail-fast.** Flag set but `KBC_TOKEN` or `KBC_STORAGE_API_URL` missing → + exit 5 (`config error`) with a clear message, not a silent skip. +- **Same chokepoint for `serve`.** `kbagent serve` started with the same three + env vars exposes `__env__` too — POST endpoints take `project=__env__`. Both + CLI and serve resolve through `ConfigStore.load()`, so one env setup covers + both consumption styles. +- The alias is literally `__env__` (double underscore both sides) — chosen so it + cannot collide with a real user alias. A real project already registered under + `__env__` wins; no injection happens. diff --git a/pyproject.toml b/pyproject.toml index 4ee46bfd..ac96ab37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "keboola-agent-cli" -version = "0.49.0" +version = "0.50.0" description = "AI-friendly CLI for managing Keboola projects" readme = "README.md" requires-python = ">=3.12" diff --git a/src/keboola_agent_cli/changelog.py b/src/keboola_agent_cli/changelog.py index 651f8113..e77a6d4e 100644 --- a/src/keboola_agent_cli/changelog.py +++ b/src/keboola_agent_cli/changelog.py @@ -8,6 +8,10 @@ # Ordered newest-first. Each value is a list of brief one-line descriptions. CHANGELOG: dict[str, list[str]] = { + "0.50.0": [ + "New: headless / token-only invocation (issue #359). Set `KBAGENT_PROJECT_FROM_ENV=1` together with `KBC_TOKEN` + `KBC_STORAGE_API_URL` and kbagent synthesizes an in-memory project under the reserved alias `__env__` -- no `kbagent project add`, no `config.json` on disk. Lets a daemon (e.g. the jasnost bridge), a container, or a CI job run any storage/job/config command with `--project __env__`, or talk to a `kbagent serve` started the same way. Both the CLI and `serve` resolve the project through the same `ConfigStore.load()` chokepoint, so both work from the single env-injection.", + "Security: the env-synthesized `__env__` project lives in memory only. 
It is marked `ephemeral` and stripped by `ConfigStore.save()`, so the `KBC_TOKEN` from the environment is never written to `config.json`. Opt-in is explicit (the `KBAGENT_PROJECT_FROM_ENV` flag, not the mere presence of `KBC_TOKEN`) to avoid a phantom project surprising a developer who exported the token only for `project add`. If the flag is set but the credential vars are missing, the CLI fails fast with a clear error instead of silently skipping.", + ], "0.49.0": [ "New: `kbagent dev-portal` command group — v1 operations against the Keboola Developer Portal (`apps-api.keboola.com`). Lets component developers inspect and update portal entries without leaving the terminal. Read commands (`dev-portal list --vendor V`, `dev-portal get --app VENDOR.APP_ID`) are unrestricted and support peer-config research (pull reference schemas from existing extractors/writers for design reference). Write commands (`dev-portal create`, `dev-portal patch`, `dev-portal upload-icon`, `dev-portal publish`, `dev-portal deprecate`) always print the full pending request diff and then require the user to type a random hex code on a real terminal; there is no `--yes` flag and no env-var bypass; non-TTY shells exit 6 (`EXIT_PERMISSION_DENIED`). `--dry-run` produces the same preview and exits 0 -- the agent-safe path.", "New: multi-identity credential storage for the Developer Portal. Portal logins (email + password, with optional MFA for personal accounts) are stored per-alias in the same `config.json` as KB project tokens under 0600 protection. 
Identity lifecycle: `dev-portal identity add --alias A --username U [--password P | --password-stdin] [--role-hint vendor|admin] [--vendor V]`, `identity list`, `identity remove`, `identity edit`, `identity use ALIAS`, `identity current`, `identity verify`.", diff --git a/src/keboola_agent_cli/commands/context.py b/src/keboola_agent_cli/commands/context.py index d2957c3e..1d15dc1b 100644 --- a/src/keboola_agent_cli/commands/context.py +++ b/src/keboola_agent_cli/commands/context.py @@ -1233,6 +1233,13 @@ KBC_MASTER_TOKEN_* Per-project master token (e.g. KBC_MASTER_TOKEN_PROD) KBAGENT_CONFIG_DIR Override config directory KBAGENT_PROJECT Override the pinned default project for this shell/session (beats pin, loses to --project) + KBAGENT_PROJECT_FROM_ENV Set to "1" (or true/yes/on) to synthesize an in-memory project under the + reserved alias __env__ from KBC_TOKEN + KBC_STORAGE_API_URL (since 0.50.0). + Headless / token-only mode: no `project add`, no config.json on disk. Use + `--project __env__` (or rely on it as the sole/default project). The token + lives in memory only -- it is NEVER persisted, even if a write op runs. + Works for both the CLI and `kbagent serve`. Fails fast if the flag is set + but KBC_TOKEN / KBC_STORAGE_API_URL are missing. 
KBAGENT_MAX_PARALLEL_WORKERS Max concurrent threads for multi-project ops (default 10, max 100) KBAGENT_AUTO_UPDATE Set to "false" to disable automatic update on startup KBAGENT_UPDATED_FROM Set to an older version to trigger "What's new" display on next run diff --git a/src/keboola_agent_cli/config_store.py b/src/keboola_agent_cli/config_store.py index 87f32f46..0d17e55f 100644 --- a/src/keboola_agent_cli/config_store.py +++ b/src/keboola_agent_cli/config_store.py @@ -14,7 +14,14 @@ import platformdirs -from .constants import ENV_CONFIG_DIR, LOCAL_CONFIG_DIR_NAME +from .constants import ( + ENV_CONFIG_DIR, + ENV_KBC_STORAGE_API_URL, + ENV_KBC_TOKEN, + ENV_PROJECT_ALIAS, + ENV_PROJECT_FROM_ENV, + LOCAL_CONFIG_DIR_NAME, +) from .errors import ConfigError from .models import AppConfig, DeveloperPortalIdentity, ProjectConfig @@ -142,7 +149,7 @@ def load(self) -> AppConfig: logger.debug("Loading config from %s", self._config_path) if not self._config_path.exists(): logger.debug("Config file does not exist, returning empty config") - return AppConfig() + return self._inject_env_project(AppConfig()) fd: int | None = None try: @@ -176,10 +183,86 @@ def load(self) -> AppConfig: ) try: - return AppConfig.model_validate(data) + config = AppConfig.model_validate(data) except Exception as exc: raise ConfigError(f"Config file has invalid structure: {exc}") from exc + return self._inject_env_project(config) + + def _inject_env_project(self, config: AppConfig) -> AppConfig: + """Synthesize an in-memory project from env vars when opted in (issue #359). + + When ``KBAGENT_PROJECT_FROM_ENV`` is truthy, read ``KBC_TOKEN`` and + ``KBC_STORAGE_API_URL`` and inject a project under the reserved alias + ``__env__`` so a headless daemon / container / CI can run kbagent with + no ``project add`` and no config.json on disk. Both CLI and ``serve`` + funnel through ``load()``, so this single chokepoint covers both. 
+ + The injected project is marked ``ephemeral=True``; ``save()`` strips it + so the env token is never persisted. Opt-in is explicit (the flag), not + the mere presence of ``KBC_TOKEN``, to avoid a phantom project on a dev + machine that exported the token only for ``project add``. + + A real project already registered under ``__env__`` is left untouched. + + Raises: + ConfigError: If the flag is set but the credential env vars are + missing (fail fast rather than silently skip). + """ + flag = os.environ.get(ENV_PROJECT_FROM_ENV, "").strip().lower() + if flag not in ("1", "true", "yes", "on"): + return config + + if ENV_PROJECT_ALIAS in config.projects: + return config + + token = os.environ.get(ENV_KBC_TOKEN) + url = os.environ.get(ENV_KBC_STORAGE_API_URL) + if not token or not url: + missing = [ + name + for name, value in ((ENV_KBC_TOKEN, token), (ENV_KBC_STORAGE_API_URL, url)) + if not value + ] + raise ConfigError( + f"{ENV_PROJECT_FROM_ENV} is set but {' and '.join(missing)} " + f"{'is' if len(missing) == 1 else 'are'} missing. Set both " + f"{ENV_KBC_TOKEN} and {ENV_KBC_STORAGE_API_URL}, or unset " + f"{ENV_PROJECT_FROM_ENV}." + ) + + config.projects[ENV_PROJECT_ALIAS] = ProjectConfig( + stack_url=url, + token=token, + project_name="env (headless)", + ephemeral=True, + ) + if not config.default_project: + config.default_project = ENV_PROJECT_ALIAS + logger.debug("Injected ephemeral '%s' project from environment", ENV_PROJECT_ALIAS) + return config + + @staticmethod + def _strip_ephemeral_projects(config: AppConfig) -> AppConfig: + """Return a copy of ``config`` with ephemeral (env-synthesized) projects removed. + + Defends against persisting an env token to disk: mutation methods do + ``load() -> mutate -> save()``, and ``load()`` may have injected the + ``__env__`` project. The original object is left intact because callers + keep using it after ``save()`` returns. 
If ``default_project`` pointed + at a stripped ephemeral alias, it falls back to the first remaining alias, or + is blanked when none remain (only then does ``load()`` re-default to ``__env__``). + """ + ephemeral_aliases = {alias for alias, p in config.projects.items() if p.ephemeral} + if not ephemeral_aliases: + return config + clean = config.model_copy(deep=True) + for alias in ephemeral_aliases: + clean.projects.pop(alias, None) + if clean.default_project in ephemeral_aliases: + clean.default_project = next(iter(clean.projects), "") + return clean + def save(self, config: AppConfig) -> None: """Save configuration to disk with secure file permissions (0600). @@ -195,6 +278,10 @@ def save(self, config: AppConfig) -> None: try: self._config_dir.mkdir(parents=True, exist_ok=True, mode=0o700) self._ensure_gitignore() + # Never persist env-synthesized projects (issue #359): strip any + # ephemeral entry so the KBC_TOKEN from the environment stays in + # memory only. Operate on a copy -- callers reuse the AppConfig. + config = self._strip_ephemeral_projects(config) # Prepend the agent-facing warning as the first field so any LLM # that reads config.json sees it BEFORE any token value. payload = { diff --git a/src/keboola_agent_cli/constants.py b/src/keboola_agent_cli/constants.py index e516cbfe..f94306d6 100644 --- a/src/keboola_agent_cli/constants.py +++ b/src/keboola_agent_cli/constants.py @@ -149,6 +149,17 @@ # Overrides the persisted `default_project` pin for a single invocation/session. ENV_KBAGENT_PROJECT: str = "KBAGENT_PROJECT" +# --- Headless / env-only project (issue #359) --- +# Opt-in flag that makes ConfigStore synthesize an in-memory project from +# KBC_TOKEN + KBC_STORAGE_API_URL, so a daemon / container / CI can run kbagent +# (CLI or `serve`) with no `kbagent project add` and no config.json on disk. +# Explicit opt-in (not mere presence of KBC_TOKEN) avoids a phantom project +# surprising a dev who exported KBC_TOKEN only for `kbagent project add`. 
+ENV_PROJECT_FROM_ENV: str = "KBAGENT_PROJECT_FROM_ENV" +# Reserved alias for the synthesized project. Double-underscore marks it as a +# synthetic, never-persisted entry that cannot collide with a user alias. +ENV_PROJECT_ALIAS: str = "__env__" + # --- Environment Variable Names --- ENV_MAX_PARALLEL_WORKERS: str = "KBAGENT_MAX_PARALLEL_WORKERS" ENV_KBC_TOKEN: str = "KBC_TOKEN" diff --git a/src/keboola_agent_cli/models.py b/src/keboola_agent_cli/models.py index 2e4f4b41..4eb82b40 100644 --- a/src/keboola_agent_cli/models.py +++ b/src/keboola_agent_cli/models.py @@ -28,6 +28,16 @@ class ProjectConfig(BaseModel): default=None, description="Organization name (populated via `org setup` or when verify_token returns it)", ) + ephemeral: bool = Field( + default=False, + exclude=True, + description=( + "True for an in-memory project synthesized from KBC_TOKEN + " + "KBC_STORAGE_API_URL (headless mode, issue #359). Excluded from " + "serialization and stripped by ConfigStore.save() so the env " + "token is never written to disk." + ), + ) @field_validator("stack_url") @classmethod diff --git a/tests/test_config_store.py b/tests/test_config_store.py index 59440086..76c123ad 100644 --- a/tests/test_config_store.py +++ b/tests/test_config_store.py @@ -795,3 +795,116 @@ def test_set_default(self, config_store): config_store.set_default_dev_portal_identity("beta") cfg = config_store.load() assert cfg.default_dev_portal_identity == "beta" + + +class TestEnvProjectInjection: + """Headless env-only project injection (issue #359). + + KBAGENT_PROJECT_FROM_ENV=1 + KBC_TOKEN + KBC_STORAGE_API_URL make load() + synthesize an in-memory '__env__' project; save() never persists it. 
+ """ + + TOKEN = "901-99999-fakeHeadlessTokenDoNotUseXXXXX" + URL = "https://connection.keboola.com" + + def _opt_in(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("KBAGENT_PROJECT_FROM_ENV", "1") + monkeypatch.setenv("KBC_TOKEN", self.TOKEN) + monkeypatch.setenv("KBC_STORAGE_API_URL", self.URL) + + def test_not_injected_without_opt_in( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """KBC_TOKEN alone (no flag) must NOT create a phantom project.""" + monkeypatch.delenv("KBAGENT_PROJECT_FROM_ENV", raising=False) + monkeypatch.setenv("KBC_TOKEN", self.TOKEN) + monkeypatch.setenv("KBC_STORAGE_API_URL", self.URL) + config = ConfigStore(config_dir=tmp_config_dir).load() + assert config.projects == {} + + def test_injected_into_empty_config( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """With opt-in and no config file, '__env__' is injected and defaulted.""" + self._opt_in(monkeypatch) + config = ConfigStore(config_dir=tmp_config_dir).load() + assert "__env__" in config.projects + env_proj = config.projects["__env__"] + assert env_proj.token == self.TOKEN + assert env_proj.stack_url == self.URL + assert env_proj.ephemeral is True + assert config.default_project == "__env__" + + def test_opt_in_truthy_variants( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Accept common truthy spellings of the opt-in flag.""" + monkeypatch.setenv("KBC_TOKEN", self.TOKEN) + monkeypatch.setenv("KBC_STORAGE_API_URL", self.URL) + for value in ("true", "YES", "On", "1"): + monkeypatch.setenv("KBAGENT_PROJECT_FROM_ENV", value) + config = ConfigStore(config_dir=tmp_config_dir).load() + assert "__env__" in config.projects, value + + def test_missing_creds_fail_fast( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Flag set but creds missing must raise, not silently skip.""" + monkeypatch.setenv("KBAGENT_PROJECT_FROM_ENV", "1") + 
monkeypatch.delenv("KBC_TOKEN", raising=False) + monkeypatch.setenv("KBC_STORAGE_API_URL", self.URL) + with pytest.raises(ConfigError, match="KBC_TOKEN"): + ConfigStore(config_dir=tmp_config_dir).load() + + def test_does_not_override_real_alias( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """A real project already named '__env__' is left untouched.""" + store = ConfigStore(config_dir=tmp_config_dir) + store.save( + AppConfig( + default_project="__env__", + projects={ + "__env__": ProjectConfig( + stack_url="https://other.keboola.com", + token="901-11111-realPersistedTokenXXXXXXXXXXXX", + ) + }, + ) + ) + self._opt_in(monkeypatch) + config = store.load() + assert config.projects["__env__"].token == "901-11111-realPersistedTokenXXXXXXXXXXXX" + assert config.projects["__env__"].ephemeral is False + + def test_ephemeral_never_persisted( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """save() after a load() that injected '__env__' must not write the token.""" + self._opt_in(monkeypatch) + store = ConfigStore(config_dir=tmp_config_dir) + config = store.load() # injects __env__ + config.projects["real"] = ProjectConfig( + stack_url=self.URL, token="901-22222-realTokenForRealProjectXXXXX" + ) + store.save(config) + + raw = (tmp_config_dir / "config.json").read_text() + assert "__env__" not in raw + assert self.TOKEN not in raw + assert "real" in raw + # In-memory object passed by the caller is left intact. 
+ assert "__env__" in config.projects + + def test_default_blanked_when_ephemeral_stripped( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """If default_project pointed at the stripped '__env__', it is reset on disk.""" + self._opt_in(monkeypatch) + store = ConfigStore(config_dir=tmp_config_dir) + config = store.load() # default_project == "__env__" + assert config.default_project == "__env__" + store.save(config) + + on_disk = json.loads((tmp_config_dir / "config.json").read_text()) + assert on_disk["default_project"] == "" diff --git a/tests/test_e2e.py b/tests/test_e2e.py index 08ddf890..4fd19e93 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -10612,3 +10612,60 @@ def test_list_apps_against_real_portal(self) -> None: ], ) assert result.exit_code == 0, result.output + + +@skip_without_credentials +@pytest.mark.e2e +class TestHeadlessEnvProject: + """Headless / token-only invocation against the real API (issue #359). + + Verifies that KBAGENT_PROJECT_FROM_ENV=1 + KBC_TOKEN + KBC_STORAGE_API_URL + let kbagent run with an EMPTY config dir (no `project add`, no config.json), + and that the env token is never written to disk. 
+ """ + + @pytest.fixture(autouse=True) + def setup(self, tmp_path: Path) -> None: + self.token = os.environ[ENV_TOKEN] + raw_url = os.environ.get(ENV_URL, "connection.keboola.com") + self.url = raw_url if raw_url.startswith("https://") else f"https://{raw_url}" + self.config_dir = tmp_path / "empty-config" + self.config_dir.mkdir() + + def _headless_env(self) -> dict[str, str]: + return { + "KBAGENT_PROJECT_FROM_ENV": "1", + "KBC_TOKEN": self.token, + "KBC_STORAGE_API_URL": self.url, + } + + def test_headless_lists_env_project(self) -> None: + _step("HEADLESS-1", "project list resolves __env__ from env, no config.json") + with patch.dict(os.environ, self._headless_env()): + result = _invoke(self.config_dir, ["--json", "project", "list"]) + data = _json_ok(result) + aliases = {p["alias"] for p in data["data"]} + assert "__env__" in aliases, data + # No config.json was written -- token stays in memory only. + assert not (self.config_dir / "config.json").exists() + + def test_headless_storage_call_hits_api(self) -> None: + _step("HEADLESS-2", "storage buckets --project __env__ reaches the real API") + with patch.dict(os.environ, self._headless_env()): + result = _invoke( + self.config_dir, + ["--json", "storage", "buckets", "--project", "__env__"], + ) + # status=ok proves the env token authenticated a real API call. + _json_ok(result) + assert not (self.config_dir / "config.json").exists() + + def test_headless_requires_opt_in_flag(self) -> None: + _step("HEADLESS-3", "KBC_TOKEN without the opt-in flag => no phantom project") + env = {"KBC_TOKEN": self.token, "KBC_STORAGE_API_URL": self.url} + with patch.dict(os.environ, env): + # Ensure the flag is absent for this assertion. 
+ os.environ.pop("KBAGENT_PROJECT_FROM_ENV", None) + result = _invoke(self.config_dir, ["--json", "project", "list"]) + data = _json_ok(result) + assert data["data"] == [], data diff --git a/uv.lock b/uv.lock index 3f328c91..971fe4a8 100644 --- a/uv.lock +++ b/uv.lock @@ -496,7 +496,7 @@ wheels = [ [[package]] name = "keboola-agent-cli" -version = "0.49.0" +version = "0.50.0" source = { editable = "." } dependencies = [ { name = "croniter" }, From b2c28be054ac0603f4b142e26c5537720c542fa1 Mon Sep 17 00:00:00 2001 From: Petr Date: Fri, 29 May 2026 17:37:09 +0200 Subject: [PATCH 2/5] feat(project): normalize stack URLs (bare host, deep-link, trailing slash) UX follow-up on the headless mode. `KBC_STORAGE_API_URL` (and `project add --url` / `project edit --url`) previously rejected anything that was not already a clean `https://<host>` base -- a bare host like `connection.keboola.com` raised a pydantic ValidationError traceback. Add `normalize_stack_url()` as the single source of truth, used by the ProjectConfig field validator (safety net + clean stored value) and by ProjectService.add_project / edit_project (so token verification hits the right host). It accepts: - bare host connection.keboola.com - trailing slash https://connection.keboola.com/ - surrounding whitespace (paste artifact) - full project deep-link https://connection.keboola.com/admin/projects/10105/dashboard and reduces every form to https://<host>. Explicit non-https schemes (http://, file://, ftp://) are still rejected (SSRF / protocol-abuse guard). An unusable URL in the headless `__env__` injection now raises a clean ConfigError (exit 5) instead of a raw ValidationError traceback. Tests: 6 new model tests + 2 new env-injection tests; updated the old "reject no scheme" test to assert normalization. Full non-e2e suite: 3771 passed. 
--- .../skills/kbagent/references/gotchas.md | 6 +++ src/keboola_agent_cli/changelog.py | 1 + src/keboola_agent_cli/config_store.py | 21 +++++--- src/keboola_agent_cli/models.py | 53 ++++++++++++++++--- .../services/project_service.py | 11 +++- tests/test_config_store.py | 20 +++++++ tests/test_models.py | 51 +++++++++++++++--- 7 files changed, 142 insertions(+), 21 deletions(-) diff --git a/plugins/kbagent/skills/kbagent/references/gotchas.md b/plugins/kbagent/skills/kbagent/references/gotchas.md index 1dbc5a65..ba7b40dd 100644 --- a/plugins/kbagent/skills/kbagent/references/gotchas.md +++ b/plugins/kbagent/skills/kbagent/references/gotchas.md @@ -2293,3 +2293,9 @@ Gotchas: - The alias is literally `__env__` (double underscore both sides) — chosen so it cannot collide with a real user alias. A real project already registered under `__env__` wins; no injection happens. +- **`KBC_STORAGE_API_URL` is forgiving (since v0.50.0).** A bare host + (`connection.keboola.com`), a trailing slash, or a full project deep-link + (`.../admin/projects/123/dashboard`) all normalize to `https://<host>`. Same + normalization applies to `project add --url` / `project edit --url`. Explicit + `http://` / `file://` is still rejected; a bad URL fails fast with a clean + config error (exit 5), not a traceback. diff --git a/src/keboola_agent_cli/changelog.py b/src/keboola_agent_cli/changelog.py index e77a6d4e..6053b65d 100644 --- a/src/keboola_agent_cli/changelog.py +++ b/src/keboola_agent_cli/changelog.py @@ -10,6 +10,7 @@ CHANGELOG: dict[str, list[str]] = { "0.50.0": [ "New: headless / token-only invocation (issue #359). Set `KBAGENT_PROJECT_FROM_ENV=1` together with `KBC_TOKEN` + `KBC_STORAGE_API_URL` and kbagent synthesizes an in-memory project under the reserved alias `__env__` -- no `kbagent project add`, no `config.json` on disk. Lets a daemon (e.g. 
the jasnost bridge), a container, or a CI job run any storage/job/config command with `--project __env__`, or talk to a `kbagent serve` started the same way. Both the CLI and `serve` resolve the project through the same `ConfigStore.load()` chokepoint, so both work from the single env-injection.", + "UX: stack URLs are now normalized everywhere a project is created (`project add`, `project edit --url`, and the headless `__env__` injection). A bare host (`connection.keboola.com`), a trailing slash, surrounding whitespace, or even a full project deep-link (`https://connection.keboola.com/admin/projects/10105/dashboard`) are all reduced to the clean `https://` base instead of erroring. Explicit non-https schemes (`http://`, `file://`, ...) are still rejected as an SSRF / protocol-abuse guard, and an unusable URL in `KBC_STORAGE_API_URL` now fails fast with a clean config error rather than a raw pydantic traceback.", "Security: the env-synthesized `__env__` project lives in memory only. It is marked `ephemeral` and stripped by `ConfigStore.save()`, so the `KBC_TOKEN` from the environment is never written to `config.json`. Opt-in is explicit (the `KBAGENT_PROJECT_FROM_ENV` flag, not the mere presence of `KBC_TOKEN`) to avoid a phantom project surprising a developer who exported the token only for `project add`. If the flag is set but the credential vars are missing, the CLI fails fast with a clear error instead of silently skipping.", ], "0.49.0": [ diff --git a/src/keboola_agent_cli/config_store.py b/src/keboola_agent_cli/config_store.py index 0d17e55f..9588d179 100644 --- a/src/keboola_agent_cli/config_store.py +++ b/src/keboola_agent_cli/config_store.py @@ -13,6 +13,7 @@ from pathlib import Path import platformdirs +from pydantic import ValidationError from .constants import ( ENV_CONFIG_DIR, @@ -231,12 +232,20 @@ def _inject_env_project(self, config: AppConfig) -> AppConfig: f"{ENV_PROJECT_FROM_ENV}." 
) - config.projects[ENV_PROJECT_ALIAS] = ProjectConfig( - stack_url=url, - token=token, - project_name="env (headless)", - ephemeral=True, - ) + try: + config.projects[ENV_PROJECT_ALIAS] = ProjectConfig( + stack_url=url, + token=token, + project_name="env (headless)", + ephemeral=True, + ) + except ValidationError as exc: + # Convert pydantic's raw error into a clean fail-fast message -- + # this runs inside load(), which callers only guard for ConfigError. + reason = "; ".join(e.get("msg", "") for e in exc.errors()) or str(exc) + raise ConfigError( + f"{ENV_KBC_STORAGE_API_URL}={url!r} is not a usable stack URL: {reason}" + ) from exc if not config.default_project: config.default_project = ENV_PROJECT_ALIAS logger.debug("Injected ephemeral '%s' project from environment", ENV_PROJECT_ALIAS) diff --git a/src/keboola_agent_cli/models.py b/src/keboola_agent_cli/models.py index 4eb82b40..51353e24 100644 --- a/src/keboola_agent_cli/models.py +++ b/src/keboola_agent_cli/models.py @@ -1,10 +1,49 @@ """Pydantic models shared across all layers of the application.""" from typing import Any +from urllib.parse import urlparse from pydantic import BaseModel, Field, field_validator +def normalize_stack_url(value: str) -> str: + """Normalize a user-supplied Keboola stack URL to its scheme+host base. + + Accepts, in order of forgiveness: + - a bare host ``connection.keboola.com`` + - a full base URL ``https://connection.keboola.com`` + - a full base URL + slash ``https://connection.keboola.com/`` + - a full project deep-link ``https://connection.keboola.com/admin/projects/10105/dashboard`` + + and reduces every form to ``https://`` (path/query/fragment dropped). + A missing scheme defaults to ``https://``. Any *explicit* non-https scheme + (``http://``, ``file://``, ``ftp://``, ...) is rejected -- this is an + SSRF / protocol-abuse guard, so we never silently upgrade a typed-out + ``http://`` to https. 
+ + Raises: + ValueError: empty input, an explicit non-https scheme, or no host. + """ + raw = value.strip() + if not raw: + raise ValueError("Stack URL must not be empty.") + # No scheme typed -> assume https so urlparse sees a netloc, not a path. + if "://" not in raw: + raw = f"https://{raw}" + parsed = urlparse(raw) + if parsed.scheme != "https": + raise ValueError( + f"Stack URL must use https:// scheme, got: {parsed.scheme or '(none)'}://. " + "Plain HTTP, file://, and other protocols are not allowed." + ) + if not parsed.netloc: + raise ValueError( + f"Stack URL has no host: {value!r}. Expected e.g. " + "'connection.keboola.com' or 'https://connection.keboola.com'." + ) + return f"https://{parsed.netloc}" + + class ProjectConfig(BaseModel): """Configuration for a single Keboola project connection.""" @@ -42,13 +81,13 @@ class ProjectConfig(BaseModel): @field_validator("stack_url") @classmethod def validate_stack_url_scheme(cls, v: str) -> str: - """Enforce HTTPS scheme on stack URL to prevent SSRF and protocol abuse.""" - if not v.startswith("https://"): - raise ValueError( - f"Stack URL must use https:// scheme, got: {v!r}. " - "Plain HTTP, file://, and other protocols are not allowed." - ) - return v + """Normalize the stack URL to ``https://`` (see ``normalize_stack_url``). + + Accepts a bare host, a full base URL, or a full project deep-link and + reduces it to the scheme+host base; rejects explicit non-https schemes + (SSRF / protocol-abuse guard). 
+ """ + return normalize_stack_url(v) class DeveloperPortalIdentity(BaseModel): diff --git a/src/keboola_agent_cli/services/project_service.py b/src/keboola_agent_cli/services/project_service.py index 8b2e01bc..af9788e5 100644 --- a/src/keboola_agent_cli/services/project_service.py +++ b/src/keboola_agent_cli/services/project_service.py @@ -14,7 +14,7 @@ from ..constants import ENV_KBAGENT_PROJECT from ..errors import ConfigError, KeboolaApiError, mask_token -from ..models import ProjectConfig +from ..models import ProjectConfig, normalize_stack_url from .base import BaseService # Filesystem-safe slug constraint for ``--new-alias``. Aliases land on disk @@ -51,6 +51,10 @@ def add_project(self, alias: str, stack_url: str, token: str) -> dict[str, Any]: KeboolaApiError: If token verification fails. ConfigError: If the alias already exists. """ + # Accept a bare host or a full project deep-link, not just a clean base + # URL -- normalize before we build the verification client so the token + # check hits the right host (and the stored value is the clean base). + stack_url = normalize_stack_url(stack_url) client = self._client_factory(stack_url, token) try: token_info = client.verify_token() @@ -153,6 +157,11 @@ def edit_project( "--new-alias (matching the current alias is a no-op)." ) + # Normalize a bare host / full project deep-link to the clean base URL + # up front so the dry-run preview and the real edit agree on the value. 
+ if stack_url is not None: + stack_url = normalize_stack_url(stack_url) + # ----- dry-run path: validate everything, mutate nothing ---------- if dry_run: planned_rename: dict[str, Any] | None = None diff --git a/tests/test_config_store.py b/tests/test_config_store.py index 76c123ad..a73c56da 100644 --- a/tests/test_config_store.py +++ b/tests/test_config_store.py @@ -856,6 +856,26 @@ def test_missing_creds_fail_fast( with pytest.raises(ConfigError, match="KBC_TOKEN"): ConfigStore(config_dir=tmp_config_dir).load() + def test_bare_host_url_normalized( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """A bare host in KBC_STORAGE_API_URL is normalized to https://, not rejected.""" + monkeypatch.setenv("KBAGENT_PROJECT_FROM_ENV", "1") + monkeypatch.setenv("KBC_TOKEN", self.TOKEN) + monkeypatch.setenv("KBC_STORAGE_API_URL", "connection.keboola.com") + config = ConfigStore(config_dir=tmp_config_dir).load() + assert config.projects["__env__"].stack_url == "https://connection.keboola.com" + + def test_invalid_url_fails_clean( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """An http:// env URL raises a clean ConfigError, not a raw ValidationError.""" + monkeypatch.setenv("KBAGENT_PROJECT_FROM_ENV", "1") + monkeypatch.setenv("KBC_TOKEN", self.TOKEN) + monkeypatch.setenv("KBC_STORAGE_API_URL", "http://connection.keboola.com") + with pytest.raises(ConfigError, match="not a usable stack URL"): + ConfigStore(config_dir=tmp_config_dir).load() + def test_does_not_override_real_alias( self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch ) -> None: diff --git a/tests/test_models.py b/tests/test_models.py index 99788560..de49817b 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -275,13 +275,50 @@ def test_project_add_rejects_ftp_url(self) -> None: token="901-token", ) - def test_project_add_rejects_no_scheme(self) -> None: - """URL without scheme is rejected with a ValidationError.""" - with 
pytest.raises(ValidationError, match="https://"): - ProjectConfig( - stack_url="connection.keboola.com", - token="901-token", - ) + def test_bare_host_is_normalized_to_https(self) -> None: + """A bare host (no scheme) gets https:// prepended instead of rejected.""" + config = ProjectConfig( + stack_url="connection.keboola.com", + token="901-token", + ) + assert config.stack_url == "https://connection.keboola.com" + + def test_full_project_link_reduced_to_base(self) -> None: + """A full project deep-link is reduced to scheme+host.""" + config = ProjectConfig( + stack_url="https://connection.keboola.com/admin/projects/10105/dashboard", + token="901-token", + ) + assert config.stack_url == "https://connection.keboola.com" + + def test_trailing_slash_stripped(self) -> None: + """A trailing slash is dropped from the normalized base URL.""" + config = ProjectConfig( + stack_url="https://connection.keboola.com/", + token="901-token", + ) + assert config.stack_url == "https://connection.keboola.com" + + def test_bare_host_with_path_reduced_to_base(self) -> None: + """A bare host + path (no scheme) normalizes to https://.""" + config = ProjectConfig( + stack_url="connection.north-europe.azure.keboola.com/admin/projects/7", + token="901-token", + ) + assert config.stack_url == "https://connection.north-europe.azure.keboola.com" + + def test_surrounding_whitespace_trimmed(self) -> None: + """Leading/trailing whitespace (paste artifact) is trimmed.""" + config = ProjectConfig( + stack_url=" https://connection.keboola.com ", + token="901-token", + ) + assert config.stack_url == "https://connection.keboola.com" + + def test_empty_url_rejected(self) -> None: + """An empty / whitespace-only URL is rejected.""" + with pytest.raises(ValidationError, match="empty"): + ProjectConfig(stack_url=" ", token="901-token") def test_project_add_accepts_https_url(self) -> None: """https:// URL is accepted without error.""" From 5fdfb52a4fb5f1a9385d11d4688c16480bf7137c Mon Sep 17 00:00:00 2001 
From: Petr Date: Fri, 29 May 2026 17:45:54 +0200 Subject: [PATCH 3/5] fix(headless): recover __env__ project_id from token, drop fake name `project list` showed `project_name="env (headless)"` and a null Project ID for the env-injected project -- the fake name was misleading and the ID was simply missing. ConfigStore.load() must stay offline (it runs many times per command and per serve request), so it cannot call verify_token to fetch the real project name. But Keboola Storage tokens are `{projectId}-{tokenId}-{secret}`, so the project_id is recovered offline from the token prefix. The project_name is left blank (honest) instead of a fake placeholder; `project status` / `project info` verify against the API and show the real name when a command actually needs it. Tests: assert project_id is parsed (901-...) and name is blank; a non-numeric token prefix leaves project_id unset without crashing. --- .../kbagent/skills/kbagent/references/gotchas.md | 5 +++++ src/keboola_agent_cli/config_store.py | 9 ++++++++- tests/test_config_store.py | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/plugins/kbagent/skills/kbagent/references/gotchas.md b/plugins/kbagent/skills/kbagent/references/gotchas.md index ba7b40dd..97a1e4ee 100644 --- a/plugins/kbagent/skills/kbagent/references/gotchas.md +++ b/plugins/kbagent/skills/kbagent/references/gotchas.md @@ -2293,6 +2293,11 @@ Gotchas: - The alias is literally `__env__` (double underscore both sides) — chosen so it cannot collide with a real user alias. A real project already registered under `__env__` wins; no injection happens. +- **`__env__` shows `project_id` but a blank name in `project list`.** `load()` + is offline, so the injection recovers `project_id` from the token prefix + (`{projectId}-{tokenId}-{secret}`) but cannot fetch the real project name. + Run `kbagent project status --project __env__` (or `project info`) to verify + the token against the API and see the real name. 
- **`KBC_STORAGE_API_URL` is forgiving (since v0.50.0).** A bare host (`connection.keboola.com`), a trailing slash, or a full project deep-link (`.../admin/projects/123/dashboard`) all normalize to `https://`. Same diff --git a/src/keboola_agent_cli/config_store.py b/src/keboola_agent_cli/config_store.py index 9588d179..f0a424d7 100644 --- a/src/keboola_agent_cli/config_store.py +++ b/src/keboola_agent_cli/config_store.py @@ -232,11 +232,18 @@ def _inject_env_project(self, config: AppConfig) -> AppConfig: f"{ENV_PROJECT_FROM_ENV}." ) + # Keboola Storage tokens are `{projectId}-{tokenId}-{secret}`, so we can + # recover the project_id offline from the prefix. The real project_name + # needs an API call (verify_token) -- load() must stay offline, so it is + # left blank here; `project status` / `project info` show the verified + # name when a command actually talks to the API. + prefix = token.split("-", 1)[0] + project_id = int(prefix) if prefix.isdigit() else None try: config.projects[ENV_PROJECT_ALIAS] = ProjectConfig( stack_url=url, token=token, - project_name="env (headless)", + project_id=project_id, ephemeral=True, ) except ValidationError as exc: diff --git a/tests/test_config_store.py b/tests/test_config_store.py index a73c56da..c4d3969b 100644 --- a/tests/test_config_store.py +++ b/tests/test_config_store.py @@ -833,8 +833,22 @@ def test_injected_into_empty_config( assert env_proj.token == self.TOKEN assert env_proj.stack_url == self.URL assert env_proj.ephemeral is True + # project_id is recovered offline from the token prefix (901-99999-...). + assert env_proj.project_id == 901 + # project_name needs an API call -> left blank by the offline injection. 
+ assert env_proj.project_name == "" assert config.default_project == "__env__" + def test_non_numeric_token_prefix_yields_no_project_id( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """A token whose prefix isn't numeric leaves project_id unset, no crash.""" + monkeypatch.setenv("KBAGENT_PROJECT_FROM_ENV", "1") + monkeypatch.setenv("KBC_TOKEN", "abc-def-notNumericPrefixXXXXXXXXXXXX") + monkeypatch.setenv("KBC_STORAGE_API_URL", self.URL) + config = ConfigStore(config_dir=tmp_config_dir).load() + assert config.projects["__env__"].project_id is None + def test_opt_in_truthy_variants( self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch ) -> None: From 209d33b62589f3d36f50d411d68be1f946f5a2e9 Mon Sep 17 00:00:00 2001 From: Petr Date: Fri, 29 May 2026 21:34:47 +0200 Subject: [PATCH 4/5] review(#363): version gate, guard __env__ mutations, service URL test Address the kbagent-pr-reviewer findings on #363: - NB-1: add the 0.50.0 headless / URL-normalization entry to the Rule 6 VERSION GATE in keboola-expert.md (highest silent-drift surface). - NB-2: reject remove/edit/rename/set-branch on the env-synthesized __env__ project with a clear ConfigError instead of reporting a success that silently vanishes on the next load(). A real persisted project under the same alias (ephemeral=False) stays mutable. - NIT-1: add a service-layer test asserting add_project() normalizes a bare-host / deep-link URL through normalize_stack_url() before the verification client and before persisting. Tests: +5 (guard x2, service normalization x1, project_id parse x2 from earlier). Full non-e2e suite: 3775 passed. 
--- plugins/kbagent/agents/keboola-expert.md | 1 + src/keboola_agent_cli/changelog.py | 2 +- src/keboola_agent_cli/config_store.py | 25 +++++++++++++++++ tests/test_config_store.py | 34 ++++++++++++++++++++++++ tests/test_services.py | 22 +++++++++++++++ 5 files changed, 83 insertions(+), 1 deletion(-) diff --git a/plugins/kbagent/agents/keboola-expert.md b/plugins/kbagent/agents/keboola-expert.md index 2e153171..f7047d33 100644 --- a/plugins/kbagent/agents/keboola-expert.md +++ b/plugins/kbagent/agents/keboola-expert.md @@ -113,6 +113,7 @@ a critical failure. `sync push` fresh-CREATE variable-link resolution + `--branch ` default-tree promote = 0.47.2+, `feature` group (stack/project/user feature flags, Manage API) = 0.48.0+, `dev-portal` command group = 0.49.0+, + headless `__env__` project (`KBAGENT_PROJECT_FROM_ENV=1` + `KBC_TOKEN` + `KBC_STORAGE_API_URL`) + forgiving stack-URL normalization (bare host / full project deep-link) = 0.50.0+, `storage retype` is a future composite), you MUST refuse the task and return a handoff message to the parent: `"Cannot proceed safely on kbagent . Missing: . diff --git a/src/keboola_agent_cli/changelog.py b/src/keboola_agent_cli/changelog.py index 6053b65d..c4114cef 100644 --- a/src/keboola_agent_cli/changelog.py +++ b/src/keboola_agent_cli/changelog.py @@ -11,7 +11,7 @@ "0.50.0": [ "New: headless / token-only invocation (issue #359). Set `KBAGENT_PROJECT_FROM_ENV=1` together with `KBC_TOKEN` + `KBC_STORAGE_API_URL` and kbagent synthesizes an in-memory project under the reserved alias `__env__` -- no `kbagent project add`, no `config.json` on disk. Lets a daemon (e.g. the jasnost bridge), a container, or a CI job run any storage/job/config command with `--project __env__`, or talk to a `kbagent serve` started the same way. 
Both the CLI and `serve` resolve the project through the same `ConfigStore.load()` chokepoint, so both work from the single env-injection.", "UX: stack URLs are now normalized everywhere a project is created (`project add`, `project edit --url`, and the headless `__env__` injection). A bare host (`connection.keboola.com`), a trailing slash, surrounding whitespace, or even a full project deep-link (`https://connection.keboola.com/admin/projects/10105/dashboard`) are all reduced to the clean `https://` base instead of erroring. Explicit non-https schemes (`http://`, `file://`, ...) are still rejected as an SSRF / protocol-abuse guard, and an unusable URL in `KBC_STORAGE_API_URL` now fails fast with a clean config error rather than a raw pydantic traceback.", - "Security: the env-synthesized `__env__` project lives in memory only. It is marked `ephemeral` and stripped by `ConfigStore.save()`, so the `KBC_TOKEN` from the environment is never written to `config.json`. Opt-in is explicit (the `KBAGENT_PROJECT_FROM_ENV` flag, not the mere presence of `KBC_TOKEN`) to avoid a phantom project surprising a developer who exported the token only for `project add`. If the flag is set but the credential vars are missing, the CLI fails fast with a clear error instead of silently skipping.", + "Security: the env-synthesized `__env__` project lives in memory only. It is marked `ephemeral` and stripped by `ConfigStore.save()`, so the `KBC_TOKEN` from the environment is never written to `config.json`. Opt-in is explicit (the `KBAGENT_PROJECT_FROM_ENV` flag, not the mere presence of `KBC_TOKEN`) to avoid a phantom project surprising a developer who exported the token only for `project add`. If the flag is set but the credential vars are missing, the CLI fails fast with a clear error instead of silently skipping. 
Mutating ops that target the synthesized project (`project remove/edit/rename`, branch switch) are rejected with an actionable message rather than reporting a success that silently vanishes on the next load. `project list` recovers the `project_id` offline from the token prefix; the real project name shows via `project status` / `project info`.", ], "0.49.0": [ "New: `kbagent dev-portal` command group — v1 operations against the Keboola Developer Portal (`apps-api.keboola.com`). Lets component developers inspect and update portal entries without leaving the terminal. Read commands (`dev-portal list --vendor V`, `dev-portal get --app VENDOR.APP_ID`) are unrestricted and support peer-config research (pull reference schemas from existing extractors/writers for design reference). Write commands (`dev-portal create`, `dev-portal patch`, `dev-portal upload-icon`, `dev-portal publish`, `dev-portal deprecate`) always print the full pending request diff and then require the user to type a random hex code on a real terminal; there is no `--yes` flag and no env-var bypass; non-TTY shells exit 6 (`EXIT_PERMISSION_DENIED`). `--dry-run` produces the same preview and exits 0 -- the agent-safe path.", diff --git a/src/keboola_agent_cli/config_store.py b/src/keboola_agent_cli/config_store.py index f0a424d7..c169781b 100644 --- a/src/keboola_agent_cli/config_store.py +++ b/src/keboola_agent_cli/config_store.py @@ -279,6 +279,27 @@ def _strip_ephemeral_projects(config: AppConfig) -> AppConfig: clean.default_project = next(iter(clean.projects), "") return clean + @staticmethod + def _reject_ephemeral_mutation(config: AppConfig, alias: str, operation: str) -> None: + """Block mutations targeting an env-synthesized project (issue #359). + + A `__env__` project injected from `KBAGENT_PROJECT_FROM_ENV` exists only + in memory and is stripped on save, so `remove`/`edit`/`rename`/branch + ops would otherwise report success and then silently vanish on the next + `load()`. 
Reject them with a clear, actionable message instead. A real + persisted project that happens to use the alias (``ephemeral=False``) is + unaffected. + """ + project = config.projects.get(alias) + if project is not None and project.ephemeral: + raise ConfigError( + f"Project '{alias}' is synthesized from environment variables " + f"({ENV_PROJECT_FROM_ENV}) and cannot be {operation} -- it lives " + f"only in memory. To change it, update {ENV_KBC_TOKEN} / " + f"{ENV_KBC_STORAGE_API_URL}; to manage a persisted project, unset " + f"{ENV_PROJECT_FROM_ENV} and use 'project add'." + ) + def save(self, config: AppConfig) -> None: """Save configuration to disk with secure file permissions (0600). @@ -387,6 +408,7 @@ def remove_project(self, alias: str) -> None: config = self.load() if alias not in config.projects: raise ConfigError(f"Project '{alias}' not found.") + self._reject_ephemeral_mutation(config, alias, "removed") del config.projects[alias] if config.default_project == alias: config.default_project = next(iter(config.projects), "") @@ -410,6 +432,7 @@ def set_project_branch(self, alias: str, branch_id: int | None) -> None: config = self.load() if alias not in config.projects: raise ConfigError(f"Project '{alias}' not found.") + self._reject_ephemeral_mutation(config, alias, "modified") config.projects[alias].active_branch_id = branch_id self.save(config) @@ -428,6 +451,7 @@ def edit_project(self, alias: str, **kwargs: str | int | None) -> None: config = self.load() if alias not in config.projects: raise ConfigError(f"Project '{alias}' not found.") + self._reject_ephemeral_mutation(config, alias, "edited") project = config.projects[alias] for key, value in kwargs.items(): if hasattr(project, key) and value is not None: @@ -455,6 +479,7 @@ def rename_project(self, old_alias: str, new_alias: str) -> None: config = self.load() if old_alias not in config.projects: raise ConfigError(f"Project '{old_alias}' not found.") + self._reject_ephemeral_mutation(config, old_alias, 
"renamed") if new_alias in config.projects: raise ConfigError( f"Cannot rename '{old_alias}' to '{new_alias}': " diff --git a/tests/test_config_store.py b/tests/test_config_store.py index c4d3969b..74abb084 100644 --- a/tests/test_config_store.py +++ b/tests/test_config_store.py @@ -930,6 +930,40 @@ def test_ephemeral_never_persisted( # In-memory object passed by the caller is left intact. assert "__env__" in config.projects + def test_mutating_env_project_is_rejected( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """remove/edit/rename/set-branch on __env__ fail clearly, not silently.""" + self._opt_in(monkeypatch) + store = ConfigStore(config_dir=tmp_config_dir) + with pytest.raises(ConfigError, match="synthesized from environment"): + store.remove_project("__env__") + with pytest.raises(ConfigError, match="synthesized from environment"): + store.edit_project("__env__", token="901-77777-otherXXXXXXXXXXXXXXXXXX") + with pytest.raises(ConfigError, match="synthesized from environment"): + store.rename_project("__env__", "renamed") + with pytest.raises(ConfigError, match="synthesized from environment"): + store.set_project_branch("__env__", 123) + + def test_real_persisted_env_alias_still_mutable( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """A real (non-ephemeral) project under the __env__ alias stays editable.""" + store = ConfigStore(config_dir=tmp_config_dir) + store.save( + AppConfig( + projects={ + "__env__": ProjectConfig( + stack_url="https://real.keboola.com", + token="901-11111-realPersistedTokenXXXXXXXXXXXX", + ) + }, + ) + ) + # No opt-in -> the persisted entry is the only one; editing must work. 
+ store.edit_project("__env__", project_name="Renamed") + assert store.get_project("__env__").project_name == "Renamed" + def test_default_blanked_when_ephemeral_stripped( self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch ) -> None: diff --git a/tests/test_services.py b/tests/test_services.py index cde0bed9..c022554e 100644 --- a/tests/test_services.py +++ b/tests/test_services.py @@ -47,6 +47,28 @@ def test_add_project_success(self, tmp_config_dir: Path) -> None: mock_client.verify_token.assert_called_once() mock_client.close.assert_called_once() + def test_add_project_normalizes_bare_host_url(self, tmp_config_dir: Path) -> None: + """add_project passes a bare host through normalize_stack_url before storing.""" + store = ConfigStore(config_dir=tmp_config_dir) + captured: dict[str, str] = {} + + def factory(url: str, token: str): + captured["url"] = url + return make_mock_client(project_name="Production", project_id=9999) + + service = ProjectService(config_store=store, client_factory=factory) + + result = service.add_project( + alias="prod", + stack_url="connection.keboola.com/admin/projects/9999/dashboard", + token="901-55555-fakeTestTokenDoNotUseXXXXXXXX", + ) + + # Verification client and the stored/returned URL all use the clean base. 
+ assert captured["url"] == "https://connection.keboola.com" + assert result["stack_url"] == "https://connection.keboola.com" + assert store.get_project("prod").stack_url == "https://connection.keboola.com" + def test_add_project_invalid_token(self, tmp_config_dir: Path) -> None: """add_project raises KeboolaApiError when token verification fails.""" store = ConfigStore(config_dir=tmp_config_dir) From 67f2810711cee5cb17565c0071f78fef6fdb2f76 Mon Sep 17 00:00:00 2001 From: Petr Date: Fri, 29 May 2026 21:42:19 +0200 Subject: [PATCH 5/5] fix(headless): skip org-info backfill for ephemeral __env__ project Devin review flagged that `project status` in headless mode could write a config.json to disk via `_backfill_org_info`: the __env__ project always has empty org_id/org_name, so the backfill kept trying to persist it. `save()` strips the ephemeral entry (so no token leaked), but the file was still created -- breaking the "no config.json on disk" promise -- and the futile backfill re-ran on every `project status`. Skip ephemeral projects when building the backfill update set. When __env__ is the only candidate, the update set stays empty and no file is written at all. Test: get_status() under env-injection leaves the config dir file-free. Full non-e2e suite: 3776 passed. --- src/keboola_agent_cli/changelog.py | 2 +- .../services/project_service.py | 7 ++++++ tests/test_services.py | 23 +++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/keboola_agent_cli/changelog.py b/src/keboola_agent_cli/changelog.py index c4114cef..58b2d70d 100644 --- a/src/keboola_agent_cli/changelog.py +++ b/src/keboola_agent_cli/changelog.py @@ -11,7 +11,7 @@ "0.50.0": [ "New: headless / token-only invocation (issue #359). Set `KBAGENT_PROJECT_FROM_ENV=1` together with `KBC_TOKEN` + `KBC_STORAGE_API_URL` and kbagent synthesizes an in-memory project under the reserved alias `__env__` -- no `kbagent project add`, no `config.json` on disk. Lets a daemon (e.g. 
the jasnost bridge), a container, or a CI job run any storage/job/config command with `--project __env__`, or talk to a `kbagent serve` started the same way. Both the CLI and `serve` resolve the project through the same `ConfigStore.load()` chokepoint, so both work from the single env-injection.", "UX: stack URLs are now normalized everywhere a project is created (`project add`, `project edit --url`, and the headless `__env__` injection). A bare host (`connection.keboola.com`), a trailing slash, surrounding whitespace, or even a full project deep-link (`https://connection.keboola.com/admin/projects/10105/dashboard`) are all reduced to the clean `https://` base instead of erroring. Explicit non-https schemes (`http://`, `file://`, ...) are still rejected as an SSRF / protocol-abuse guard, and an unusable URL in `KBC_STORAGE_API_URL` now fails fast with a clean config error rather than a raw pydantic traceback.", - "Security: the env-synthesized `__env__` project lives in memory only. It is marked `ephemeral` and stripped by `ConfigStore.save()`, so the `KBC_TOKEN` from the environment is never written to `config.json`. Opt-in is explicit (the `KBAGENT_PROJECT_FROM_ENV` flag, not the mere presence of `KBC_TOKEN`) to avoid a phantom project surprising a developer who exported the token only for `project add`. If the flag is set but the credential vars are missing, the CLI fails fast with a clear error instead of silently skipping. Mutating ops that target the synthesized project (`project remove/edit/rename`, branch switch) are rejected with an actionable message rather than reporting a success that silently vanishes on the next load. `project list` recovers the `project_id` offline from the token prefix; the real project name shows via `project status` / `project info`.", + "Security: the env-synthesized `__env__` project lives in memory only. 
It is marked `ephemeral` and stripped by `ConfigStore.save()`, so the `KBC_TOKEN` from the environment is never written to `config.json`. Opt-in is explicit (the `KBAGENT_PROJECT_FROM_ENV` flag, not the mere presence of `KBC_TOKEN`) to avoid a phantom project surprising a developer who exported the token only for `project add`. If the flag is set but the credential vars are missing, the CLI fails fast with a clear error instead of silently skipping. Mutating ops that target the synthesized project (`project remove/edit/rename`, branch switch) are rejected with an actionable message rather than reporting a success that silently vanishes on the next load. `project list` recovers the `project_id` offline from the token prefix; the real project name shows via `project status` / `project info`. The org-info backfill that `project status` runs skips the ephemeral project, so even `project status` writes nothing to disk in headless mode.", ], "0.49.0": [ "New: `kbagent dev-portal` command group — v1 operations against the Keboola Developer Portal (`apps-api.keboola.com`). Lets component developers inspect and update portal entries without leaving the terminal. Read commands (`dev-portal list --vendor V`, `dev-portal get --app VENDOR.APP_ID`) are unrestricted and support peer-config research (pull reference schemas from existing extractors/writers for design reference). Write commands (`dev-portal create`, `dev-portal patch`, `dev-portal upload-icon`, `dev-portal publish`, `dev-portal deprecate`) always print the full pending request diff and then require the user to type a random hex code on a real terminal; there is no `--yes` flag and no env-var bypass; non-TTY shells exit 6 (`EXIT_PERMISSION_DENIED`). 
`--dry-run` produces the same preview and exits 0 -- the agent-safe path.", diff --git a/src/keboola_agent_cli/services/project_service.py b/src/keboola_agent_cli/services/project_service.py index af9788e5..48353079 100644 --- a/src/keboola_agent_cli/services/project_service.py +++ b/src/keboola_agent_cli/services/project_service.py @@ -635,6 +635,13 @@ def _backfill_org_info( current = self._config_store.get_project(alias) if current is None: continue + if current.ephemeral: + # Env-synthesized __env__ (issue #359): its org info can never + # be persisted (save() strips it), so backfilling is futile and + # would trigger a spurious config.json write on disk -- breaking + # the "no config.json in headless mode" guarantee and repeating + # on every `project status`. Skip it. + continue if current.org_id is not None and current.org_name: continue # already populated; skip updates[alias] = (new_id, new_name) diff --git a/tests/test_services.py b/tests/test_services.py index c022554e..578c9b80 100644 --- a/tests/test_services.py +++ b/tests/test_services.py @@ -494,6 +494,29 @@ def test_status_backfills_org_info_for_legacy_projects(self, tmp_config_dir: Pat assert refreshed.org_id == 438 assert refreshed.org_name == "Keboola Demo" + def test_status_no_backfill_for_ephemeral_env_project( + self, tmp_config_dir: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Headless __env__ status must not trigger a config.json write (issue #359). + + The env project's org info can never be persisted (save strips it), so + backfilling it would create a spurious config.json on disk and repeat on + every `project status`. get_status() must leave the dir file-free. 
+ """ + monkeypatch.setenv("KBAGENT_PROJECT_FROM_ENV", "1") + monkeypatch.setenv("KBC_TOKEN", "901-99999-fakeHeadlessTokenDoNotUseXXXXX") + monkeypatch.setenv("KBC_STORAGE_API_URL", "https://connection.keboola.com") + store = ConfigStore(config_dir=tmp_config_dir) + mock_client = make_mock_client(org_id=438, org_name="Keboola Demo") + service = ProjectService( + config_store=store, + client_factory=lambda url, token: mock_client, + ) + + service.get_status() + + assert not (tmp_config_dir / "config.json").exists() + def test_status_no_backfill_when_org_info_already_set(self, tmp_config_dir: Path) -> None: """Projects with org info already populated must not be re-written.""" store = ConfigStore(config_dir=tmp_config_dir)