From 5d5b6f4c9a48f157a6b909172c4914f7b787609b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?=
 <54936225+sourcehawk@users.noreply.github.com>
Date: Sat, 30 May 2026 18:13:37 +0200
Subject: [PATCH 1/8] fix(cloud): capture stderr in CLIResult

execCLI used cmd.Output(), discarding the child's stderr where gcloud
and aws write their error context. A non-zero run_cli returned an empty
stdout and no explanation. Capture stderr into a capped buffer, surface
it as CLIResult.Stderr (json "stderr,omitempty"), and truncate it at the
same byte limit as stdout. The no-shell, minimal-env, closed-stdin
guarantees and the "non-zero exit is a normal result" contract are
unchanged.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 pkg/mcp/cloud/harness.go      | 20 ++++++++++++++++++--
 pkg/mcp/cloud/harness_test.go | 24 ++++++++++++++++++++++++
 pkg/mcp/cloud/provider.go     |  1 +
 3 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/pkg/mcp/cloud/harness.go b/pkg/mcp/cloud/harness.go
index c185be7..8d11dc0 100644
--- a/pkg/mcp/cloud/harness.go
+++ b/pkg/mcp/cloud/harness.go
@@ -1,6 +1,7 @@
 package cloud
 
 import (
+	"bytes"
 	"context"
 	"errors"
 	"os/exec"
@@ -16,20 +17,35 @@ const defaultOutputLimit = 64 * 1024
 // so a poisoned PATH cannot redirect the binary and ambient secrets do not
 // leak), closed stdin (no interactive prompt), and stdout capped at limit. A
 // non-zero exit is a normal result carried in ExitCode, not a Go error; a Go
-// error means the process could not be run at all.
+// error means the process could not be run at all. Stderr — where gcloud/aws
+// write their error context — is captured alongside stdout and capped at the
+// same limit, so a non-zero exit carries an explanation instead of an empty
+// result.
 func execCLI(ctx context.Context, binPath string, argv []string, env []string, limit int) (CLIResult, error) {
 	cmd := exec.CommandContext(ctx, binPath, argv...)
 	cmd.Env = env
 	cmd.Stdin = nil
 
-	out, err := cmd.Output()
+	var stdout, stderr bytes.Buffer
+	cmd.Stdout = &stdout
+	cmd.Stderr = &stderr
+	err := cmd.Run()
+
 	res := CLIResult{}
+	out := stdout.Bytes()
 	if len(out) > limit {
 		out = out[:limit]
 		res.Truncated = true
 	}
 	res.Stdout = string(out)
 
+	errOut := stderr.Bytes()
+	if len(errOut) > limit {
+		errOut = errOut[:limit]
+		res.Truncated = true
+	}
+	res.Stderr = string(errOut)
+
 	if err != nil {
 		var exitErr *exec.ExitError
 		if errors.As(err, &exitErr) {
diff --git a/pkg/mcp/cloud/harness_test.go b/pkg/mcp/cloud/harness_test.go
index 9730e34..0d4861f 100644
--- a/pkg/mcp/cloud/harness_test.go
+++ b/pkg/mcp/cloud/harness_test.go
@@ -17,3 +17,27 @@ func TestExecCLIExitCode(t *testing.T) {
 	require.NoError(t, err, "non-zero exit should not be a Go error")
 	assert.Equal(t, 1, r.ExitCode)
 }
+
+// TestExecCLICapturesStderr proves a non-zero exit carries the child's stderr,
+// the context gcloud/aws write errors to. Without it run_cli would surface an
+// empty stdout and no explanation for the failure.
+func TestExecCLICapturesStderr(t *testing.T) {
+	t.Parallel()
+	// /bin/sh here is only the test fixture producing a stderr write + nonzero
+	// exit; the harness itself never shells (see harness_security_test.go).
+	r, err := execCLI(context.Background(), "/bin/sh",
+		[]string{"-c", "echo boom 1>&2; exit 3"}, nil, 4096)
+	require.NoError(t, err, "non-zero exit should not be a Go error")
+	assert.Equal(t, 3, r.ExitCode)
+	assert.Contains(t, r.Stderr, "boom", "stderr must be captured")
+}
+
+// TestExecCLITruncatesStderr caps stderr at the same limit as stdout so a
+// noisy provider error cannot blow the context budget.
+func TestExecCLITruncatesStderr(t *testing.T) {
+	t.Parallel()
+	r, err := execCLI(context.Background(), "/bin/sh",
+		[]string{"-c", "printf '%0.sx' $(seq 1 100) 1>&2; exit 1"}, nil, 10)
+	require.NoError(t, err)
+	assert.LessOrEqual(t, len(r.Stderr), 10, "stderr exceeded limit")
+}
diff --git a/pkg/mcp/cloud/provider.go b/pkg/mcp/cloud/provider.go
index 0cab162..111ebad 100644
--- a/pkg/mcp/cloud/provider.go
+++ b/pkg/mcp/cloud/provider.go
@@ -73,6 +73,7 @@ type IdentityStatus struct {
 // is never surfaced; the harness caps output and reports truncation.
 type CLIResult struct {
 	Stdout    string `json:"stdout"`
+	Stderr    string `json:"stderr,omitempty"`
 	Truncated bool   `json:"truncated"`
 	ExitCode  int    `json:"exit_code"`
 }

From bccfd3bce3439f7cb0f4e8b87f36413d3a589dc1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?=
 <54936225+sourcehawk@users.noreply.github.com>
Date: Sat, 30 May 2026 18:15:52 +0200
Subject: [PATCH 2/8] fix(cloud): match allowlisted verb chains as a prefix and
 reject metacharacter tokens
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allows exact-matched the positional subcommand path, so an allowlisted
"compute instances describe" rejected "compute instances describe my-vm"
— the resource operand made the path unequal, leaving most describe/get
commands advertised but unusable. Match the allowlisted path as a
token-wise prefix of argv's leading positionals so trailing resource
operands ride through. There is no shell, so a trailing token is an inert
argument.

As defense in depth, validateArgv now rejects any argv token that is or
contains a shell-control sequence (`;`, `|`, `&`, backtick, `$(`, `>`,
`<`, newline). A metacharacter token like ["...","list",";","rm"] is
refused by this check rather than by the allowlist; a literal resource
name or a key=value filter passes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 pkg/mcp/cloud/allowlist.go      | 27 ++++++++-----------------
 pkg/mcp/cloud/allowlist_test.go | 28 +++++++++++++++++++++++++-
 pkg/mcp/cloud/validate.go       | 29 ++++++++++++++++++++++-----
 pkg/mcp/cloud/validate_test.go  | 35 +++++++++++++++++++++++++++++++++
 4 files changed, 94 insertions(+), 25 deletions(-)

diff --git a/pkg/mcp/cloud/allowlist.go b/pkg/mcp/cloud/allowlist.go
index 409a90b..7379a35 100644
--- a/pkg/mcp/cloud/allowlist.go
+++ b/pkg/mcp/cloud/allowlist.go
@@ -109,15 +109,17 @@ func filterAllowlist(list *CommandAllowlist, extra DenyFloor) *CommandAllowlist
 	return out
 }
 
-// Allows reports whether argv's positional subcommand path exactly equals an
-// allowlisted command path. Flag tokens and their values do not participate;
-// only the leading positionals do. The match is exact rather than prefix so a
-// surplus positional — a shell metacharacter token, an extra argument — never
-// rides through on the back of an allowed prefix.
+// Allows reports whether an allowlisted command path is a token-wise prefix of
+// argv's leading positional subcommand path. Flag tokens and their values do
+// not participate; only the leading positionals do. Prefix rather than exact
+// match lets a describe/get verb chain carry its trailing resource operand
+// (`compute instances describe my-vm`): there is no shell, so a trailing token
+// is an inert argument to the already-dispatched subcommand. Surplus tokens
+// that are shell-control sequences are caught separately in validateArgv.
 func (a *CommandAllowlist) Allows(argv []string) bool {
 	path := subcommandPath(argv)
 	for _, c := range a.Commands {
-		if pathEqual(path, normalizePath(c.Path)) {
+		if pathHasPrefix(path, normalizePath(c.Path)) {
 			return true
 		}
 	}
@@ -169,16 +171,3 @@ func pathHasPrefix(path, prefix []string) bool {
 	}
 	return true
 }
-
-// pathEqual reports whether two token paths are identical.
-func pathEqual(a, b []string) bool {
-	if len(a) != len(b) {
-		return false
-	}
-	for i := range a {
-		if a[i] != b[i] {
-			return false
-		}
-	}
-	return true
-}
diff --git a/pkg/mcp/cloud/allowlist_test.go b/pkg/mcp/cloud/allowlist_test.go
index 1dbd512..295cc04 100644
--- a/pkg/mcp/cloud/allowlist_test.go
+++ b/pkg/mcp/cloud/allowlist_test.go
@@ -48,7 +48,7 @@ func TestLoadCommandAllowlistMergesProviderDenyFloorAdditions(t *testing.T) {
 		"compute instances list should remain allowed")
 }
 
-func TestAllowsMatchesLongestPathPrefix(t *testing.T) {
+func TestAllowsMatchesVerbChainAsPrefix(t *testing.T) {
 	t.Parallel()
 	al := &CommandAllowlist{Commands: []Command{{Path: "compute firewall-rules list"}}}
 	assert.True(t, al.Allows([]string{"compute", "firewall-rules", "list", "--project", "prod"}),
@@ -56,3 +56,29 @@ func TestAllowsMatchesLongestPathPrefix(t *testing.T) {
 	assert.False(t, al.Allows([]string{"compute", "firewall-rules", "delete"}),
 		"a different verb under the same group must not be allowed")
 }
+
+func TestAllowsAcceptsResourceOperandAfterVerbChain(t *testing.T) {
+	t.Parallel()
+	al := &CommandAllowlist{Commands: []Command{{Path: "compute instances describe"}}}
+	// A describe/get command takes a resource operand as a trailing positional;
+	// the allowlisted verb chain must match as a prefix so the operand rides
+	// through. There is no shell, so the operand is an inert argument.
+	assert.True(t, al.Allows([]string{"compute", "instances", "describe", "my-vm", "--project", "prod"}),
+		"an allowlisted verb chain followed by a resource operand must pass")
+	assert.True(t, al.Allows([]string{"compute", "instances", "describe", "my-vm", "us-vm-2"}),
+		"trailing positionals after the verb chain must pass")
+}
+
+func TestAllowsRejectsArgvShorterThanPath(t *testing.T) {
+	t.Parallel()
+	al := &CommandAllowlist{Commands: []Command{{Path: "compute instances describe"}}}
+	assert.False(t, al.Allows([]string{"compute", "instances"}),
+		"an argv shorter than the allowlisted path is not a match")
+}
+
+func TestAllowsRejectsDifferentVerbAtPrefixDepth(t *testing.T) {
+	t.Parallel()
+	al := &CommandAllowlist{Commands: []Command{{Path: "compute instances describe"}}}
+	assert.False(t, al.Allows([]string{"compute", "instances", "delete", "my-vm"}),
+		"a different verb at the same depth must not match")
+}
diff --git a/pkg/mcp/cloud/validate.go b/pkg/mcp/cloud/validate.go
index 770b995..74448da 100644
--- a/pkg/mcp/cloud/validate.go
+++ b/pkg/mcp/cloud/validate.go
@@ -29,15 +29,21 @@ func (s ScopeAllowlist) allowedFor(flag string) ([]string, bool) {
 	}
 }
 
-// validateArgv enforces the no-bypass contract on one argv before exec: the
-// positional subcommand path is on the allowlist, no token is a deny-floored
-// flag or arg-prefix, and every target-selecting flag value is within scope.
-// It runs entirely on argv tokens — there is no shell, so metacharacter tokens
-// are inert positionals that simply fail the exact allowlist match.
+// validateArgv enforces the no-bypass contract on one argv before exec: no
+// token carries a shell-control sequence, the positional subcommand path is on
+// the allowlist, no token is a deny-floored flag or arg-prefix, and every
+// target-selecting flag value is within scope. It runs entirely on argv tokens —
+// there is no shell, so a shell-control token would be an inert positional
+// anyway; the metachar check rejects it outright as defense in depth.
 func validateArgv(argv []string, allow *CommandAllowlist, scope ScopeAllowlist) error {
 	if len(argv) == 0 {
 		return fmt.Errorf("empty command")
 	}
+	for _, tok := range argv {
+		if isShellControlToken(tok) {
+			return fmt.Errorf("argv token contains a shell-control character: %q", tok)
+		}
+	}
 	if !allow.Allows(argv) {
 		return fmt.Errorf("subcommand not on the allowlist: %q", strings.Join(subcommandPath(argv), " "))
 	}
@@ -76,6 +82,19 @@ func validateArgv(argv []string, allow *CommandAllowlist, scope ScopeAllowlist)
 	return nil
 }
 
+// isShellControlToken reports whether tok is or contains a shell-control
+// sequence. The harness never invokes a shell, so these are already inert; this
+// is defense in depth, rejecting `;`, `|`, `&`, backtick, `$(`, `>`, `<`, and
+// embedded newlines so an argv like ["...", "describe", ";", "rm"] is refused
+// outright. A literal resource name ("my-vm") or a key=value flag
+// ("--filter=name=foo") contains none of these and passes.
+func isShellControlToken(tok string) bool {
+	if strings.ContainsAny(tok, ";|&`<>\n") {
+		return true
+	}
+	return strings.Contains(tok, "$(")
+}
+
 // splitFlag separates a "--flag=value" token into its flag and value. For a
 // bare "--flag" or a non-flag token it returns the token unchanged with no
 // inline value.
diff --git a/pkg/mcp/cloud/validate_test.go b/pkg/mcp/cloud/validate_test.go
index ba77a32..b87f551 100644
--- a/pkg/mcp/cloud/validate_test.go
+++ b/pkg/mcp/cloud/validate_test.go
@@ -29,6 +29,10 @@ func TestValidateArgvRejectsDenyFloorAndScope(t *testing.T) {
 		{"metachar-semicolon", []string{"compute", "instances", "list", ";", "rm", "-rf", "/"}, false},
 		{"metachar-pipe", []string{"compute", "instances", "list", "|", "cat"}, false},
 		{"metachar-subshell", []string{"compute", "instances", "list", "$(whoami)"}, false},
+		{"metachar-backtick", []string{"compute", "instances", "list", "`id`"}, false},
+		{"metachar-redirect", []string{"compute", "instances", "list", ">", "/tmp/x"}, false},
+		{"metachar-and", []string{"compute", "instances", "list", "&&", "rm"}, false},
+		{"metachar-embedded", []string{"compute", "instances", "list", "--filter=a$(id)"}, false},
 		{"not-allowed", []string{"iam", "service-accounts", "create"}, false},
 		{"empty", []string{}, false},
 	}
@@ -58,6 +62,37 @@ func TestValidateArgvEqualsFormFlag(t *testing.T) {
 		"expected --project=prod (equals form, in scope) to validate")
 }
 
+func TestValidateArgvAllowsResourceOperand(t *testing.T) {
+	t.Parallel()
+	al := &CommandAllowlist{Commands: []Command{{Path: "compute instances describe"}}}
+	scope := ScopeAllowlist{Projects: []string{"prod"}}
+	// describe/get verbs take a resource operand; the allowlisted verb chain
+	// matches as a prefix, and the operand is an inert positional argument.
+	assert.NoError(t, validateArgv(
+		[]string{"compute", "instances", "describe", "my-vm", "--project", "prod"}, al, scope),
+		"an allowlisted verb chain plus a resource operand must validate")
+}
+
+func TestValidateArgvRejectsMetacharInAnyPosition(t *testing.T) {
+	t.Parallel()
+	al := &CommandAllowlist{Commands: []Command{{Path: "compute instances describe"}}}
+	scope := ScopeAllowlist{}
+	for _, argv := range [][]string{
+		{"compute", "instances", "describe", "my-vm", ";", "rm"},
+		{"compute", "instances", "describe", ";", "my-vm"},
+		{"compute", "instances", "describe", "my-vm|cat"},
+		{"compute", "instances", "describe", "my-vm", "&&", "id"},
+	} {
+		assert.Errorf(t, validateArgv(argv, al, scope),
+			"a metacharacter token in %v must be rejected", argv)
+	}
+	// A literal resource name and a key=value filter contain no shell-control
+	// characters and must pass.
+	assert.NoError(t, validateArgv(
+		[]string{"compute", "instances", "describe", "my-vm", "--filter=name=foo"}, al, scope),
+		"a plain resource name and a key=value filter must pass")
+}
+
 func TestValidateArgvEmptyScopeAllowsAnyTarget(t *testing.T) {
 	t.Parallel()
 	al := &CommandAllowlist{Commands: []Command{{Path: "projects list"}}}

From 72cc79aa18c141758db182406a6c5f40ec53ce56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?=
 <54936225+sourcehawk@users.noreply.github.com>
Date: Sat, 30 May 2026 18:17:06 +0200
Subject: [PATCH 3/8] fix(cloud/gcp): validate impersonation instead of
 comparing the base account

`gcloud auth list` reports the operator's base active account, not the SA
selected by CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT, so the old
active==expected check marked correctly-configured impersonation invalid.
Validity now means "impersonation is pinned to the expected SA and the pin
works": read the in-process impersonation env, confirm it equals the
expected target, then run a minimal impersonated read
(gcloud auth print-access-token) to prove the grant is active. AssumedIdentity
is the SA on success; failures degrade through Valid/Hint with the captured
stderr.

NOTE: needs verification against a live gcloud before relying on the exact
print-access-token shape (flagged in a code comment).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 pkg/mcp/cloud/providers/gcp/identity.go       |  87 ++++++++-------
 pkg/mcp/cloud/providers/gcp/identity_test.go  | 100 ++++++++----------
 pkg/mcp/cloud/providers/gcp/inventory_test.go |   6 ++
 3 files changed, 95 insertions(+), 98 deletions(-)

diff --git a/pkg/mcp/cloud/providers/gcp/identity.go b/pkg/mcp/cloud/providers/gcp/identity.go
index b036108..75d9466 100644
--- a/pkg/mcp/cloud/providers/gcp/identity.go
+++ b/pkg/mcp/cloud/providers/gcp/identity.go
@@ -2,65 +2,62 @@ package gcp
 
 import (
 	"context"
-	"encoding/json"
-	"fmt"
+	"os"
 
 	"github.com/sourcehawk/triagent/pkg/mcp/cloud"
 )
 
-// authAccount is one entry of `gcloud auth list --format=json`.
-type authAccount struct {
-	Account string `json:"account"`
-	Status  string `json:"status"`
-}
-
 // Identity is the read-only whoami. It is called by cloud.Probe with an
 // unvalidated RunFunc, so it may use the deny-floored `auth` subcommand
-// directly: it reads the active account and reports the session valid only when
-// that account equals expected, the impersonation target the launcher pinned. A
+// directly. Validity means "impersonation is pinned to the expected SA and the
+// pin actually works", not "logged in directly as the SA": gcloud impersonation
+// keeps the operator's base account active while every call assumes the target
+// SA, so comparing the base account to the target would mark a correctly
+// configured session invalid.
+//
+// The probe reads the in-process impersonation env (the launcher sets it on the
+// subprocess; the agent cannot reach it), confirms it is pinned to the expected
+// target, then runs a minimal impersonated read to prove the pin took effect. A
 // degraded auth state surfaces through Valid and Hint, never a Go error.
+//
+// NOTE: validated against gcloud's documented impersonation behavior; verify
+// against a live gcloud before relying on the exact print-access-token shape.
 func (p *Provider) Identity(ctx context.Context, run cloud.RunFunc, expected string) (cloud.IdentityStatus, error) {
-	res, err := run(ctx, []string{"auth", "list", "--filter=status:ACTIVE", "--format=json"})
-	if err != nil {
-		return cloud.IdentityStatus{Provider: "gcp", Valid: false, Hint: err.Error()}, nil
-	}
+	st := cloud.IdentityStatus{Provider: "gcp"}
 
-	var accounts []authAccount
-	if err := json.Unmarshal([]byte(res.Stdout), &accounts); err != nil {
-		return cloud.IdentityStatus{
-			Provider: "gcp",
-			Valid:    false,
-			Hint:     fmt.Sprintf("parse gcloud auth list output: %v", err),
-		}, nil
-	}
-
-	active := activeAccount(accounts)
-	st := cloud.IdentityStatus{Provider: "gcp", AssumedIdentity: active}
-
-	switch {
-	case expected == "":
+	if expected == "" {
 		st.Valid = false
 		st.Hint = "no impersonation target pinned; set " + EnvImpersonate + " on the cloud MCP subprocess"
-	case active == "":
-		st.Valid = false
-		st.Hint = "no active gcloud account; run: gcloud auth login"
-	case active != expected:
+		return st, nil
+	}
+
+	pinned := os.Getenv(EnvImpersonate)
+	if pinned != expected {
 		st.Valid = false
-		st.Hint = fmt.Sprintf("active account %q is not the pinned identity %q", active, expected)
-	default:
-		st.Valid = true
+		st.Hint = EnvImpersonate + " is not pinned to the expected identity " + expected +
+			"; the launcher must set it on the cloud MCP subprocess"
+		return st, nil
 	}
-	return st, nil
-}
 
-// activeAccount returns the first account marked ACTIVE, or "" when none is. The
-// --filter=status:ACTIVE argv already narrows this server-side; the status check
-// is the belt to that braces.
-func activeAccount(accounts []authAccount) string {
-	for _, a := range accounts {
-		if a.Status == "ACTIVE" {
-			return a.Account
+	// Minimal impersonated read: succeeds only when the pinned SA can mint a
+	// token, which proves the impersonation grant is in place and active.
+	res, err := run(ctx, []string{"auth", "print-access-token", "--format=json"})
+	if err != nil {
+		st.Valid = false
+		st.Hint = err.Error()
+		return st, nil
+	}
+	if res.ExitCode != 0 {
+		st.Valid = false
+		hint := "impersonation failed; check the serviceAccountTokenCreator grant or re-auth: gcloud auth login"
+		if res.Stderr != "" {
+			hint = res.Stderr + " — " + hint
 		}
+		st.Hint = hint
+		return st, nil
 	}
-	return ""
+
+	st.AssumedIdentity = expected
+	st.Valid = true
+	return st, nil
 }
diff --git a/pkg/mcp/cloud/providers/gcp/identity_test.go b/pkg/mcp/cloud/providers/gcp/identity_test.go
index 04ba498..063d583 100644
--- a/pkg/mcp/cloud/providers/gcp/identity_test.go
+++ b/pkg/mcp/cloud/providers/gcp/identity_test.go
@@ -10,92 +10,86 @@ import (
 	"github.com/stretchr/testify/require"
 )
 
-// authListJSON is captured `gcloud auth list --format=json` output: an array of
-// accounts, exactly one with status ACTIVE.
-const authListJSON = `[
-  {
-    "account": "ro-sa@proj.iam.gserviceaccount.com",
-    "status": "ACTIVE"
-  },
-  {
-    "account": "operator@example.com",
-    "status": ""
-  }
-]`
+const targetSA = "ro-sa@proj.iam.gserviceaccount.com"
+
+// fakeRun returns a canned CLIResult/error for a given argv, recording the
+// argv it was called with so a test can assert the probe drove the right CLI.
+type fakeRun struct {
+	result cloud.CLIResult
+	err    error
+	calls  [][]string
+}
 
-func runReturning(out string) cloud.RunFunc {
-	return func(context.Context, []string) (cloud.CLIResult, error) {
-		return cloud.CLIResult{Stdout: out}, nil
-	}
+func (f *fakeRun) run(_ context.Context, argv []string) (cloud.CLIResult, error) {
+	f.calls = append(f.calls, argv)
+	return f.result, f.err
 }
 
-func TestIdentityResolvesActiveAccountAsTarget(t *testing.T) {
+func TestIdentityInvalidWhenNoImpersonationTargetPinned(t *testing.T) {
+	t.Setenv(EnvImpersonate, targetSA)
 	p, err := newWithBinary("/usr/bin/gcloud")
 	require.NoError(t, err)
 
-	st, err := p.Identity(context.Background(), runReturning(authListJSON), "ro-sa@proj.iam.gserviceaccount.com")
+	f := &fakeRun{result: cloud.CLIResult{Stdout: `"token"`}}
+	st, err := p.Identity(context.Background(), f.run, "")
 	require.NoError(t, err)
-	assert.Equal(t, "gcp", st.Provider)
-	assert.Equal(t, "ro-sa@proj.iam.gserviceaccount.com", st.AssumedIdentity)
-	assert.True(t, st.Valid, "active account equals the impersonation target")
+	assert.False(t, st.Valid, "no pinned target means the session is not validly pinned")
+	assert.NotEmpty(t, st.Hint)
+	assert.Empty(t, f.calls, "no probe should run without a target")
 }
 
-func TestIdentityInvalidWhenActiveAccountIsNotTheTarget(t *testing.T) {
+func TestIdentityInvalidWhenImpersonationEnvNotPinnedToExpected(t *testing.T) {
+	t.Setenv(EnvImpersonate, "someone-else@proj.iam.gserviceaccount.com")
 	p, err := newWithBinary("/usr/bin/gcloud")
 	require.NoError(t, err)
 
-	mismatch := `[{"account": "operator@example.com", "status": "ACTIVE"}]`
-	st, err := p.Identity(context.Background(), runReturning(mismatch), "ro-sa@proj.iam.gserviceaccount.com")
+	f := &fakeRun{result: cloud.CLIResult{Stdout: `"token"`}}
+	st, err := p.Identity(context.Background(), f.run, targetSA)
 	require.NoError(t, err)
-	assert.Equal(t, "operator@example.com", st.AssumedIdentity)
-	assert.False(t, st.Valid, "active account differs from the impersonation target")
+	assert.False(t, st.Valid, "impersonation env pinned to a different SA is invalid")
 	assert.NotEmpty(t, st.Hint)
+	assert.Empty(t, f.calls, "a mismatched pin short-circuits before the probe")
 }
 
-func TestIdentityInvalidWhenNoActiveAccount(t *testing.T) {
+func TestIdentityValidWhenImpersonatedReadSucceeds(t *testing.T) {
+	t.Setenv(EnvImpersonate, targetSA)
 	p, err := newWithBinary("/usr/bin/gcloud")
 	require.NoError(t, err)
 
-	st, err := p.Identity(context.Background(), runReturning(`[]`), "ro-sa@proj.iam.gserviceaccount.com")
+	f := &fakeRun{result: cloud.CLIResult{Stdout: `"ya29.token"`}}
+	st, err := p.Identity(context.Background(), f.run, targetSA)
 	require.NoError(t, err)
-	assert.Empty(t, st.AssumedIdentity)
-	assert.False(t, st.Valid)
-	assert.NotEmpty(t, st.Hint)
+	assert.Equal(t, "gcp", st.Provider)
+	assert.Equal(t, targetSA, st.AssumedIdentity, "the SA is the identity the session acts as")
+	assert.True(t, st.Valid, "a successful impersonated read proves the pin took effect")
+	require.Len(t, f.calls, 1)
+	assert.Equal(t, []string{"auth", "print-access-token", "--format=json"}, f.calls[0])
 }
 
-func TestIdentityInvalidWhenNoImpersonationTargetPinned(t *testing.T) {
+func TestIdentityInvalidWhenImpersonatedReadFails(t *testing.T) {
+	t.Setenv(EnvImpersonate, targetSA)
 	p, err := newWithBinary("/usr/bin/gcloud")
 	require.NoError(t, err)
 
-	st, err := p.Identity(context.Background(), runReturning(authListJSON), "")
+	f := &fakeRun{result: cloud.CLIResult{
+		ExitCode: 1,
+		Stderr:   "ERROR: Permission 'iam.serviceAccounts.getAccessToken' denied",
+	}}
+	st, err := p.Identity(context.Background(), f.run, targetSA)
 	require.NoError(t, err)
-	assert.False(t, st.Valid, "no pinned target means the session is not validly pinned")
-	assert.NotEmpty(t, st.Hint)
+	assert.False(t, st.Valid, "a failed impersonated read means the pin did not take effect")
+	assert.Contains(t, st.Hint, "iam.serviceAccounts.getAccessToken",
+		"the hint surfaces the captured stderr")
 }
 
 func TestIdentitySurfacesRunErrorAsHint(t *testing.T) {
+	t.Setenv(EnvImpersonate, targetSA)
 	p, err := newWithBinary("/usr/bin/gcloud")
 	require.NoError(t, err)
 
-	failing := cloud.RunFunc(func(context.Context, []string) (cloud.CLIResult, error) {
-		return cloud.CLIResult{}, errors.New("gcloud not authenticated")
-	})
-	st, err := p.Identity(context.Background(), failing, "ro-sa@proj.iam.gserviceaccount.com")
+	f := &fakeRun{err: errors.New("gcloud not authenticated")}
+	st, err := p.Identity(context.Background(), f.run, targetSA)
 	require.NoError(t, err, "a degraded auth state surfaces through Valid/Hint, not a Go error")
 	assert.False(t, st.Valid)
 	assert.Contains(t, st.Hint, "gcloud not authenticated")
 }
-
-func TestIdentityCallsAuthListWithJSONFormat(t *testing.T) {
-	p, err := newWithBinary("/usr/bin/gcloud")
-	require.NoError(t, err)
-
-	var gotArgv []string
-	capturing := cloud.RunFunc(func(_ context.Context, argv []string) (cloud.CLIResult, error) {
-		gotArgv = argv
-		return cloud.CLIResult{Stdout: authListJSON}, nil
-	})
-	_, err = p.Identity(context.Background(), capturing, "ro-sa@proj.iam.gserviceaccount.com")
-	require.NoError(t, err)
-	assert.Equal(t, []string{"auth", "list", "--filter=status:ACTIVE", "--format=json"}, gotArgv)
-}
diff --git a/pkg/mcp/cloud/providers/gcp/inventory_test.go b/pkg/mcp/cloud/providers/gcp/inventory_test.go
index 66404f4..97a5c4a 100644
--- a/pkg/mcp/cloud/providers/gcp/inventory_test.go
+++ b/pkg/mcp/cloud/providers/gcp/inventory_test.go
@@ -26,6 +26,12 @@ const projectsListJSON = `[
   }
 ]`
 
+func runReturning(out string) cloud.RunFunc {
+	return func(context.Context, []string) (cloud.CLIResult, error) {
+		return cloud.CLIResult{Stdout: out}, nil
+	}
+}
+
 func TestInventoryProjectsIDAndName(t *testing.T) {
 	t.Parallel()
 	p, err := newWithBinary("/usr/bin/gcloud")

From 23dc6003bdfcbc5a4d03bc646b400b433bdc92bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?=
 <54936225+sourcehawk@users.noreply.github.com>
Date: Sat, 30 May 2026 18:17:42 +0200
Subject: [PATCH 4/8] fix(cloud/gcp): check exit code before parsing projects
 list

execCLI returns a non-zero exit as CLIResult{ExitCode:n} with err==nil, so
a failed `gcloud projects list` was JSON-parsed and surfaced as a
misleading parse error. Check ExitCode before unmarshalling and return the
exit code plus captured stderr, mirroring the AWS provider. (The gcp
identity probe added in the prior commit already checks ExitCode.)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 pkg/mcp/cloud/providers/gcp/inventory.go      |  3 +++
 pkg/mcp/cloud/providers/gcp/inventory_test.go | 13 +++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/pkg/mcp/cloud/providers/gcp/inventory.go b/pkg/mcp/cloud/providers/gcp/inventory.go
index bdf1cba..ff36933 100644
--- a/pkg/mcp/cloud/providers/gcp/inventory.go
+++ b/pkg/mcp/cloud/providers/gcp/inventory.go
@@ -25,6 +25,9 @@ func (p *Provider) Inventory(ctx context.Context, run cloud.RunFunc) (cloud.Inve
 	if err != nil {
 		return cloud.Inventory{}, fmt.Errorf("gcloud projects list: %w", err)
 	}
+	if res.ExitCode != 0 {
+		return cloud.Inventory{}, fmt.Errorf("gcloud projects list failed (exit %d): %s", res.ExitCode, res.Stderr)
+	}
 
 	var projects []project
 	if err := json.Unmarshal([]byte(res.Stdout), &projects); err != nil {
diff --git a/pkg/mcp/cloud/providers/gcp/inventory_test.go b/pkg/mcp/cloud/providers/gcp/inventory_test.go
index 97a5c4a..77fe8c0 100644
--- a/pkg/mcp/cloud/providers/gcp/inventory_test.go
+++ b/pkg/mcp/cloud/providers/gcp/inventory_test.go
@@ -70,6 +70,19 @@ func TestInventoryCallsProjectsListWithJSONFormat(t *testing.T) {
 		"the inventory argv must match the allowlisted `projects list` verb chain exactly")
 }
 
+func TestInventoryErrorsOnNonZeroExit(t *testing.T) {
+	t.Parallel()
+	p, err := newWithBinary("/usr/bin/gcloud")
+	require.NoError(t, err)
+
+	failing := cloud.RunFunc(func(context.Context, []string) (cloud.CLIResult, error) {
+		return cloud.CLIResult{ExitCode: 1, Stderr: "ERROR: (gcloud.projects.list) PERMISSION_DENIED"}, nil
+	})
+	_, err = p.Inventory(context.Background(), failing)
+	require.Error(t, err, "a non-zero exit is a real failure, not a parse error")
+	assert.Contains(t, err.Error(), "PERMISSION_DENIED", "the error surfaces the captured stderr")
+}
+
 func TestInventoryErrorsWhenRunErrors(t *testing.T) {
 	t.Parallel()
 	p, err := newWithBinary("/usr/bin/gcloud")

From 638b04ba353e7eaeca5051bbe12eb42af0e2389e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?=
 <54936225+sourcehawk@users.noreply.github.com>
Date: Sat, 30 May 2026 18:19:01 +0200
Subject: [PATCH 5/8] fix(cloud/aws): fall back to caller account only on
 Organizations-unavailable

Inventory fell back to the single-account projection on ANY non-zero exit
or transport error, masking throttling, network faults, and other real
failures. Now that stderr is captured, fall back only when the stderr
names an Organizations-unavailable condition (AccessDenied, "not a member
of an organization", or AWSOrganizationsNotInUseException). Any other
non-zero exit returns the exit code plus stderr; a transport error is
surfaced rather than silently degrading.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 pkg/mcp/cloud/providers/aws/inventory.go      | 42 +++++++++++++++---
 pkg/mcp/cloud/providers/aws/inventory_test.go | 43 ++++++++++++++++---
 2 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/pkg/mcp/cloud/providers/aws/inventory.go b/pkg/mcp/cloud/providers/aws/inventory.go
index 6f76164..2b59d6f 100644
--- a/pkg/mcp/cloud/providers/aws/inventory.go
+++ b/pkg/mcp/cloud/providers/aws/inventory.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"strings"
 
 	"github.com/sourcehawk/triagent/pkg/mcp/cloud"
 )
@@ -21,15 +22,23 @@ type organizationsAccount struct {
 }
 
 // Inventory projects the AWS accounts the pinned identity can read. The primary
-// source is `aws organizations list-accounts`; when the identity lacks
-// Organizations access (AccessDenied, surfaced as a non-zero exit or a transport
-// error) it falls back to the single account the caller is in, derived from `aws
-// sts get-caller-identity`. Both commands are allowlisted so the projection works
-// under the validated run core.
+// source is `aws organizations list-accounts`; only when that fails with an
+// Organizations-unavailable condition (AccessDenied or the account not being a
+// member of an organization) does it fall back to the single account the caller
+// is in, derived from `aws sts get-caller-identity`. Any other failure — a
+// transport error, throttling, a network fault — is surfaced rather than masked
+// behind the single-account fallback. Both commands are allowlisted so the
+// projection works under the validated run core.
 func (p *Provider) Inventory(ctx context.Context, run cloud.RunFunc) (cloud.Inventory, error) {
 	res, err := run(ctx, []string{"organizations", "list-accounts", "--output", "json"})
-	if err != nil || res.ExitCode != 0 {
-		return p.callerAccountInventory(ctx, run)
+	if err != nil {
+		return cloud.Inventory{}, fmt.Errorf("aws organizations list-accounts: %w", err)
+	}
+	if res.ExitCode != 0 {
+		if isOrgsUnavailable(res.Stderr) {
+			return p.callerAccountInventory(ctx, run)
+		}
+		return cloud.Inventory{}, fmt.Errorf("aws organizations list-accounts failed (exit %d): %s", res.ExitCode, res.Stderr)
 	}
 
 	var parsed listAccountsResult
@@ -47,6 +56,25 @@ func (p *Provider) Inventory(ctx context.Context, run cloud.RunFunc) (cloud.Inve
 	return cloud.Inventory{Scopes: scopes}, nil
 }
 
+// isOrgsUnavailable reports whether a non-zero `organizations list-accounts`
+// stderr is the benign "Organizations is not available to this identity"
+// condition — the only failure the single-account fallback is correct for. AWS
+// returns AccessDenied when the role lacks Organizations permissions and
+// AWSOrganizationsNotInUseException (or "not a member of an organization") when
+// the account is standalone.
+func isOrgsUnavailable(stderr string) bool {
+	for _, marker := range []string{
+		"AccessDenied",
+		"not a member of an organization",
+		"AWSOrganizationsNotInUseException",
+	} {
+		if strings.Contains(stderr, marker) {
+			return true
+		}
+	}
+	return false
+}
+
 // callerAccountInventory derives the single-account inventory from the caller
 // identity, the fallback when Organizations access is denied.
 func (p *Provider) callerAccountInventory(ctx context.Context, run cloud.RunFunc) (cloud.Inventory, error) {
diff --git a/pkg/mcp/cloud/providers/aws/inventory_test.go b/pkg/mcp/cloud/providers/aws/inventory_test.go
index 56b3c75..63053b0 100644
--- a/pkg/mcp/cloud/providers/aws/inventory_test.go
+++ b/pkg/mcp/cloud/providers/aws/inventory_test.go
@@ -38,10 +38,8 @@ func TestInventoryProjectsActiveAccounts(t *testing.T) {
 func TestInventoryFallsBackToCallerAccountOnAccessDenied(t *testing.T) {
 	f := &fakeRun{
 		results: map[string]cloud.CLIResult{
-			"sts get-caller-identity": {Stdout: callerIdentityAssumedRole},
-		},
-		errs: map[string]error{
-			"organizations list-accounts": errAccessDenied,
+			"organizations list-accounts": {ExitCode: 254, Stderr: "An error occurred (AccessDeniedException) when calling the ListAccounts operation: ..."},
+			"sts get-caller-identity":     {Stdout: callerIdentityAssumedRole},
 		},
 	}
 	p, err := newWithBinary("/usr/bin/aws")
@@ -54,10 +52,10 @@ func TestInventoryFallsBackToCallerAccountOnAccessDenied(t *testing.T) {
 	assert.Equal(t, "111122223333", inv.Scopes[0].ID)
 }
 
-func TestInventoryFallsBackOnAccessDeniedExitCode(t *testing.T) {
+func TestInventoryFallsBackWhenOrgsNotInUse(t *testing.T) {
 	f := &fakeRun{
 		results: map[string]cloud.CLIResult{
-			"organizations list-accounts": {ExitCode: 254, Stdout: "An error occurred (AccessDeniedException) when calling the ListAccounts operation: ..."},
+			"organizations list-accounts": {ExitCode: 254, Stderr: "An error occurred (AWSOrganizationsNotInUseException) when calling the ListAccounts operation"},
 			"sts get-caller-identity":     {Stdout: callerIdentityAssumedRole},
 		},
 	}
@@ -69,3 +67,36 @@ func TestInventoryFallsBackOnAccessDeniedExitCode(t *testing.T) {
 	require.Len(t, inv.Scopes, 1)
 	assert.Equal(t, "111122223333", inv.Scopes[0].ID)
 }
+
+func TestInventorySurfacesNonAccessDeniedFailure(t *testing.T) {
+	f := &fakeRun{
+		results: map[string]cloud.CLIResult{
+			"organizations list-accounts": {ExitCode: 254, Stderr: "An error occurred (ThrottlingException) when calling the ListAccounts operation: Rate exceeded"},
+			"sts get-caller-identity":     {Stdout: callerIdentityAssumedRole},
+		},
+	}
+	p, err := newWithBinary("/usr/bin/aws")
+	require.NoError(t, err)
+
+	_, err = p.Inventory(context.Background(), f.run)
+	require.Error(t, err, "a throttling failure must surface, not silently fall back")
+	assert.Contains(t, err.Error(), "ThrottlingException", "the error surfaces the captured stderr")
+	// The fallback caller-identity call must not have run.
+	for _, c := range f.calls {
+		assert.NotEqual(t, []string{"sts", "get-caller-identity", "--output", "json"}, c,
+			"a non-AccessDenied failure must not trigger the single-account fallback")
+	}
+}
+
+func TestInventorySurfacesTransportError(t *testing.T) {
+	f := &fakeRun{
+		errs: map[string]error{
+			"organizations list-accounts": errAccessDenied,
+		},
+	}
+	p, err := newWithBinary("/usr/bin/aws")
+	require.NoError(t, err)
+
+	_, err = p.Inventory(context.Background(), f.run)
+	require.Error(t, err, "a process that could not be run is a transport failure, surfaced not masked")
+}

From 82be891d60874e01a18eed8111a2fc4f30c30c97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?=
 <54936225+sourcehawk@users.noreply.github.com>
Date: Sat, 30 May 2026 18:19:51 +0200
Subject: [PATCH 6/8] fix(cloud/aws): parse assumed-role ARNs across partitions
 and IAM paths

assumedRoleARN hardcoded arn:aws:sts:: and Cut at the first slash, so
GovCloud (arn:aws-us-gov:) / China (arn:aws-cn:) ARNs and roles carrying
an IAM path (assumed-role/path/to/Role/session) misparsed. Accept any
partition, and split so the role path-and-name is everything between
assumed-role/ and the final /<session>, rebuilding the IAM role ARN under
the same partition.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 pkg/mcp/cloud/providers/aws/identity.go      | 49 +++++++++++++++---
 pkg/mcp/cloud/providers/aws/identity_test.go | 54 ++++++++++++++++++++
 2 files changed, 95 insertions(+), 8 deletions(-)

diff --git a/pkg/mcp/cloud/providers/aws/identity.go b/pkg/mcp/cloud/providers/aws/identity.go
index c13b30d..9ba94cc 100644
--- a/pkg/mcp/cloud/providers/aws/identity.go
+++ b/pkg/mcp/cloud/providers/aws/identity.go
@@ -70,23 +70,56 @@ func evaluateIdentity(arn, expectedRoleARN string) (bool, string) {
 
 // assumedRoleARN reports whether arn is an STS assumed-role ARN and, if so,
 // returns the canonical IAM role ARN behind it. An assumed-role ARN has the
-// shape arn:aws:sts::<account>:assumed-role/<role-name>/<session>; the IAM role
-// it stands for is arn:aws:iam::<account>:role/<role-name>.
+// shape arn:<partition>:sts::<account>:assumed-role/<role-path-and-name>/<session>,
+// across the aws, aws-us-gov, and aws-cn partitions. The role keeps any IAM
+// path: the role-path-and-name is everything between "assumed-role/" and the
+// final "/<session>" segment, so a path-prefixed role like
+// assumed-role/team/sub/Role/session resolves to role/team/sub/Role. The IAM
+// role it stands for is arn:<partition>:iam::<account>:role/<role-path-and-name>.
 func assumedRoleARN(arn string) (string, bool) {
-	const prefix = "arn:aws:sts::"
+	const stsInfix = ":sts::"
 	const marker = ":assumed-role/"
-	if !strings.HasPrefix(arn, prefix) {
+	partition, ok := arnPartition(arn)
+	if !ok {
+		return "", false
+	}
+	if !strings.HasPrefix(arn, "arn:"+partition+stsInfix) {
 		return "", false
 	}
 	idx := strings.Index(arn, marker)
 	if idx < 0 {
 		return "", false
 	}
-	account := arn[len(prefix):idx]
+	account := arn[len("arn:"+partition+stsInfix):idx]
 	rest := arn[idx+len(marker):]
-	roleName, _, found := strings.Cut(rest, "/")
-	if !found || roleName == "" || account == "" {
+	// The session name is the final slash-delimited segment; the role path and
+	// name is everything before it.
+	rolePath, _, found := lastCut(rest, "/")
+	if !found || rolePath == "" || account == "" {
 		return "", false
 	}
-	return fmt.Sprintf("arn:aws:iam::%s:role/%s", account, roleName), true
+	return fmt.Sprintf("arn:%s:iam::%s:role/%s", partition, account, rolePath), true
+}
+
+// arnPartition returns the partition segment of an ARN (the field between the
+// first two colons of "arn:<partition>:...").
+func arnPartition(arn string) (string, bool) {
+	rest, ok := strings.CutPrefix(arn, "arn:")
+	if !ok {
+		return "", false
+	}
+	partition, _, found := strings.Cut(rest, ":")
+	if !found || partition == "" {
+		return "", false
+	}
+	return partition, true
+}
+
+// lastCut splits s around the last instance of sep, returning the text before
+// and after it. found reports whether sep appears in s.
+func lastCut(s, sep string) (before, after string, found bool) {
+	if i := strings.LastIndex(s, sep); i >= 0 {
+		return s[:i], s[i+len(sep):], true
+	}
+	return s, "", false
 }
diff --git a/pkg/mcp/cloud/providers/aws/identity_test.go b/pkg/mcp/cloud/providers/aws/identity_test.go
index 47dbd91..1eff919 100644
--- a/pkg/mcp/cloud/providers/aws/identity_test.go
+++ b/pkg/mcp/cloud/providers/aws/identity_test.go
@@ -90,6 +90,60 @@ func TestIdentityRejectsMismatchedExpectedRoleArn(t *testing.T) {
 	assert.NotEmpty(t, st.Hint)
 }
 
+func TestAssumedRoleARNParsesPartitionsAndPaths(t *testing.T) {
+	t.Parallel()
+	cases := []struct {
+		name string
+		arn  string
+		want string
+		ok   bool
+	}{
+		{
+			"commercial",
+			"arn:aws:sts::111122223333:assumed-role/triagent-readonly/session",
+			"arn:aws:iam::111122223333:role/triagent-readonly",
+			true,
+		},
+		{
+			"gov-cloud",
+			"arn:aws-us-gov:sts::111122223333:assumed-role/triagent-readonly/session",
+			"arn:aws-us-gov:iam::111122223333:role/triagent-readonly",
+			true,
+		},
+		{
+			"china",
+			"arn:aws-cn:sts::111122223333:assumed-role/triagent-readonly/session",
+			"arn:aws-cn:iam::111122223333:role/triagent-readonly",
+			true,
+		},
+		{
+			"iam-path",
+			"arn:aws:sts::111122223333:assumed-role/team/sub/triagent-readonly/session",
+			"arn:aws:iam::111122223333:role/team/sub/triagent-readonly",
+			true,
+		},
+		{
+			"plain-user",
+			"arn:aws:iam::111122223333:user/operator",
+			"",
+			false,
+		},
+		{
+			"no-session",
+			"arn:aws:sts::111122223333:assumed-role/triagent-readonly",
+			"",
+			false,
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got, ok := assumedRoleARN(tc.arn)
+			assert.Equal(t, tc.ok, ok)
+			assert.Equal(t, tc.want, got)
+		})
+	}
+}
+
 func TestIdentityInvalidOnNonZeroExit(t *testing.T) {
 	f := &fakeRun{results: map[string]cloud.CLIResult{
 		"sts get-caller-identity": {ExitCode: 255, Stdout: ""},

From 344360a95b5ee7bff35a5321ebf1b0b6d9046853 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?=
 <54936225+sourcehawk@users.noreply.github.com>
Date: Sat, 30 May 2026 18:20:12 +0200
Subject: [PATCH 7/8] docs(cloud): correct ScopeAllowlist account-scope claim
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The doc comment said the agent "cannot pivot to an un-allowlisted
project, account, or region", but allowedFor only maps --project and
--region/--zone — Accounts is not argv-enforced. State it accurately:
project and region/zone are enforced against argv; account reach is
constrained by the pinned identity/role and the deny-floored --account /
--profile flags, not validated here.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 pkg/mcp/cloud/validate.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pkg/mcp/cloud/validate.go b/pkg/mcp/cloud/validate.go
index 74448da..9b8469b 100644
--- a/pkg/mcp/cloud/validate.go
+++ b/pkg/mcp/cloud/validate.go
@@ -6,8 +6,11 @@ import (
 )
 
 // ScopeAllowlist is the deployment's set of cloud targets any run_cli argv may
-// reference. An empty field means that target axis is unconstrained. The agent
-// cannot pivot to an un-allowlisted project, account, or region.
+// reference. An empty field means that target axis is unconstrained. Project and
+// region/zone are enforced here against argv (allowedFor maps --project and
+// --region/--zone). Account reach is not validated at the argv layer: it is
+// constrained by the pinned identity or role the session assumes, and the
+// identity-selecting flags (--account, --profile) sit on the deny floor.
 type ScopeAllowlist struct {
 	Projects []string `json:"projects,omitempty"`
 	Accounts []string `json:"accounts,omitempty"`

From 69cb065bb2accca31a9f857552049635fda973f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?=
 <54936225+sourcehawk@users.noreply.github.com>
Date: Sat, 30 May 2026 18:21:17 +0200
Subject: [PATCH 8/8] fix(cloud): fail closed on a malformed cloud scope

parseCloudScope swallowed a malformed TRIAGENT_CLOUD_SCOPE and returned an
empty (unconstrained) ScopeAllowlist, failing OPEN and silently widening
run_cli. It now returns an error, and runCloud parses the scope before
resolving the provider and aborts startup on a malformed value, so a
misconfigured scope can never silently drop the deployment's restrictions.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 cmd/triagent-mcp/serve.go            | 20 +++++++++++-------
 cmd/triagent-mcp/serve_cloud_test.go | 31 ++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/cmd/triagent-mcp/serve.go b/cmd/triagent-mcp/serve.go
index 7fab3b8..ced36e8 100644
--- a/cmd/triagent-mcp/serve.go
+++ b/cmd/triagent-mcp/serve.go
@@ -446,6 +446,10 @@ func runCloud(ctx context.Context, f serveFlags) error {
 	if f.cloudProvider == "" {
 		return fmt.Errorf("--provider is required (gcp or aws) (set --provider or $%s)", cloud.EnvProvider)
 	}
+	scope, err := parseCloudScope(os.Getenv(cloud.EnvScope))
+	if err != nil {
+		return fmt.Errorf("build cloud mcp server: %w", err)
+	}
 	provider, err := providers.New(f.cloudProvider)
 	if err != nil {
 		return err
@@ -453,7 +457,7 @@ func runCloud(ctx context.Context, f serveFlags) error {
 	srv, err := cloud.New(cloud.Options{
 		Provider:         provider,
 		AllowlistPath:    os.Getenv(cloud.EnvAllowlistPath),
-		Scope:            parseCloudScope(os.Getenv(cloud.EnvScope)),
+		Scope:            scope,
 		ExpectedIdentity: os.Getenv(cloud.EnvExpectedIdentity),
 	})
 	if err != nil {
@@ -465,18 +469,18 @@ func runCloud(ctx context.Context, f serveFlags) error {
 
 // parseCloudScope decodes the JSON-encoded target scope the launcher froze into
 // a cloud.ScopeAllowlist. An empty value yields an empty scope, which leaves the
-// target axes unconstrained; a malformed value is logged and treated the same,
-// so a bad profile entry never silently widens scope.
-func parseCloudScope(raw string) cloud.ScopeAllowlist {
+// target axes unconstrained. A malformed value is an error that aborts startup:
+// failing closed, since a misconfigured scope must never silently widen run_cli
+// by dropping the deployment's restrictions.
+func parseCloudScope(raw string) (cloud.ScopeAllowlist, error) {
 	var scope cloud.ScopeAllowlist
 	if raw == "" {
-		return scope
+		return scope, nil
 	}
 	if err := json.Unmarshal([]byte(raw), &scope); err != nil {
-		log.Warn("mcp serve --kind=cloud: ignoring malformed scope", "error", err)
-		return cloud.ScopeAllowlist{}
+		return cloud.ScopeAllowlist{}, fmt.Errorf("malformed cloud scope in $%s: %w", cloud.EnvScope, err)
 	}
-	return scope
+	return scope, nil
 }
 
 func runGit(ctx context.Context, f serveFlags) error {
diff --git a/cmd/triagent-mcp/serve_cloud_test.go b/cmd/triagent-mcp/serve_cloud_test.go
index 32504a0..6358782 100644
--- a/cmd/triagent-mcp/serve_cloud_test.go
+++ b/cmd/triagent-mcp/serve_cloud_test.go
@@ -34,3 +34,34 @@ func TestServeCmd_KnowsCloudKind(t *testing.T) {
 	cmd := serveCmd()
 	assert.Contains(t, cmd.Long, "cloud", "serve --help should list cloud")
 }
+
+func TestParseCloudScope_EmptyYieldsUnconstrained(t *testing.T) {
+	t.Parallel()
+	scope, err := parseCloudScope("")
+	require.NoError(t, err)
+	assert.Empty(t, scope.Projects)
+	assert.Empty(t, scope.Regions)
+	assert.Empty(t, scope.Accounts)
+}
+
+func TestParseCloudScope_ValidJSON(t *testing.T) {
+	t.Parallel()
+	scope, err := parseCloudScope(`{"projects":["prod"],"regions":["us-central1"]}`)
+	require.NoError(t, err)
+	assert.Equal(t, []string{"prod"}, scope.Projects)
+	assert.Equal(t, []string{"us-central1"}, scope.Regions)
+}
+
+func TestParseCloudScope_MalformedFailsClosed(t *testing.T) {
+	t.Parallel()
+	_, err := parseCloudScope(`{"projects":`)
+	require.Error(t, err, "a malformed scope must fail closed, not silently drop restrictions")
+}
+
+func TestRunCloud_MalformedScopeAborts(t *testing.T) {
+	t.Setenv("TRIAGENT_CLOUD_PROVIDER", "gcp")
+	t.Setenv("TRIAGENT_CLOUD_SCOPE", `{"projects":`)
+	err := runCloud(context.Background(), serveFlags{kind: "cloud", cloudProvider: "gcp"})
+	require.Error(t, err, "a malformed scope must abort cloud-server startup")
+	assert.Contains(t, err.Error(), "scope", "the error should name the scope")
+}