diff --git a/cmd/triagent-mcp/serve.go b/cmd/triagent-mcp/serve.go index 7fab3b8..ced36e8 100644 --- a/cmd/triagent-mcp/serve.go +++ b/cmd/triagent-mcp/serve.go @@ -446,6 +446,10 @@ func runCloud(ctx context.Context, f serveFlags) error { if f.cloudProvider == "" { return fmt.Errorf("--provider is required (gcp or aws) (set --provider or $%s)", cloud.EnvProvider) } + scope, err := parseCloudScope(os.Getenv(cloud.EnvScope)) + if err != nil { + return fmt.Errorf("build cloud mcp server: %w", err) + } provider, err := providers.New(f.cloudProvider) if err != nil { return err @@ -453,7 +457,7 @@ func runCloud(ctx context.Context, f serveFlags) error { srv, err := cloud.New(cloud.Options{ Provider: provider, AllowlistPath: os.Getenv(cloud.EnvAllowlistPath), - Scope: parseCloudScope(os.Getenv(cloud.EnvScope)), + Scope: scope, ExpectedIdentity: os.Getenv(cloud.EnvExpectedIdentity), }) if err != nil { @@ -465,18 +469,18 @@ func runCloud(ctx context.Context, f serveFlags) error { // parseCloudScope decodes the JSON-encoded target scope the launcher froze into // a cloud.ScopeAllowlist. An empty value yields an empty scope, which leaves the -// target axes unconstrained; a malformed value is logged and treated the same, -// so a bad profile entry never silently widens scope. -func parseCloudScope(raw string) cloud.ScopeAllowlist { +// target axes unconstrained. A malformed value is an error that aborts startup: +// failing closed, since a misconfigured scope must never silently widen run_cli +// by dropping the deployment's restrictions. +func parseCloudScope(raw string) (cloud.ScopeAllowlist, error) { var scope cloud.ScopeAllowlist if raw == "" { - return scope + return scope, nil } if err := json.Unmarshal([]byte(raw), &scope); err != nil { - log.Warn("mcp serve --kind=cloud: ignoring malformed scope", "error", err) - return cloud.ScopeAllowlist{} + return cloud.ScopeAllowlist{}, fmt.Errorf("malformed cloud scope in $%s: %w", cloud.EnvScope, err) } - return scope + return scope, nil } func runGit(ctx context.Context, f serveFlags) error { diff --git a/cmd/triagent-mcp/serve_cloud_test.go b/cmd/triagent-mcp/serve_cloud_test.go index 32504a0..6358782 100644 --- a/cmd/triagent-mcp/serve_cloud_test.go +++ b/cmd/triagent-mcp/serve_cloud_test.go @@ -34,3 +34,34 @@ func TestServeCmd_KnowsCloudKind(t *testing.T) { cmd := serveCmd() assert.Contains(t, cmd.Long, "cloud", "serve --help should list cloud") } + +func TestParseCloudScope_EmptyYieldsUnconstrained(t *testing.T) { + t.Parallel() + scope, err := parseCloudScope("") + require.NoError(t, err) + assert.Empty(t, scope.Projects) + assert.Empty(t, scope.Regions) + assert.Empty(t, scope.Accounts) +} + +func TestParseCloudScope_ValidJSON(t *testing.T) { + t.Parallel() + scope, err := parseCloudScope(`{"projects":["prod"],"regions":["us-central1"]}`) + require.NoError(t, err) + assert.Equal(t, []string{"prod"}, scope.Projects) + assert.Equal(t, []string{"us-central1"}, scope.Regions) +} + +func TestParseCloudScope_MalformedFailsClosed(t *testing.T) { + t.Parallel() + _, err := parseCloudScope(`{"projects":`) + require.Error(t, err, "a malformed scope must fail closed, not silently drop restrictions") +} + +func TestRunCloud_MalformedScopeAborts(t *testing.T) { + t.Setenv("TRIAGENT_CLOUD_PROVIDER", "gcp") + t.Setenv("TRIAGENT_CLOUD_SCOPE", `{"projects":`) + err := runCloud(context.Background(), serveFlags{kind: "cloud", cloudProvider: "gcp"}) + require.Error(t, err, "a malformed scope must abort cloud-server startup") + assert.Contains(t, err.Error(), "scope", "the error should name the scope") +} diff --git a/pkg/mcp/cloud/allowlist.go b/pkg/mcp/cloud/allowlist.go index 409a90b..7379a35 100644 --- a/pkg/mcp/cloud/allowlist.go +++ b/pkg/mcp/cloud/allowlist.go @@ -109,15 +109,17 @@ func filterAllowlist(list *CommandAllowlist, extra DenyFloor) *CommandAllowlist return out } -// Allows reports whether argv's positional subcommand path exactly equals an -// allowlisted command path. Flag tokens and their values do not participate; -// only the leading positionals do. The match is exact rather than prefix so a -// surplus positional — a shell metacharacter token, an extra argument — never -// rides through on the back of an allowed prefix. +// Allows reports whether an allowlisted command path is a token-wise prefix of +// argv's leading positional subcommand path. Flag tokens and their values do +// not participate; only the leading positionals do. Prefix rather than exact +// match lets a describe/get verb chain carry its trailing resource operand +// (`compute instances describe my-vm`): there is no shell, so a trailing token +// is an inert argument to the already-dispatched subcommand. Surplus tokens +// that are shell-control sequences are caught separately in validateArgv. func (a *CommandAllowlist) Allows(argv []string) bool { path := subcommandPath(argv) for _, c := range a.Commands { - if pathEqual(path, normalizePath(c.Path)) { + if pathHasPrefix(path, normalizePath(c.Path)) { return true } } @@ -169,16 +171,3 @@ func pathHasPrefix(path, prefix []string) bool { } return true } - -// pathEqual reports whether two token paths are identical. -func pathEqual(a, b []string) bool { - if len(a) != len(b) { - return false - } - for i := range a { - if a[i] != b[i] { - return false - } - } - return true -} diff --git a/pkg/mcp/cloud/allowlist_test.go b/pkg/mcp/cloud/allowlist_test.go index 1dbd512..295cc04 100644 --- a/pkg/mcp/cloud/allowlist_test.go +++ b/pkg/mcp/cloud/allowlist_test.go @@ -48,7 +48,7 @@ func TestLoadCommandAllowlistMergesProviderDenyFloorAdditions(t *testing.T) { "compute instances list should remain allowed") } -func TestAllowsMatchesLongestPathPrefix(t *testing.T) { +func TestAllowsMatchesVerbChainAsPrefix(t *testing.T) { t.Parallel() al := &CommandAllowlist{Commands: []Command{{Path: "compute firewall-rules list"}}} assert.True(t, al.Allows([]string{"compute", "firewall-rules", "list", "--project", "prod"}), @@ -56,3 +56,29 @@ func TestAllowsMatchesLongestPathPrefix(t *testing.T) { assert.False(t, al.Allows([]string{"compute", "firewall-rules", "delete"}), "a different verb under the same group must not be allowed") } + +func TestAllowsAcceptsResourceOperandAfterVerbChain(t *testing.T) { + t.Parallel() + al := &CommandAllowlist{Commands: []Command{{Path: "compute instances describe"}}} + // A describe/get command takes a resource operand as a trailing positional; + // the allowlisted verb chain must match as a prefix so the operand rides + // through. There is no shell, so the operand is an inert argument. + assert.True(t, al.Allows([]string{"compute", "instances", "describe", "my-vm", "--project", "prod"}), + "an allowlisted verb chain followed by a resource operand must pass") + assert.True(t, al.Allows([]string{"compute", "instances", "describe", "my-vm", "us-vm-2"}), + "trailing positionals after the verb chain must pass") +} + +func TestAllowsRejectsArgvShorterThanPath(t *testing.T) { + t.Parallel() + al := &CommandAllowlist{Commands: []Command{{Path: "compute instances describe"}}} + assert.False(t, al.Allows([]string{"compute", "instances"}), + "an argv shorter than the allowlisted path is not a match") +} + +func TestAllowsRejectsDifferentVerbAtPrefixDepth(t *testing.T) { + t.Parallel() + al := &CommandAllowlist{Commands: []Command{{Path: "compute instances describe"}}} + assert.False(t, al.Allows([]string{"compute", "instances", "delete", "my-vm"}), + "a different verb at the same depth must not match") +} diff --git a/pkg/mcp/cloud/harness.go b/pkg/mcp/cloud/harness.go index c185be7..8d11dc0 100644 --- a/pkg/mcp/cloud/harness.go +++ b/pkg/mcp/cloud/harness.go @@ -1,6 +1,7 @@ package cloud import ( + "bytes" "context" "errors" "os/exec" @@ -16,20 +17,35 @@ const defaultOutputLimit = 64 * 1024 // so a poisoned PATH cannot redirect the binary and ambient secrets do not // leak), closed stdin (no interactive prompt), and stdout capped at limit. A // non-zero exit is a normal result carried in ExitCode, not a Go error; a Go -// error means the process could not be run at all. +// error means the process could not be run at all. Stderr — where gcloud/aws +// write their error context — is captured alongside stdout and capped at the +// same limit, so a non-zero exit carries an explanation instead of an empty +// result. func execCLI(ctx context.Context, binPath string, argv []string, env []string, limit int) (CLIResult, error) { cmd := exec.CommandContext(ctx, binPath, argv...) cmd.Env = env cmd.Stdin = nil - out, err := cmd.Output() + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err := cmd.Run() + res := CLIResult{} + out := stdout.Bytes() if len(out) > limit { out = out[:limit] res.Truncated = true } res.Stdout = string(out) + errOut := stderr.Bytes() + if len(errOut) > limit { + errOut = errOut[:limit] + res.Truncated = true + } + res.Stderr = string(errOut) + if err != nil { var exitErr *exec.ExitError if errors.As(err, &exitErr) { diff --git a/pkg/mcp/cloud/harness_test.go b/pkg/mcp/cloud/harness_test.go index 9730e34..0d4861f 100644 --- a/pkg/mcp/cloud/harness_test.go +++ b/pkg/mcp/cloud/harness_test.go @@ -17,3 +17,27 @@ func TestExecCLIExitCode(t *testing.T) { require.NoError(t, err, "non-zero exit should not be a Go error") assert.Equal(t, 1, r.ExitCode) } + +// TestExecCLICapturesStderr proves a non-zero exit carries the child's stderr, +// the context gcloud/aws write errors to. Without it run_cli would surface an +// empty stdout and no explanation for the failure. +func TestExecCLICapturesStderr(t *testing.T) { + t.Parallel() + // /bin/sh here is only the test fixture producing a stderr write + nonzero + // exit; the harness itself never shells (see harness_security_test.go). + r, err := execCLI(context.Background(), "/bin/sh", + []string{"-c", "echo boom 1>&2; exit 3"}, nil, 4096) + require.NoError(t, err, "non-zero exit should not be a Go error") + assert.Equal(t, 3, r.ExitCode) + assert.Contains(t, r.Stderr, "boom", "stderr must be captured") +} + +// TestExecCLITruncatesStderr caps stderr at the same limit as stdout so a +// noisy provider error cannot blow the context budget. +func TestExecCLITruncatesStderr(t *testing.T) { + t.Parallel() + r, err := execCLI(context.Background(), "/bin/sh", + []string{"-c", "printf '%0.sx' $(seq 1 100) 1>&2; exit 1"}, nil, 10) + require.NoError(t, err) + assert.LessOrEqual(t, len(r.Stderr), 10, "stderr exceeded limit") +} diff --git a/pkg/mcp/cloud/provider.go b/pkg/mcp/cloud/provider.go index 0cab162..111ebad 100644 --- a/pkg/mcp/cloud/provider.go +++ b/pkg/mcp/cloud/provider.go @@ -73,6 +73,7 @@ type IdentityStatus struct { // is never surfaced; the harness caps output and reports truncation. type CLIResult struct { Stdout string `json:"stdout"` + Stderr string `json:"stderr,omitempty"` Truncated bool `json:"truncated"` ExitCode int `json:"exit_code"` } diff --git a/pkg/mcp/cloud/providers/aws/identity.go b/pkg/mcp/cloud/providers/aws/identity.go index c13b30d..9ba94cc 100644 --- a/pkg/mcp/cloud/providers/aws/identity.go +++ b/pkg/mcp/cloud/providers/aws/identity.go @@ -70,23 +70,56 @@ func evaluateIdentity(arn, expectedRoleARN string) (bool, string) { // assumedRoleARN reports whether arn is an STS assumed-role ARN and, if so, // returns the canonical IAM role ARN behind it. An assumed-role ARN has the -// shape arn:aws:sts:::assumed-role//; the IAM role -// it stands for is arn:aws:iam:::role/. +// shape arn::sts:::assumed-role//, +// across the aws, aws-us-gov, and aws-cn partitions. The role keeps any IAM +// path: the role-path-and-name is everything between "assumed-role/" and the +// final "/" segment, so a path-prefixed role like +// assumed-role/team/sub/Role/session resolves to role/team/sub/Role. The IAM +// role it stands for is arn::iam:::role/. func assumedRoleARN(arn string) (string, bool) { - const prefix = "arn:aws:sts::" + const stsInfix = ":sts::" const marker = ":assumed-role/" - if !strings.HasPrefix(arn, prefix) { + partition, ok := arnPartition(arn) + if !ok { + return "", false + } + if !strings.HasPrefix(arn, "arn:"+partition+stsInfix) { return "", false } idx := strings.Index(arn, marker) if idx < 0 { return "", false } - account := arn[len(prefix):idx] + account := arn[len("arn:"+partition+stsInfix):idx] rest := arn[idx+len(marker):] - roleName, _, found := strings.Cut(rest, "/") - if !found || roleName == "" || account == "" { + // The session name is the final slash-delimited segment; the role path and + // name is everything before it. + rolePath, _, found := lastCut(rest, "/") + if !found || rolePath == "" || account == "" { return "", false } - return fmt.Sprintf("arn:aws:iam::%s:role/%s", account, roleName), true + return fmt.Sprintf("arn:%s:iam::%s:role/%s", partition, account, rolePath), true +} + +// arnPartition returns the partition segment of an ARN (the field between the +// first two colons of "arn::..."). +func arnPartition(arn string) (string, bool) { + rest, ok := strings.CutPrefix(arn, "arn:") + if !ok { + return "", false + } + partition, _, found := strings.Cut(rest, ":") + if !found || partition == "" { + return "", false + } + return partition, true +} + +// lastCut splits s around the last instance of sep, returning the text before +// and after it. found reports whether sep appears in s. +func lastCut(s, sep string) (before, after string, found bool) { + if i := strings.LastIndex(s, sep); i >= 0 { + return s[:i], s[i+len(sep):], true + } + return s, "", false } diff --git a/pkg/mcp/cloud/providers/aws/identity_test.go b/pkg/mcp/cloud/providers/aws/identity_test.go index 47dbd91..1eff919 100644 --- a/pkg/mcp/cloud/providers/aws/identity_test.go +++ b/pkg/mcp/cloud/providers/aws/identity_test.go @@ -90,6 +90,60 @@ func TestIdentityRejectsMismatchedExpectedRoleArn(t *testing.T) { assert.NotEmpty(t, st.Hint) } +func TestAssumedRoleARNParsesPartitionsAndPaths(t *testing.T) { + t.Parallel() + cases := []struct { + name string + arn string + want string + ok bool + }{ + { + "commercial", + "arn:aws:sts::111122223333:assumed-role/triagent-readonly/session", + "arn:aws:iam::111122223333:role/triagent-readonly", + true, + }, + { + "gov-cloud", + "arn:aws-us-gov:sts::111122223333:assumed-role/triagent-readonly/session", + "arn:aws-us-gov:iam::111122223333:role/triagent-readonly", + true, + }, + { + "china", + "arn:aws-cn:sts::111122223333:assumed-role/triagent-readonly/session", + "arn:aws-cn:iam::111122223333:role/triagent-readonly", + true, + }, + { + "iam-path", + "arn:aws:sts::111122223333:assumed-role/team/sub/triagent-readonly/session", + "arn:aws:iam::111122223333:role/team/sub/triagent-readonly", + true, + }, + { + "plain-user", + "arn:aws:iam::111122223333:user/operator", + "", + false, + }, + { + "no-session", + "arn:aws:sts::111122223333:assumed-role/triagent-readonly", + "", + false, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, ok := assumedRoleARN(tc.arn) + assert.Equal(t, tc.ok, ok) + assert.Equal(t, tc.want, got) + }) + } +} + func TestIdentityInvalidOnNonZeroExit(t *testing.T) { f := &fakeRun{results: map[string]cloud.CLIResult{ "sts get-caller-identity": {ExitCode: 255, Stdout: ""}, diff --git a/pkg/mcp/cloud/providers/aws/inventory.go b/pkg/mcp/cloud/providers/aws/inventory.go index 6f76164..2b59d6f 100644 --- a/pkg/mcp/cloud/providers/aws/inventory.go +++ b/pkg/mcp/cloud/providers/aws/inventory.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "strings" "github.com/sourcehawk/triagent/pkg/mcp/cloud" ) @@ -21,15 +22,23 @@ type organizationsAccount struct { } // Inventory projects the AWS accounts the pinned identity can read. The primary -// source is `aws organizations list-accounts`; when the identity lacks -// Organizations access (AccessDenied, surfaced as a non-zero exit or a transport -// error) it falls back to the single account the caller is in, derived from `aws -// sts get-caller-identity`. Both commands are allowlisted so the projection works -// under the validated run core. +// source is `aws organizations list-accounts`; only when that fails with an +// Organizations-unavailable condition (AccessDenied or the account not being a +// member of an organization) does it fall back to the single account the caller +// is in, derived from `aws sts get-caller-identity`. Any other failure — a +// transport error, throttling, a network fault — is surfaced rather than masked +// behind the single-account fallback. Both commands are allowlisted so the +// projection works under the validated run core. func (p *Provider) Inventory(ctx context.Context, run cloud.RunFunc) (cloud.Inventory, error) { res, err := run(ctx, []string{"organizations", "list-accounts", "--output", "json"}) - if err != nil || res.ExitCode != 0 { - return p.callerAccountInventory(ctx, run) + if err != nil { + return cloud.Inventory{}, fmt.Errorf("aws organizations list-accounts: %w", err) + } + if res.ExitCode != 0 { + if isOrgsUnavailable(res.Stderr) { + return p.callerAccountInventory(ctx, run) + } + return cloud.Inventory{}, fmt.Errorf("aws organizations list-accounts failed (exit %d): %s", res.ExitCode, res.Stderr) } var parsed listAccountsResult @@ -47,6 +56,25 @@ func (p *Provider) Inventory(ctx context.Context, run cloud.RunFunc) (cloud.Inve return cloud.Inventory{Scopes: scopes}, nil } +// isOrgsUnavailable reports whether a non-zero `organizations list-accounts` +// stderr is the benign "Organizations is not available to this identity" +// condition — the only failure the single-account fallback is correct for. AWS +// returns AccessDenied when the role lacks Organizations permissions and +// AWSOrganizationsNotInUseException (or "not a member of an organization") when +// the account is standalone. +func isOrgsUnavailable(stderr string) bool { + for _, marker := range []string{ + "AccessDenied", + "not a member of an organization", + "AWSOrganizationsNotInUseException", + } { + if strings.Contains(stderr, marker) { + return true + } + } + return false +} + // callerAccountInventory derives the single-account inventory from the caller // identity, the fallback when Organizations access is denied. func (p *Provider) callerAccountInventory(ctx context.Context, run cloud.RunFunc) (cloud.Inventory, error) { diff --git a/pkg/mcp/cloud/providers/aws/inventory_test.go b/pkg/mcp/cloud/providers/aws/inventory_test.go index 56b3c75..63053b0 100644 --- a/pkg/mcp/cloud/providers/aws/inventory_test.go +++ b/pkg/mcp/cloud/providers/aws/inventory_test.go @@ -38,10 +38,8 @@ func TestInventoryProjectsActiveAccounts(t *testing.T) { func TestInventoryFallsBackToCallerAccountOnAccessDenied(t *testing.T) { f := &fakeRun{ results: map[string]cloud.CLIResult{ - "sts get-caller-identity": {Stdout: callerIdentityAssumedRole}, - }, - errs: map[string]error{ - "organizations list-accounts": errAccessDenied, + "organizations list-accounts": {ExitCode: 254, Stderr: "An error occurred (AccessDeniedException) when calling the ListAccounts operation: ..."}, + "sts get-caller-identity": {Stdout: callerIdentityAssumedRole}, }, } p, err := newWithBinary("/usr/bin/aws") @@ -54,10 +52,10 @@ func TestInventoryFallsBackToCallerAccountOnAccessDenied(t *testing.T) { assert.Equal(t, "111122223333", inv.Scopes[0].ID) } -func TestInventoryFallsBackOnAccessDeniedExitCode(t *testing.T) { +func TestInventoryFallsBackWhenOrgsNotInUse(t *testing.T) { f := &fakeRun{ results: map[string]cloud.CLIResult{ - "organizations list-accounts": {ExitCode: 254, Stdout: "An error occurred (AccessDeniedException) when calling the ListAccounts operation: ..."}, + "organizations list-accounts": {ExitCode: 254, Stderr: "An error occurred (AWSOrganizationsNotInUseException) when calling the ListAccounts operation"}, "sts get-caller-identity": {Stdout: callerIdentityAssumedRole}, }, } @@ -69,3 +67,36 @@ func TestInventoryFallsBackOnAccessDeniedExitCode(t *testing.T) { require.Len(t, inv.Scopes, 1) assert.Equal(t, "111122223333", inv.Scopes[0].ID) } + +func TestInventorySurfacesNonAccessDeniedFailure(t *testing.T) { + f := &fakeRun{ + results: map[string]cloud.CLIResult{ + "organizations list-accounts": {ExitCode: 254, Stderr: "An error occurred (ThrottlingException) when calling the ListAccounts operation: Rate exceeded"}, + "sts get-caller-identity": {Stdout: callerIdentityAssumedRole}, + }, + } + p, err := newWithBinary("/usr/bin/aws") + require.NoError(t, err) + + _, err = p.Inventory(context.Background(), f.run) + require.Error(t, err, "a throttling failure must surface, not silently fall back") + assert.Contains(t, err.Error(), "ThrottlingException", "the error surfaces the captured stderr") + // The fallback caller-identity call must not have run. + for _, c := range f.calls { + assert.NotEqual(t, []string{"sts", "get-caller-identity", "--output", "json"}, c, + "a non-AccessDenied failure must not trigger the single-account fallback") + } +} + +func TestInventorySurfacesTransportError(t *testing.T) { + f := &fakeRun{ + errs: map[string]error{ + "organizations list-accounts": errAccessDenied, + }, + } + p, err := newWithBinary("/usr/bin/aws") + require.NoError(t, err) + + _, err = p.Inventory(context.Background(), f.run) + require.Error(t, err, "a process that could not be run is a transport failure, surfaced not masked") +} diff --git a/pkg/mcp/cloud/providers/gcp/identity.go b/pkg/mcp/cloud/providers/gcp/identity.go index b036108..75d9466 100644 --- a/pkg/mcp/cloud/providers/gcp/identity.go +++ b/pkg/mcp/cloud/providers/gcp/identity.go @@ -2,65 +2,62 @@ package gcp import ( "context" - "encoding/json" - "fmt" + "os" "github.com/sourcehawk/triagent/pkg/mcp/cloud" ) -// authAccount is one entry of `gcloud auth list --format=json`. -type authAccount struct { - Account string `json:"account"` - Status string `json:"status"` -} - // Identity is the read-only whoami. It is called by cloud.Probe with an // unvalidated RunFunc, so it may use the deny-floored `auth` subcommand -// directly: it reads the active account and reports the session valid only when -// that account equals expected, the impersonation target the launcher pinned. A +// directly. Validity means "impersonation is pinned to the expected SA and the +// pin actually works", not "logged in directly as the SA": gcloud impersonation +// keeps the operator's base account active while every call assumes the target +// SA, so comparing the base account to the target would mark a correctly +// configured session invalid. +// +// The probe reads the in-process impersonation env (the launcher sets it on the +// subprocess; the agent cannot reach it), confirms it is pinned to the expected +// target, then runs a minimal impersonated read to prove the pin took effect. A // degraded auth state surfaces through Valid and Hint, never a Go error. +// +// NOTE: validated against gcloud's documented impersonation behavior; verify +// against a live gcloud before relying on the exact print-access-token shape. func (p *Provider) Identity(ctx context.Context, run cloud.RunFunc, expected string) (cloud.IdentityStatus, error) { - res, err := run(ctx, []string{"auth", "list", "--filter=status:ACTIVE", "--format=json"}) - if err != nil { - return cloud.IdentityStatus{Provider: "gcp", Valid: false, Hint: err.Error()}, nil - } + st := cloud.IdentityStatus{Provider: "gcp"} - var accounts []authAccount - if err := json.Unmarshal([]byte(res.Stdout), &accounts); err != nil { - return cloud.IdentityStatus{ - Provider: "gcp", - Valid: false, - Hint: fmt.Sprintf("parse gcloud auth list output: %v", err), - }, nil - } - - active := activeAccount(accounts) - st := cloud.IdentityStatus{Provider: "gcp", AssumedIdentity: active} - - switch { - case expected == "": + if expected == "" { st.Valid = false st.Hint = "no impersonation target pinned; set " + EnvImpersonate + " on the cloud MCP subprocess" - case active == "": - st.Valid = false - st.Hint = "no active gcloud account; run: gcloud auth login" - case active != expected: + return st, nil + } + + pinned := os.Getenv(EnvImpersonate) + if pinned != expected { st.Valid = false - st.Hint = fmt.Sprintf("active account %q is not the pinned identity %q", active, expected) - default: - st.Valid = true + st.Hint = EnvImpersonate + " is not pinned to the expected identity " + expected + + "; the launcher must set it on the cloud MCP subprocess" + return st, nil } - return st, nil -} -// activeAccount returns the first account marked ACTIVE, or "" when none is. The -// --filter=status:ACTIVE argv already narrows this server-side; the status check -// is the belt to that braces. -func activeAccount(accounts []authAccount) string { - for _, a := range accounts { - if a.Status == "ACTIVE" { - return a.Account + // Minimal impersonated read: succeeds only when the pinned SA can mint a + // token, which proves the impersonation grant is in place and active. + res, err := run(ctx, []string{"auth", "print-access-token", "--format=json"}) + if err != nil { + st.Valid = false + st.Hint = err.Error() + return st, nil + } + if res.ExitCode != 0 { + st.Valid = false + hint := "impersonation failed; check the serviceAccountTokenCreator grant or re-auth: gcloud auth login" + if res.Stderr != "" { + hint = res.Stderr + " — " + hint } + st.Hint = hint + return st, nil } - return "" + + st.AssumedIdentity = expected + st.Valid = true + return st, nil } diff --git a/pkg/mcp/cloud/providers/gcp/identity_test.go b/pkg/mcp/cloud/providers/gcp/identity_test.go index 04ba498..063d583 100644 --- a/pkg/mcp/cloud/providers/gcp/identity_test.go +++ b/pkg/mcp/cloud/providers/gcp/identity_test.go @@ -10,92 +10,86 @@ import ( "github.com/stretchr/testify/require" ) -// authListJSON is captured `gcloud auth list --format=json` output: an array of -// accounts, exactly one with status ACTIVE. -const authListJSON = `[ - { - "account": "ro-sa@proj.iam.gserviceaccount.com", - "status": "ACTIVE" - }, - { - "account": "operator@example.com", - "status": "" - } -]` +const targetSA = "ro-sa@proj.iam.gserviceaccount.com" + +// fakeRun returns a canned CLIResult/error for a given argv, recording the +// argv it was called with so a test can assert the probe drove the right CLI. +type fakeRun struct { + result cloud.CLIResult + err error + calls [][]string +} -func runReturning(out string) cloud.RunFunc { - return func(context.Context, []string) (cloud.CLIResult, error) { - return cloud.CLIResult{Stdout: out}, nil - } +func (f *fakeRun) run(_ context.Context, argv []string) (cloud.CLIResult, error) { + f.calls = append(f.calls, argv) + return f.result, f.err } -func TestIdentityResolvesActiveAccountAsTarget(t *testing.T) { +func TestIdentityInvalidWhenNoImpersonationTargetPinned(t *testing.T) { + t.Setenv(EnvImpersonate, targetSA) p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) - st, err := p.Identity(context.Background(), runReturning(authListJSON), "ro-sa@proj.iam.gserviceaccount.com") + f := &fakeRun{result: cloud.CLIResult{Stdout: `"token"`}} + st, err := p.Identity(context.Background(), f.run, "") require.NoError(t, err) - assert.Equal(t, "gcp", st.Provider) - assert.Equal(t, "ro-sa@proj.iam.gserviceaccount.com", st.AssumedIdentity) - assert.True(t, st.Valid, "active account equals the impersonation target") + assert.False(t, st.Valid, "no pinned target means the session is not validly pinned") + assert.NotEmpty(t, st.Hint) + assert.Empty(t, f.calls, "no probe should run without a target") } -func TestIdentityInvalidWhenActiveAccountIsNotTheTarget(t *testing.T) { +func TestIdentityInvalidWhenImpersonationEnvNotPinnedToExpected(t *testing.T) { + t.Setenv(EnvImpersonate, "someone-else@proj.iam.gserviceaccount.com") p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) - mismatch := `[{"account": "operator@example.com", "status": "ACTIVE"}]` - st, err := p.Identity(context.Background(), runReturning(mismatch), "ro-sa@proj.iam.gserviceaccount.com") + f := &fakeRun{result: cloud.CLIResult{Stdout: `"token"`}} + st, err := p.Identity(context.Background(), f.run, targetSA) require.NoError(t, err) - assert.Equal(t, "operator@example.com", st.AssumedIdentity) - assert.False(t, st.Valid, "active account differs from the impersonation target") + assert.False(t, st.Valid, "impersonation env pinned to a different SA is invalid") assert.NotEmpty(t, st.Hint) + assert.Empty(t, f.calls, "a mismatched pin short-circuits before the probe") } -func TestIdentityInvalidWhenNoActiveAccount(t *testing.T) { +func TestIdentityValidWhenImpersonatedReadSucceeds(t *testing.T) { + t.Setenv(EnvImpersonate, targetSA) p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) - st, err := p.Identity(context.Background(), runReturning(`[]`), "ro-sa@proj.iam.gserviceaccount.com") + f := &fakeRun{result: cloud.CLIResult{Stdout: `"ya29.token"`}} + st, err := p.Identity(context.Background(), f.run, targetSA) require.NoError(t, err) - assert.Empty(t, st.AssumedIdentity) - assert.False(t, st.Valid) - assert.NotEmpty(t, st.Hint) + assert.Equal(t, "gcp", st.Provider) + assert.Equal(t, targetSA, st.AssumedIdentity, "the SA is the identity the session acts as") + assert.True(t, st.Valid, "a successful impersonated read proves the pin took effect") + require.Len(t, f.calls, 1) + assert.Equal(t, []string{"auth", "print-access-token", "--format=json"}, f.calls[0]) } -func TestIdentityInvalidWhenNoImpersonationTargetPinned(t *testing.T) { +func TestIdentityInvalidWhenImpersonatedReadFails(t *testing.T) { + t.Setenv(EnvImpersonate, targetSA) p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) - st, err := p.Identity(context.Background(), runReturning(authListJSON), "") + f := &fakeRun{result: cloud.CLIResult{ + ExitCode: 1, + Stderr: "ERROR: Permission 'iam.serviceAccounts.getAccessToken' denied", + }} + st, err := p.Identity(context.Background(), f.run, targetSA) require.NoError(t, err) - assert.False(t, st.Valid, "no pinned target means the session is not validly pinned") - assert.NotEmpty(t, st.Hint) + assert.False(t, st.Valid, "a failed impersonated read means the pin did not take effect") + assert.Contains(t, st.Hint, "iam.serviceAccounts.getAccessToken", + "the hint surfaces the captured stderr") } func TestIdentitySurfacesRunErrorAsHint(t *testing.T) { + t.Setenv(EnvImpersonate, targetSA) p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) - failing := cloud.RunFunc(func(context.Context, []string) (cloud.CLIResult, error) { - return cloud.CLIResult{}, errors.New("gcloud not authenticated") - }) - st, err := p.Identity(context.Background(), failing, "ro-sa@proj.iam.gserviceaccount.com") + f := &fakeRun{err: errors.New("gcloud not authenticated")} + st, err := p.Identity(context.Background(), f.run, targetSA) require.NoError(t, err, "a degraded auth state surfaces through Valid/Hint, not a Go error") assert.False(t, st.Valid) assert.Contains(t, st.Hint, "gcloud not authenticated") } - -func TestIdentityCallsAuthListWithJSONFormat(t *testing.T) { - p, err := newWithBinary("/usr/bin/gcloud") - require.NoError(t, err) - - var gotArgv []string - capturing := cloud.RunFunc(func(_ context.Context, argv []string) (cloud.CLIResult, error) { - gotArgv = argv - return cloud.CLIResult{Stdout: authListJSON}, nil - }) - _, err = p.Identity(context.Background(), capturing, "ro-sa@proj.iam.gserviceaccount.com") - require.NoError(t, err) - assert.Equal(t, []string{"auth", "list", "--filter=status:ACTIVE", "--format=json"}, gotArgv) -} diff --git a/pkg/mcp/cloud/providers/gcp/inventory.go b/pkg/mcp/cloud/providers/gcp/inventory.go index bdf1cba..ff36933 100644 --- a/pkg/mcp/cloud/providers/gcp/inventory.go +++ b/pkg/mcp/cloud/providers/gcp/inventory.go @@ -25,6 +25,9 @@ func (p *Provider) Inventory(ctx context.Context, run cloud.RunFunc) (cloud.Inve if err != nil { return cloud.Inventory{}, fmt.Errorf("gcloud projects list: %w", err) } + if res.ExitCode != 0 { + return cloud.Inventory{}, fmt.Errorf("gcloud projects list failed (exit %d): %s", res.ExitCode, res.Stderr) + } var projects []project if err := json.Unmarshal([]byte(res.Stdout), &projects); err != nil { diff --git a/pkg/mcp/cloud/providers/gcp/inventory_test.go b/pkg/mcp/cloud/providers/gcp/inventory_test.go index 66404f4..77fe8c0 100644 --- a/pkg/mcp/cloud/providers/gcp/inventory_test.go +++ b/pkg/mcp/cloud/providers/gcp/inventory_test.go @@ -26,6 +26,12 @@ const projectsListJSON = `[ } ]` +func runReturning(out string) cloud.RunFunc { + return func(context.Context, []string) (cloud.CLIResult, error) { + return cloud.CLIResult{Stdout: out}, nil + } +} + func TestInventoryProjectsIDAndName(t *testing.T) { t.Parallel() p, err := newWithBinary("/usr/bin/gcloud") @@ -64,6 +70,19 @@ func TestInventoryCallsProjectsListWithJSONFormat(t *testing.T) { "the inventory argv must match the allowlisted `projects list` verb chain exactly") } +func TestInventoryErrorsOnNonZeroExit(t *testing.T) { + t.Parallel() + p, err := newWithBinary("/usr/bin/gcloud") + require.NoError(t, err) + + failing := cloud.RunFunc(func(context.Context, []string) (cloud.CLIResult, error) { + return cloud.CLIResult{ExitCode: 1, Stderr: "ERROR: (gcloud.projects.list) PERMISSION_DENIED"}, nil + }) + _, err = p.Inventory(context.Background(), failing) + require.Error(t, err, "a non-zero exit is a real failure, not a parse error") + assert.Contains(t, err.Error(), "PERMISSION_DENIED", "the error surfaces the captured stderr") +} + func TestInventoryErrorsWhenRunErrors(t *testing.T) { t.Parallel() p, err := newWithBinary("/usr/bin/gcloud") diff --git a/pkg/mcp/cloud/validate.go b/pkg/mcp/cloud/validate.go index 770b995..9b8469b 100644 --- a/pkg/mcp/cloud/validate.go +++ b/pkg/mcp/cloud/validate.go @@ -6,8 +6,11 @@ import ( ) // ScopeAllowlist is the deployment's set of cloud targets any run_cli argv may -// reference. An empty field means that target axis is unconstrained. The agent -// cannot pivot to an un-allowlisted project, account, or region. +// reference. An empty field means that target axis is unconstrained. Project and +// region/zone are enforced here against argv (allowedFor maps --project and +// --region/--zone). Account reach is not validated at the argv layer: it is +// constrained by the pinned identity or role the session assumes, and the +// identity-selecting flags (--account, --profile) sit on the deny floor. type ScopeAllowlist struct { Projects []string `json:"projects,omitempty"` Accounts []string `json:"accounts,omitempty"` @@ -29,15 +32,21 @@ func (s ScopeAllowlist) allowedFor(flag string) ([]string, bool) { } } -// validateArgv enforces the no-bypass contract on one argv before exec: the -// positional subcommand path is on the allowlist, no token is a deny-floored -// flag or arg-prefix, and every target-selecting flag value is within scope. -// It runs entirely on argv tokens — there is no shell, so metacharacter tokens -// are inert positionals that simply fail the exact allowlist match. +// validateArgv enforces the no-bypass contract on one argv before exec: no +// token carries a shell-control sequence, the positional subcommand path is on +// the allowlist, no token is a deny-floored flag or arg-prefix, and every +// target-selecting flag value is within scope. It runs entirely on argv tokens — +// there is no shell, so a shell-control token would be an inert positional +// anyway; the metachar check rejects it outright as defense in depth. func validateArgv(argv []string, allow *CommandAllowlist, scope ScopeAllowlist) error { if len(argv) == 0 { return fmt.Errorf("empty command") } + for _, tok := range argv { + if isShellControlToken(tok) { + return fmt.Errorf("argv token contains a shell-control character: %q", tok) + } + } if !allow.Allows(argv) { return fmt.Errorf("subcommand not on the allowlist: %q", strings.Join(subcommandPath(argv), " ")) } @@ -76,6 +85,19 @@ func validateArgv(argv []string, allow *CommandAllowlist, scope ScopeAllowlist) return nil } +// isShellControlToken reports whether tok is or contains a shell-control +// sequence. The harness never invokes a shell, so these are already inert; this +// is defense in depth, rejecting `;`, `|`, `&`, backtick, `$(`, `>`, `<`, and +// embedded newlines so an argv like ["...", "describe", ";", "rm"] is refused +// outright. A literal resource name ("my-vm") or a key=value flag +// ("--filter=name=foo") contains none of these and passes. +func isShellControlToken(tok string) bool { + if strings.ContainsAny(tok, ";|&`<>\n") { + return true + } + return strings.Contains(tok, "$(") +} + // splitFlag separates a "--flag=value" token into its flag and value. For a // bare "--flag" or a non-flag token it returns the token unchanged with no // inline value. diff --git a/pkg/mcp/cloud/validate_test.go b/pkg/mcp/cloud/validate_test.go index ba77a32..b87f551 100644 --- a/pkg/mcp/cloud/validate_test.go +++ b/pkg/mcp/cloud/validate_test.go @@ -29,6 +29,10 @@ func TestValidateArgvRejectsDenyFloorAndScope(t *testing.T) { {"metachar-semicolon", []string{"compute", "instances", "list", ";", "rm", "-rf", "/"}, false}, {"metachar-pipe", []string{"compute", "instances", "list", "|", "cat"}, false}, {"metachar-subshell", []string{"compute", "instances", "list", "$(whoami)"}, false}, + {"metachar-backtick", []string{"compute", "instances", "list", "`id`"}, false}, + {"metachar-redirect", []string{"compute", "instances", "list", ">", "/tmp/x"}, false}, + {"metachar-and", []string{"compute", "instances", "list", "&&", "rm"}, false}, + {"metachar-embedded", []string{"compute", "instances", "list", "--filter=a$(id)"}, false}, {"not-allowed", []string{"iam", "service-accounts", "create"}, false}, {"empty", []string{}, false}, } @@ -58,6 +62,37 @@ func TestValidateArgvEqualsFormFlag(t *testing.T) { "expected --project=prod (equals form, in scope) to validate") } +func TestValidateArgvAllowsResourceOperand(t *testing.T) { + t.Parallel() + al := &CommandAllowlist{Commands: []Command{{Path: "compute instances describe"}}} + scope := ScopeAllowlist{Projects: []string{"prod"}} + // describe/get verbs take a resource operand; the allowlisted verb chain + // matches as a prefix, and the operand is an inert positional argument. + assert.NoError(t, validateArgv( + []string{"compute", "instances", "describe", "my-vm", "--project", "prod"}, al, scope), + "an allowlisted verb chain plus a resource operand must validate") +} + +func TestValidateArgvRejectsMetacharInAnyPosition(t *testing.T) { + t.Parallel() + al := &CommandAllowlist{Commands: []Command{{Path: "compute instances describe"}}} + scope := ScopeAllowlist{} + for _, argv := range [][]string{ + {"compute", "instances", "describe", "my-vm", ";", "rm"}, + {"compute", "instances", "describe", ";", "my-vm"}, + {"compute", "instances", "describe", "my-vm|cat"}, + {"compute", "instances", "describe", "my-vm", "&&", "id"}, + } { + assert.Errorf(t, validateArgv(argv, al, scope), + "a metacharacter token in %v must be rejected", argv) + } + // A literal resource name and a key=value filter contain no shell-control + // characters and must pass. + assert.NoError(t, validateArgv( + []string{"compute", "instances", "describe", "my-vm", "--filter=name=foo"}, al, scope), + "a plain resource name and a key=value filter must pass") +} + func TestValidateArgvEmptyScopeAllowsAnyTarget(t *testing.T) { t.Parallel() al := &CommandAllowlist{Commands: []Command{{Path: "projects list"}}}