From 2bd81b050db143bca3644ce2d5b0149bcd60fd93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?= <54936225+sourcehawk@users.noreply.github.com> Date: Sat, 30 May 2026 17:37:37 +0200 Subject: [PATCH 1/3] refactor(cloud): thread expected identity and env through Probe/Identity Provider.Identity and cloud.Probe now take the pinned identity and the subprocess env explicitly instead of reading process-global env. The gcp and aws Identity implementations validate the resolved identity against the threaded expected value, dropping their os.Getenv reads. cloud.Server carries ExpectedIdentity (read once from TRIAGENT_CLOUD_EXPECTED_IDENTITY in the serve subprocess) and builds the probe env via subprocessEnv(). Co-Authored-By: Claude Opus 4.8 (1M context) --- pkg/mcp/cloud/env.go | 6 +++ pkg/mcp/cloud/fake_test.go | 2 +- pkg/mcp/cloud/probe.go | 18 ++++++-- pkg/mcp/cloud/probe_test.go | 45 +++++++++++--------- pkg/mcp/cloud/provider.go | 7 ++- pkg/mcp/cloud/providers/aws/identity.go | 23 +++------- pkg/mcp/cloud/providers/aws/identity_test.go | 14 +++--- pkg/mcp/cloud/providers/gcp/identity.go | 16 +++---- pkg/mcp/cloud/providers/gcp/identity_test.go | 18 +++----- pkg/mcp/cloud/server.go | 22 ++++++---- pkg/mcp/cloud/tools_status.go | 2 +- 11 files changed, 92 insertions(+), 81 deletions(-) diff --git a/pkg/mcp/cloud/env.go b/pkg/mcp/cloud/env.go index fcb57ae..d74b715 100644 --- a/pkg/mcp/cloud/env.go +++ b/pkg/mcp/cloud/env.go @@ -14,4 +14,10 @@ const ( // EnvScope carries the target scope allowlist the launcher froze for this // session, as JSON the cloud package decodes into ScopeAllowlist. EnvScope = "TRIAGENT_CLOUD_SCOPE" + // EnvExpectedIdentity carries the identity the launcher pinned for this + // session, uniform across providers: the impersonation target for gcp, the + // expected role ARN for aws. The serve subprocess reads it once at startup and + // threads it into the identity probe; the provider validates the resolved + // identity against it. + EnvExpectedIdentity = "TRIAGENT_CLOUD_EXPECTED_IDENTITY" ) diff --git a/pkg/mcp/cloud/fake_test.go b/pkg/mcp/cloud/fake_test.go index 62593e8..bdc7207 100644 --- a/pkg/mcp/cloud/fake_test.go +++ b/pkg/mcp/cloud/fake_test.go @@ -46,6 +46,6 @@ func (f *fakeProvider) Inventory(context.Context, RunFunc) (Inventory, error) { return f.inventory, nil } -func (f *fakeProvider) Identity(context.Context, RunFunc) (IdentityStatus, error) { +func (f *fakeProvider) Identity(context.Context, RunFunc, string) (IdentityStatus, error) { return f.identity, f.identityErr } diff --git a/pkg/mcp/cloud/probe.go b/pkg/mcp/cloud/probe.go index fecac47..6a90fd3 100644 --- a/pkg/mcp/cloud/probe.go +++ b/pkg/mcp/cloud/probe.go @@ -1,23 +1,33 @@ package cloud -import "context" +import ( + "context" + "fmt" +) // Probe runs the read-only whoami for one provider: which pinned identity is // active and whether it is valid. It is the single probe the launcher's // connections panel, the session preflight gate, and the session_status tool // all call, so those surfaces can never disagree. // +// expected is the identity the launcher pinned for this session, threaded +// explicitly so the probe validates against it without reading process-global +// env; env is the exact subprocess environment the whoami exec runs under, +// passed in by the caller rather than read from os.Environ here. +// // Probe never returns a Go error for an unreachable or invalid identity — that // is a degrade, reported through IdentityStatus.Valid and Hint, so a stale cloud // credential surfaces visibly instead of failing the caller. A Go error is // reserved for a caller contract violation (a nil provider). -func Probe(ctx context.Context, p Provider) (IdentityStatus, error) { - env := minimalEnv(p.EnvPassthrough()) +func Probe(ctx context.Context, p Provider, expected string, env []string) (IdentityStatus, error) { + if p == nil { + return IdentityStatus{}, fmt.Errorf("cloud: Probe requires a provider") + } run := func(ctx context.Context, argv []string) (CLIResult, error) { return execCLI(ctx, p.Binary(), argv, env, defaultOutputLimit) } - st, err := p.Identity(ctx, run) + st, err := p.Identity(ctx, run, expected) if err != nil { return IdentityStatus{ Provider: p.Name(), diff --git a/pkg/mcp/cloud/probe_test.go b/pkg/mcp/cloud/probe_test.go index 8c7df39..a2cf8ae 100644 --- a/pkg/mcp/cloud/probe_test.go +++ b/pkg/mcp/cloud/probe_test.go @@ -3,6 +3,7 @@ package cloud import ( "context" "errors" + "os" "strings" "testing" @@ -29,7 +30,7 @@ func (p *envProbeProvider) Inventory(context.Context, RunFunc) (Inventory, error return Inventory{}, nil } -func (p *envProbeProvider) Identity(ctx context.Context, run RunFunc) (IdentityStatus, error) { +func (p *envProbeProvider) Identity(ctx context.Context, run RunFunc, _ string) (IdentityStatus, error) { res, err := run(ctx, nil) if err != nil { return IdentityStatus{}, err @@ -51,56 +52,62 @@ func TestProbeReturnsProviderIdentity(t *testing.T) { Valid: true, }, } - st, err := Probe(context.Background(), p) + st, err := Probe(context.Background(), p, "", nil) require.NoError(t, err) assert.True(t, st.Valid) assert.Equal(t, "ro-sa@proj.iam.gserviceaccount.com", st.AssumedIdentity) } +func TestProbeErrorsOnNilProvider(t *testing.T) { + t.Parallel() + _, err := Probe(context.Background(), nil, "", nil) + require.Error(t, err, "a nil provider is a caller contract violation, not a degrade") +} + func TestProbeSurfacesProviderErrorAsInvalid(t *testing.T) { t.Parallel() p := &fakeProvider{name: "aws", identityErr: errors.New("token expired")} - st, err := Probe(context.Background(), p) + st, err := Probe(context.Background(), p, "", nil) require.NoError(t, err, "Probe should degrade, not error") assert.False(t, st.Valid, "expected Valid=false when the provider errors") assert.Equal(t, "aws", st.Provider, "expected provider name carried through") assert.NotEmpty(t, st.Hint, "expected the provider error surfaced as a hint") } -// TestProbeUsesMinimalSubprocessEnv proves the probe path forwards only the -// base passthrough plus the provider's declared names to the whoami subprocess, -// dropping the launcher's ambient secrets. A parent canary must not cross the -// boundary while a declared passthrough var survives. -func TestProbeUsesMinimalSubprocessEnv(t *testing.T) { +// TestProbeExecsWithExactlyTheGivenEnv proves the probe execs the whoami +// subprocess under exactly the env the caller passed, with no read of +// os.Environ inside Probe: a parent canary set in the process env must not +// cross the boundary, while a var present only in the passed env survives. +func TestProbeExecsWithExactlyTheGivenEnv(t *testing.T) { t.Setenv("TRIAGENT_CLOUD_LEAK_CANARY", "should-not-appear") - t.Setenv("CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT", "ro-sa@proj.iam.gserviceaccount.com") - p := &envProbeProvider{ - name: "gcp", - envPassthrough: []string{"CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT"}, - } + p := &envProbeProvider{name: "gcp"} - st, err := Probe(context.Background(), p) + env := []string{ + "PATH=" + os.Getenv("PATH"), + "CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT=ro-sa@proj.iam.gserviceaccount.com", + } + st, err := Probe(context.Background(), p, "", env) require.NoError(t, err) seen := st.AssumedIdentity assert.NotContains(t, seen, "TRIAGENT_CLOUD_LEAK_CANARY", - "parent-env secret must not reach the probe subprocess") + "a var present only in the process env, not the passed env, must not reach the subprocess") assert.Contains(t, seen, "CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT=ro-sa@proj.iam.gserviceaccount.com", - "declared passthrough var must reach the probe subprocess") + "the passed env must reach the probe subprocess") for _, line := range strings.Split(seen, "\n") { if line == "" { continue } name, _, _ := strings.Cut(line, "=") - assert.Contains(t, []string{"PATH", "HOME", "CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT"}, name, - "only base + declared passthrough names may cross the boundary") + assert.Contains(t, []string{"PATH", "CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT"}, name, + "only the names in the passed env may cross the boundary") } } func TestProbeInvalidWhenIdentityEmpty(t *testing.T) { t.Parallel() p := &fakeProvider{name: "gcp", identity: IdentityStatus{Provider: "gcp", Valid: true}} - st, err := Probe(context.Background(), p) + st, err := Probe(context.Background(), p, "", nil) require.NoError(t, err) assert.False(t, st.Valid, "an empty resolved identity must not be reported valid") } diff --git a/pkg/mcp/cloud/provider.go b/pkg/mcp/cloud/provider.go index 8abf18b..0cab162 100644 --- a/pkg/mcp/cloud/provider.go +++ b/pkg/mcp/cloud/provider.go @@ -36,8 +36,11 @@ type Provider interface { // accounts for aws). It execs only through run, never directly. Inventory(ctx context.Context, run RunFunc) (Inventory, error) // Identity is the read-only whoami: which pinned identity is active and - // whether it is valid. It execs only through run, never directly. - Identity(ctx context.Context, run RunFunc) (IdentityStatus, error) + // whether it is valid. expected is the identity the launcher pinned for this + // session (the impersonation target for gcp, the expected role ARN for aws, + // empty when none is pinned); the provider validates the resolved identity + // against it. It execs only through run, never directly. + Identity(ctx context.Context, run RunFunc, expected string) (IdentityStatus, error) } // RunFunc is the harness exec core, injected into providers so they never exec diff --git a/pkg/mcp/cloud/providers/aws/identity.go b/pkg/mcp/cloud/providers/aws/identity.go index 415017f..c13b30d 100644 --- a/pkg/mcp/cloud/providers/aws/identity.go +++ b/pkg/mcp/cloud/providers/aws/identity.go @@ -4,20 +4,11 @@ import ( "context" "encoding/json" "fmt" - "os" "strings" "github.com/sourcehawk/triagent/pkg/mcp/cloud" ) -// EnvExpectedRoleARN optionally pins the IAM role ARN the assumed-role caller -// must resolve to. When set, Identity rejects any caller whose underlying role -// does not match it, the strict check. When unset, Identity falls back to the -// structural check (the caller must be an assumed-role ARN at all, proving the -// AWS_PROFILE assume-role pin took effect rather than the operator's plain base -// identity leaking through). -const EnvExpectedRoleARN = "TRIAGENT_CLOUD_AWS_EXPECTED_ROLE_ARN" - // callerIdentity is the projection of `aws sts get-caller-identity --output // json`. Only the fields the probe and inventory fallback use are decoded. type callerIdentity struct { @@ -31,12 +22,12 @@ type callerIdentity struct { // the command is also allowlisted so it works under the validated core), parses // the caller ARN, and reports whether the pinned assume-role identity is active. // -// Validity has two modes. With TRIAGENT_CLOUD_AWS_EXPECTED_ROLE_ARN set, the -// caller's underlying role must match it exactly. Without it, the structural -// check applies: the caller must be an assumed-role ARN, which proves the -// AWS_PROFILE pin took effect — a plain user/root ARN means base credentials -// leaked through unimpersonated, so the session is not valid. -func (p *Provider) Identity(ctx context.Context, run cloud.RunFunc) (cloud.IdentityStatus, error) { +// Validity has two modes. With expected set to a role ARN, the caller's +// underlying role must match it exactly. Without it, the structural check +// applies: the caller must be an assumed-role ARN, which proves the AWS_PROFILE +// pin took effect — a plain user/root ARN means base credentials leaked through +// unimpersonated, so the session is not valid. +func (p *Provider) Identity(ctx context.Context, run cloud.RunFunc, expected string) (cloud.IdentityStatus, error) { res, err := run(ctx, []string{"sts", "get-caller-identity", "--output", "json"}) if err != nil { return cloud.IdentityStatus{Provider: "aws", Valid: false, Hint: err.Error()}, nil @@ -59,7 +50,7 @@ func (p *Provider) Identity(ctx context.Context, run cloud.RunFunc) (cloud.Ident } st := cloud.IdentityStatus{Provider: "aws", AssumedIdentity: caller.Arn} - st.Valid, st.Hint = evaluateIdentity(caller.Arn, os.Getenv(EnvExpectedRoleARN)) + st.Valid, st.Hint = evaluateIdentity(caller.Arn, expected) return st, nil } diff --git a/pkg/mcp/cloud/providers/aws/identity_test.go b/pkg/mcp/cloud/providers/aws/identity_test.go index d546bf9..47dbd91 100644 --- a/pkg/mcp/cloud/providers/aws/identity_test.go +++ b/pkg/mcp/cloud/providers/aws/identity_test.go @@ -28,7 +28,7 @@ func TestIdentityBuildsCallerIdentityArgv(t *testing.T) { p, err := newWithBinary("/usr/bin/aws") require.NoError(t, err) - _, err = p.Identity(context.Background(), f.run) + _, err = p.Identity(context.Background(), f.run, "") require.NoError(t, err) require.Len(t, f.calls, 1) @@ -42,7 +42,7 @@ func TestIdentityValidWhenAssumedRole(t *testing.T) { p, err := newWithBinary("/usr/bin/aws") require.NoError(t, err) - st, err := p.Identity(context.Background(), f.run) + st, err := p.Identity(context.Background(), f.run, "") require.NoError(t, err) assert.Equal(t, "aws", st.Provider) @@ -57,7 +57,7 @@ func TestIdentityInvalidWhenNotAssumedRole(t *testing.T) { p, err := newWithBinary("/usr/bin/aws") require.NoError(t, err) - st, err := p.Identity(context.Background(), f.run) + st, err := p.Identity(context.Background(), f.run, "") require.NoError(t, err) assert.Equal(t, "arn:aws:iam::111122223333:user/operator", st.AssumedIdentity) @@ -66,27 +66,25 @@ func TestIdentityInvalidWhenNotAssumedRole(t *testing.T) { } func TestIdentityMatchesExpectedRoleArnWhenPinned(t *testing.T) { - t.Setenv(EnvExpectedRoleARN, "arn:aws:iam::111122223333:role/triagent-readonly") f := &fakeRun{results: map[string]cloud.CLIResult{ "sts get-caller-identity": {Stdout: callerIdentityAssumedRole}, }} p, err := newWithBinary("/usr/bin/aws") require.NoError(t, err) - st, err := p.Identity(context.Background(), f.run) + st, err := p.Identity(context.Background(), f.run, "arn:aws:iam::111122223333:role/triagent-readonly") require.NoError(t, err) assert.True(t, st.Valid, "assumed-role ARN whose role matches the pinned expectation is valid") } func TestIdentityRejectsMismatchedExpectedRoleArn(t *testing.T) { - t.Setenv(EnvExpectedRoleARN, "arn:aws:iam::111122223333:role/some-other-role") f := &fakeRun{results: map[string]cloud.CLIResult{ "sts get-caller-identity": {Stdout: callerIdentityAssumedRole}, }} p, err := newWithBinary("/usr/bin/aws") require.NoError(t, err) - st, err := p.Identity(context.Background(), f.run) + st, err := p.Identity(context.Background(), f.run, "arn:aws:iam::111122223333:role/some-other-role") require.NoError(t, err) assert.False(t, st.Valid, "assumed role not matching the pinned expectation is invalid") assert.NotEmpty(t, st.Hint) @@ -99,7 +97,7 @@ func TestIdentityInvalidOnNonZeroExit(t *testing.T) { p, err := newWithBinary("/usr/bin/aws") require.NoError(t, err) - st, err := p.Identity(context.Background(), f.run) + st, err := p.Identity(context.Background(), f.run, "") require.NoError(t, err) assert.False(t, st.Valid) assert.NotEmpty(t, st.Hint) diff --git a/pkg/mcp/cloud/providers/gcp/identity.go b/pkg/mcp/cloud/providers/gcp/identity.go index 8be0460..b036108 100644 --- a/pkg/mcp/cloud/providers/gcp/identity.go +++ b/pkg/mcp/cloud/providers/gcp/identity.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "os" "github.com/sourcehawk/triagent/pkg/mcp/cloud" ) @@ -18,12 +17,9 @@ type authAccount struct { // Identity is the read-only whoami. It is called by cloud.Probe with an // unvalidated RunFunc, so it may use the deny-floored `auth` subcommand // directly: it reads the active account and reports the session valid only when -// that account equals the pinned impersonation target the launcher set in -// CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT. A degraded auth state surfaces -// through Valid and Hint, never a Go error. -func (p *Provider) Identity(ctx context.Context, run cloud.RunFunc) (cloud.IdentityStatus, error) { - target := os.Getenv(EnvImpersonate) - +// that account equals expected, the impersonation target the launcher pinned. A +// degraded auth state surfaces through Valid and Hint, never a Go error. +func (p *Provider) Identity(ctx context.Context, run cloud.RunFunc, expected string) (cloud.IdentityStatus, error) { res, err := run(ctx, []string{"auth", "list", "--filter=status:ACTIVE", "--format=json"}) if err != nil { return cloud.IdentityStatus{Provider: "gcp", Valid: false, Hint: err.Error()}, nil @@ -42,15 +38,15 @@ func (p *Provider) Identity(ctx context.Context, run cloud.RunFunc) (cloud.Ident st := cloud.IdentityStatus{Provider: "gcp", AssumedIdentity: active} switch { - case target == "": + case expected == "": st.Valid = false st.Hint = "no impersonation target pinned; set " + EnvImpersonate + " on the cloud MCP subprocess" case active == "": st.Valid = false st.Hint = "no active gcloud account; run: gcloud auth login" - case active != target: + case active != expected: st.Valid = false - st.Hint = fmt.Sprintf("active account %q is not the pinned identity %q", active, target) + st.Hint = fmt.Sprintf("active account %q is not the pinned identity %q", active, expected) default: st.Valid = true } diff --git a/pkg/mcp/cloud/providers/gcp/identity_test.go b/pkg/mcp/cloud/providers/gcp/identity_test.go index ef553ac..04ba498 100644 --- a/pkg/mcp/cloud/providers/gcp/identity_test.go +++ b/pkg/mcp/cloud/providers/gcp/identity_test.go @@ -30,11 +30,10 @@ func runReturning(out string) cloud.RunFunc { } func TestIdentityResolvesActiveAccountAsTarget(t *testing.T) { - t.Setenv(EnvImpersonate, "ro-sa@proj.iam.gserviceaccount.com") p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) - st, err := p.Identity(context.Background(), runReturning(authListJSON)) + st, err := p.Identity(context.Background(), runReturning(authListJSON), "ro-sa@proj.iam.gserviceaccount.com") require.NoError(t, err) assert.Equal(t, "gcp", st.Provider) assert.Equal(t, "ro-sa@proj.iam.gserviceaccount.com", st.AssumedIdentity) @@ -42,12 +41,11 @@ func TestIdentityResolvesActiveAccountAsTarget(t *testing.T) { } func TestIdentityInvalidWhenActiveAccountIsNotTheTarget(t *testing.T) { - t.Setenv(EnvImpersonate, "ro-sa@proj.iam.gserviceaccount.com") p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) mismatch := `[{"account": "operator@example.com", "status": "ACTIVE"}]` - st, err := p.Identity(context.Background(), runReturning(mismatch)) + st, err := p.Identity(context.Background(), runReturning(mismatch), "ro-sa@proj.iam.gserviceaccount.com") require.NoError(t, err) assert.Equal(t, "operator@example.com", st.AssumedIdentity) assert.False(t, st.Valid, "active account differs from the impersonation target") @@ -55,11 +53,10 @@ func TestIdentityInvalidWhenActiveAccountIsNotTheTarget(t *testing.T) { } func TestIdentityInvalidWhenNoActiveAccount(t *testing.T) { - t.Setenv(EnvImpersonate, "ro-sa@proj.iam.gserviceaccount.com") p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) - st, err := p.Identity(context.Background(), runReturning(`[]`)) + st, err := p.Identity(context.Background(), runReturning(`[]`), "ro-sa@proj.iam.gserviceaccount.com") require.NoError(t, err) assert.Empty(t, st.AssumedIdentity) assert.False(t, st.Valid) @@ -67,32 +64,29 @@ func TestIdentityInvalidWhenNoActiveAccount(t *testing.T) { } func TestIdentityInvalidWhenNoImpersonationTargetPinned(t *testing.T) { - t.Setenv(EnvImpersonate, "") p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) - st, err := p.Identity(context.Background(), runReturning(authListJSON)) + st, err := p.Identity(context.Background(), runReturning(authListJSON), "") require.NoError(t, err) assert.False(t, st.Valid, "no pinned target means the session is not validly pinned") assert.NotEmpty(t, st.Hint) } func TestIdentitySurfacesRunErrorAsHint(t *testing.T) { - t.Setenv(EnvImpersonate, "ro-sa@proj.iam.gserviceaccount.com") p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) failing := cloud.RunFunc(func(context.Context, []string) (cloud.CLIResult, error) { return cloud.CLIResult{}, errors.New("gcloud not authenticated") }) - st, err := p.Identity(context.Background(), failing) + st, err := p.Identity(context.Background(), failing, "ro-sa@proj.iam.gserviceaccount.com") require.NoError(t, err, "a degraded auth state surfaces through Valid/Hint, not a Go error") assert.False(t, st.Valid) assert.Contains(t, st.Hint, "gcloud not authenticated") } func TestIdentityCallsAuthListWithJSONFormat(t *testing.T) { - t.Setenv(EnvImpersonate, "ro-sa@proj.iam.gserviceaccount.com") p, err := newWithBinary("/usr/bin/gcloud") require.NoError(t, err) @@ -101,7 +95,7 @@ func TestIdentityCallsAuthListWithJSONFormat(t *testing.T) { gotArgv = argv return cloud.CLIResult{Stdout: authListJSON}, nil }) - _, err = p.Identity(context.Background(), capturing) + _, err = p.Identity(context.Background(), capturing, "ro-sa@proj.iam.gserviceaccount.com") require.NoError(t, err) assert.Equal(t, []string{"auth", "list", "--filter=status:ACTIVE", "--format=json"}, gotArgv) } diff --git a/pkg/mcp/cloud/server.go b/pkg/mcp/cloud/server.go index 3e2d8a6..d03ca44 100644 --- a/pkg/mcp/cloud/server.go +++ b/pkg/mcp/cloud/server.go @@ -29,14 +29,19 @@ type Options struct { // Argv referencing a target outside the scope is rejected before exec. The // launcher fills it from TRIAGENT_CLOUD_SCOPE. Scope ScopeAllowlist + // ExpectedIdentity is the identity the launcher pinned for this session, + // threaded into the identity probe so it validates the resolved identity + // against it. The launcher fills it from TRIAGENT_CLOUD_EXPECTED_IDENTITY. + ExpectedIdentity string } // Server holds the configured cloud-context MCP server. type Server struct { - impl *mcp.Server - provider Provider - allowlist *CommandAllowlist - scope ScopeAllowlist + impl *mcp.Server + provider Provider + allowlist *CommandAllowlist + scope ScopeAllowlist + expectedIdentity string } // New constructs a cloud-context MCP server. Provider is required. The command @@ -56,10 +61,11 @@ func New(opts Options) (*Server, error) { Version: "0.1.0", }, nil) s := &Server{ - impl: impl, - provider: opts.Provider, - allowlist: allow, - scope: opts.Scope, + impl: impl, + provider: opts.Provider, + allowlist: allow, + scope: opts.Scope, + expectedIdentity: opts.ExpectedIdentity, } s.registerOn(impl) return s, nil diff --git a/pkg/mcp/cloud/tools_status.go b/pkg/mcp/cloud/tools_status.go index 08ead5e..02ecaca 100644 --- a/pkg/mcp/cloud/tools_status.go +++ b/pkg/mcp/cloud/tools_status.go @@ -20,7 +20,7 @@ type SessionStatusOutput = IdentityStatus // errors on an invalid identity — a stale credential surfaces as Valid:false // with a Hint, the same visible-degrade contract the launcher renders. func (s *Server) sessionStatus(ctx context.Context, _ *mcp.CallToolRequest, _ SessionStatusInput) (*mcp.CallToolResult, SessionStatusOutput, error) { - st, err := Probe(ctx, s.provider) + st, err := Probe(ctx, s.provider, s.expectedIdentity, s.subprocessEnv()) if err != nil { return errorResult(err.Error()), SessionStatusOutput{}, nil } From 4c1fc6244d7f9354b57e6c6f4e837fc5e1a1f751 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?= <54936225+sourcehawk@users.noreply.github.com> Date: Sat, 30 May 2026 17:37:48 +0200 Subject: [PATCH 2/3] refactor(cloud): drop os.Setenv pinning in ProbeSource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ProbeSource no longer mutates the launcher's process env (and its serializing mutex) to pin the per-provider expected identity. It now builds the subprocess credential env explicitly — base PATH/HOME plus the provider's declared config-dir passthrough names carried from os.Environ, with the source credential var overlaid — and threads the pinned identity into cloud.Probe. A test pins the no-mutation guarantee. Co-Authored-By: Claude Opus 4.8 (1M context) --- pkg/mcp/cloud/providers/probe.go | 110 ++++++++++++++------------ pkg/mcp/cloud/providers/probe_test.go | 71 +++++++++++++++++ 2 files changed, 131 insertions(+), 50 deletions(-) create mode 100644 pkg/mcp/cloud/providers/probe_test.go diff --git a/pkg/mcp/cloud/providers/probe.go b/pkg/mcp/cloud/providers/probe.go index 4e660b0..98d8827 100644 --- a/pkg/mcp/cloud/providers/probe.go +++ b/pkg/mcp/cloud/providers/probe.go @@ -3,13 +3,19 @@ package providers import ( "context" "os" - "sync" + "strings" "github.com/sourcehawk/triagent/pkg/mcp/cloud" "github.com/sourcehawk/triagent/pkg/mcp/cloud/providers/aws" "github.com/sourcehawk/triagent/pkg/mcp/cloud/providers/gcp" ) +// baseEnvPassthrough is the minimal env every provider CLI needs regardless of +// cloud: PATH so the resolved binary can find its own dependencies, HOME so it +// can locate per-user config. It mirrors the launcher-side serve harness; the +// per-source credential vars are overlaid on top. +var baseEnvPassthrough = []string{"PATH", "HOME"} + // Source is a neutral description of one cloud connection to probe: the // provider name, the pinned identity, and (aws only) the assume-role profile. // It carries exactly what ProbeSource needs without coupling this package to @@ -20,68 +26,72 @@ type Source struct { Profile string // aws AWS_PROFILE selector; ignored by gcp } -// probeEnvMu serializes the env pinning ProbeSource does. The whoami probe runs -// in the launcher process, where the provider reads its expected-identity env -// via os.Getenv; ProbeSource pins that env around the probe and restores it, so -// concurrent probes for different sources cannot read each other's pin. -var probeEnvMu sync.Mutex - // ProbeSource constructs the source's provider and runs the read-only identity -// probe, pinning the per-provider expected-identity env for the duration so the -// probe validates against this source's pinned identity. It degrades, never -// blocks: a provider construction error (e.g. a missing CLI binary) returns an -// invalid status with the error as the hint, exactly like a failed probe. +// probe, threading the source's pinned identity and the subprocess credential +// env explicitly so concurrent probes for different sources never share state. +// It degrades, never blocks: a provider construction error (e.g. a missing CLI +// binary) returns an invalid status with the error as the hint, exactly like a +// failed probe. func ProbeSource(ctx context.Context, src Source) cloud.IdentityStatus { p, err := New(src.Provider) if err != nil { return cloud.IdentityStatus{Provider: src.Provider, Valid: false, Hint: err.Error()} } - probeEnvMu.Lock() - defer probeEnvMu.Unlock() - defer pinIdentityEnv(src)() - - st, _ := cloud.Probe(ctx, p) + st, _ := cloud.Probe(ctx, p, src.AssumedIdentity, sourceEnvFor(p, src)) return st } -// pinIdentityEnv sets the per-provider expected-identity env for src and returns -// a restore func. gcp impersonates the assumed identity directly; aws selects an -// assume-role profile and checks the expected role ARN. The names come from the -// provider packages, never raw literals. -func pinIdentityEnv(src Source) func() { - switch src.Provider { - case "gcp": - return setEnv(map[string]string{gcp.EnvImpersonate: src.AssumedIdentity}) - case "aws": - return setEnv(map[string]string{ - aws.EnvProfile: src.Profile, - aws.EnvExpectedRoleARN: src.AssumedIdentity, - }) - default: - return func() {} - } +// passthroughLister is the slice of the cloud.Provider contract sourceEnvFor +// needs: which env names the provider's CLI carries from the parent process. +type passthroughLister interface { + EnvPassthrough() []string } -// setEnv sets each name to its value and returns a func restoring the prior -// values (including unset for names that were absent). -func setEnv(vals map[string]string) func() { - prior := make(map[string]*string, len(vals)) - for name, val := range vals { - if old, ok := os.LookupEnv(name); ok { - prior[name] = &old - } else { - prior[name] = nil - } - _ = os.Setenv(name, val) +// sourceEnvFor builds the explicit subprocess env for one source: the base +// PATH/HOME plus the provider's declared config-dir passthrough names, carried +// from the launcher process env, with the per-source credential var overlaid. +// The launcher process itself does not hold the pinned credential env (that is +// injected only into the serve subprocess), so ProbeSource supplies it here +// rather than reading it from os.Environ. +func sourceEnvFor(p passthroughLister, src Source) []string { + keep := make(map[string]bool, len(baseEnvPassthrough)+len(p.EnvPassthrough())) + for _, name := range baseEnvPassthrough { + keep[name] = true + } + for _, name := range p.EnvPassthrough() { + keep[name] = true } - return func() { - for name, old := range prior { - if old == nil { - _ = os.Unsetenv(name) - } else { - _ = os.Setenv(name, *old) - } + + overlay := credentialEnv(src) + var env []string + for _, kv := range os.Environ() { + name, _, ok := strings.Cut(kv, "=") + if !ok || !keep[name] { + continue + } + if _, overridden := overlay[name]; overridden { + continue } + env = append(env, kv) + } + for name, val := range overlay { + env = append(env, name+"="+val) + } + return env +} + +// credentialEnv is the per-provider credential the CLI authenticates with for +// the source: gcp impersonates the assumed identity directly; aws selects the +// assume-role profile. The env-name constants come from the provider packages, +// never raw literals. +func credentialEnv(src Source) map[string]string { + switch src.Provider { + case "gcp": + return map[string]string{gcp.EnvImpersonate: src.AssumedIdentity} + case "aws": + return map[string]string{aws.EnvProfile: src.Profile} + default: + return nil } } diff --git a/pkg/mcp/cloud/providers/probe_test.go b/pkg/mcp/cloud/providers/probe_test.go new file mode 100644 index 0000000..08c0498 --- /dev/null +++ b/pkg/mcp/cloud/providers/probe_test.go @@ -0,0 +1,71 @@ +package providers + +import ( + "context" + "os" + "testing" + + "github.com/sourcehawk/triagent/pkg/mcp/cloud/providers/aws" + "github.com/sourcehawk/triagent/pkg/mcp/cloud/providers/gcp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestProbeSourceDoesNotMutateProcessEnv pins the core guarantee of the +// explicit-threading refactor: ProbeSource builds the credential env for the +// subprocess without writing it into the launcher's own process env. A sentinel +// and the per-provider credential names must read identically before and after. +func TestProbeSourceDoesNotMutateProcessEnv(t *testing.T) { + t.Setenv("TRIAGENT_PROBE_SENTINEL", "untouched") + t.Setenv(aws.EnvProfile, "operator-base") + if err := os.Unsetenv(gcp.EnvImpersonate); err != nil { + require.NoError(t, err) + } + + for _, src := range []Source{ + {Provider: "gcp", AssumedIdentity: "ro-sa@proj.iam.gserviceaccount.com"}, + {Provider: "aws", AssumedIdentity: "arn:aws:iam::111122223333:role/triage-ro", Profile: "triage-ro"}, + } { + _ = ProbeSource(context.Background(), src) + } + + assert.Equal(t, "untouched", os.Getenv("TRIAGENT_PROBE_SENTINEL"), + "ProbeSource must not write to the process env") + assert.Equal(t, "operator-base", os.Getenv(aws.EnvProfile), + "ProbeSource must not pin AWS_PROFILE in the process env") + _, set := os.LookupEnv(gcp.EnvImpersonate) + assert.False(t, set, "ProbeSource must not pin the gcp impersonation env in the process env") +} + +func TestProbeSourceUnknownProviderDegrades(t *testing.T) { + st := ProbeSource(context.Background(), Source{Provider: "azure"}) + assert.False(t, st.Valid) + assert.Equal(t, "azure", st.Provider) + assert.NotEmpty(t, st.Hint) +} + +// fakePassthroughProvider exposes a fixed EnvPassthrough so sourceEnv's +// carry-and-overlay behaviour can be asserted without a real cloud CLI. +type fakePassthroughProvider struct{ passthrough []string } + +func (p *fakePassthroughProvider) EnvPassthrough() []string { return p.passthrough } + +func TestSourceEnvOverlaysCredentialOverProcessEnv(t *testing.T) { + t.Setenv("PATH", "/usr/bin") + t.Setenv("CLOUDSDK_CONFIG", "/home/op/.config/gcloud") + t.Setenv("TRIAGENT_PROBE_LEAK", "should-not-cross") + t.Setenv(gcp.EnvImpersonate, "operator-leaked@proj.iam.gserviceaccount.com") + + p := &fakePassthroughProvider{passthrough: []string{gcp.EnvImpersonate, "CLOUDSDK_CONFIG"}} + env := sourceEnvFor(p, Source{Provider: "gcp", AssumedIdentity: "ro-sa@proj.iam.gserviceaccount.com"}) + + assert.Contains(t, env, "PATH=/usr/bin", "base PATH is carried from the process env") + assert.Contains(t, env, "CLOUDSDK_CONFIG=/home/op/.config/gcloud", "declared config dir is carried") + assert.Contains(t, env, gcp.EnvImpersonate+"=ro-sa@proj.iam.gserviceaccount.com", + "the source credential overrides the process-env value") + assert.NotContains(t, env, gcp.EnvImpersonate+"=operator-leaked@proj.iam.gserviceaccount.com", + "the operator's ambient impersonation value must not survive the overlay") + for _, kv := range env { + assert.NotContains(t, kv, "TRIAGENT_PROBE_LEAK", "undeclared process env must not cross the boundary") + } +} From 2017decf46d98414d18b65f979f329c63469495c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86gir=20M=C3=A1ni=20Hauksson?= <54936225+sourcehawk@users.noreply.github.com> Date: Sat, 30 May 2026 17:37:56 +0200 Subject: [PATCH 3/3] refactor(preflight): uniform expected-identity env in mcpconfig and serve cloudSourceEnv now sets the uniform TRIAGENT_CLOUD_EXPECTED_IDENTITY for both providers in addition to the per-provider credential env the CLI authenticates with (gcp impersonation target, aws assume-role profile), replacing the aws-only expected-role-ARN env. runCloud reads the uniform env once and threads it into cloud.Options.ExpectedIdentity. Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/triagent-mcp/serve.go | 12 +++++++----- internal/preflight/mcpconfig.go | 23 ++++++++++++----------- internal/preflight/mcpconfig_test.go | 11 ++++++----- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/cmd/triagent-mcp/serve.go b/cmd/triagent-mcp/serve.go index 6308f94..7fab3b8 100644 --- a/cmd/triagent-mcp/serve.go +++ b/cmd/triagent-mcp/serve.go @@ -439,8 +439,9 @@ func runProm(ctx context.Context, f serveFlags) error { // runCloud wires the read-only cloud-context MCP. --provider selects the // concrete backend; New plugs it in behind cloud.Provider. The launcher passes -// the allowlist override path and target scope through the subprocess env -// (cloud.EnvAllowlistPath, cloud.EnvScope), never argv. +// the allowlist override path, target scope, and pinned identity through the +// subprocess env (cloud.EnvAllowlistPath, cloud.EnvScope, +// cloud.EnvExpectedIdentity), never argv. func runCloud(ctx context.Context, f serveFlags) error { if f.cloudProvider == "" { return fmt.Errorf("--provider is required (gcp or aws) (set --provider or $%s)", cloud.EnvProvider) @@ -450,9 +451,10 @@ func runCloud(ctx context.Context, f serveFlags) error { return err } srv, err := cloud.New(cloud.Options{ - Provider: provider, - AllowlistPath: os.Getenv(cloud.EnvAllowlistPath), - Scope: parseCloudScope(os.Getenv(cloud.EnvScope)), + Provider: provider, + AllowlistPath: os.Getenv(cloud.EnvAllowlistPath), + Scope: parseCloudScope(os.Getenv(cloud.EnvScope)), + ExpectedIdentity: os.Getenv(cloud.EnvExpectedIdentity), }) if err != nil { return fmt.Errorf("build cloud mcp server: %w", err) diff --git a/internal/preflight/mcpconfig.go b/internal/preflight/mcpconfig.go index c01dc5a..d2686b2 100644 --- a/internal/preflight/mcpconfig.go +++ b/internal/preflight/mcpconfig.go @@ -181,19 +181,21 @@ func kubeEnv(in mcpConfigInputs) map[string]string { // cloudSourceEnv builds the subprocess env for one triagent-cloud- // server: the provider selector, the optional allowlist-override path, the -// JSON-encoded scope the cloud package decodes, and the per-provider -// pinned-identity env. +// JSON-encoded scope the cloud package decodes, the pinned identity the probe +// validates against, and the per-provider credential env the CLI authenticates +// with. // -// The two clouds pin identity through different env, by mechanism. GCP -// impersonates the assumed identity directly, so a single env -// (CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT) is both the impersonation target -// and the expected identity. AWS selects an assume-role profile (AWS_PROFILE) -// for credentials and checks the role ARN (TRIAGENT_CLOUD_AWS_EXPECTED_ROLE_ARN) -// for strict validity, so it needs both a profile selector and the expected ARN. -// The env-name constants come from the provider packages, never raw literals. +// The pinned identity is uniform: TRIAGENT_CLOUD_EXPECTED_IDENTITY carries it +// for both clouds, and the probe validates the resolved identity against it. The +// credential env differs by mechanism: GCP impersonates the assumed identity +// directly (CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT), AWS selects an +// assume-role profile (AWS_PROFILE) whose role_arn is the deployment's read-only +// role. The env-name constants come from the provider packages, never raw +// literals. func cloudSourceEnv(src profile.CloudSource) (map[string]string, error) { env := map[string]string{ - cloud.EnvProvider: src.Provider, + cloud.EnvProvider: src.Provider, + cloud.EnvExpectedIdentity: src.AssumedIdentity, } if src.CommandAllowlistPath != "" { env[cloud.EnvAllowlistPath] = src.CommandAllowlistPath @@ -209,7 +211,6 @@ func cloudSourceEnv(src profile.CloudSource) (map[string]string, error) { env[gcp.EnvImpersonate] = src.AssumedIdentity case "aws": env[aws.EnvProfile] = src.Profile - env[aws.EnvExpectedRoleARN] = src.AssumedIdentity } return env, nil } diff --git a/internal/preflight/mcpconfig_test.go b/internal/preflight/mcpconfig_test.go index 4b0f833..b734551 100644 --- a/internal/preflight/mcpconfig_test.go +++ b/internal/preflight/mcpconfig_test.go @@ -422,12 +422,12 @@ func TestWriteMCPConfig_GCPCloudSource_RegistersServerWithImpersonationEnv(t *te require.NotNil(t, env) assert.Equal(t, "gcp", env[cloud.EnvProvider]) assert.Equal(t, "/etc/triagent/gcp-allow.json", env[cloud.EnvAllowlistPath]) - // gcp impersonates the assumed identity directly; that one env is both - // the impersonation target and the expected identity. + // The pinned identity is uniform across providers. + assert.Equal(t, "triage-ro@prod.iam.gserviceaccount.com", env[cloud.EnvExpectedIdentity]) + // gcp impersonates the assumed identity directly as its credential env. assert.Equal(t, "triage-ro@prod.iam.gserviceaccount.com", env[gcp.EnvImpersonate]) // AWS-specific env must not leak onto a gcp source. assert.NotContains(t, env, aws.EnvProfile) - assert.NotContains(t, env, aws.EnvExpectedRoleARN) rawScope, _ := env[cloud.EnvScope].(string) require.NotEmpty(t, rawScope, "scope must be JSON-encoded into the env") @@ -460,9 +460,10 @@ func TestWriteMCPConfig_AWSCloudSource_RegistersServerWithProfileAndExpectedRole env, _ := srv["env"].(map[string]any) require.NotNil(t, env) assert.Equal(t, "aws", env[cloud.EnvProvider]) - // aws needs BOTH a profile selector and the expected role ARN. + // The pinned identity is uniform across providers. + assert.Equal(t, "arn:aws:iam::123456789012:role/triage-ro", env[cloud.EnvExpectedIdentity]) + // aws selects an assume-role profile as its credential env. assert.Equal(t, "triage-ro", env[aws.EnvProfile]) - assert.Equal(t, "arn:aws:iam::123456789012:role/triage-ro", env[aws.EnvExpectedRoleARN]) // gcp impersonation env must not leak onto an aws source. assert.NotContains(t, env, gcp.EnvImpersonate) }