diff --git a/cmd/triagent-mcp/serve.go b/cmd/triagent-mcp/serve.go
index 1b81ecb..bed832f 100644
--- a/cmd/triagent-mcp/serve.go
+++ b/cmd/triagent-mcp/serve.go
@@ -12,6 +12,7 @@ import (
 	"github.com/charmbracelet/log"
 	"github.com/sourcehawk/triagent/pkg/mcp/agentoperator"
 	"github.com/sourcehawk/triagent/pkg/mcp/cloud"
+	"github.com/sourcehawk/triagent/pkg/mcp/cloud/providers/gcp"
 	"github.com/sourcehawk/triagent/pkg/mcp/git"
 	"github.com/sourcehawk/triagent/pkg/mcp/incidentio"
 	"github.com/sourcehawk/triagent/pkg/mcp/k8s"
@@ -460,13 +461,22 @@ func runCloud(ctx context.Context, f serveFlags) error {
 	return srv.Run(ctx)
 }
 
-// newCloudProvider constructs the cloud.Provider for the named provider. The
-// gcp and aws implementations land in pkg/mcp/cloud/providers/ in their
-// own PRs; until then a known provider reports that it is not yet built and an
-// unknown one is named in the error.
+// newCloudProvider constructs the cloud.Provider for the named provider. Each
+// implementation lives in its own subpackage of pkg/mcp/cloud/providers. A
+// known provider that is not built yet ("aws") and an unknown provider both
+// return a nil Provider and an error that names the provider.
 func newCloudProvider(name string) (cloud.Provider, error) {
 	switch name {
-	case "gcp", "aws":
+	case "gcp":
+		// gcp.New returns a concrete *gcp.Provider. Returning it directly on
+		// failure would wrap a nil pointer in a non-nil cloud.Provider, so
+		// hand back an untyped nil instead.
+		p, err := gcp.New()
+		if err != nil {
+			return nil, err
+		}
+		return p, nil
+	case "aws":
 		return nil, fmt.Errorf("cloud provider %q is not built yet", name)
 	default:
 		return nil, fmt.Errorf("unknown cloud --provider %q (want gcp or aws)", name)
diff --git a/pkg/mcp/cloud/providers/gcp/default_commands.json b/pkg/mcp/cloud/providers/gcp/default_commands.json
new file mode 100644
index 0000000..89066a2
--- /dev/null
+++ b/pkg/mcp/cloud/providers/gcp/default_commands.json
@@ -0,0 +1,36 @@
+{
+  "commands": [
+    { "path": "projects list", "description": "inventory: list the projects the pinned identity can see" },
+    { "path": "projects describe", "description": "inventory: project metadata, lifecycle state, and labels" },
+    { "path": "projects get-iam-policy", "description": "permissions: the IAM policy bound on a project" },
+
+    { "path": "compute instances list", "description": "inventory: compute instances in a project" },
+    { "path": "compute instances describe", "description": "reachability: an instance's network interfaces, tags, and service account" },
+    { "path": "compute networks list", "description": "reachability: VPC networks in a project" },
+    { "path": "compute networks describe", "description": "reachability: a VPC network's subnet and peering layout" },
+    { "path": "compute networks subnets list", "description": "reachability: subnets and their CIDR ranges" },
+    { "path": "compute networks subnets describe", "description": "reachability: a subnet's range, region, and secondary ranges" },
+    { "path": "compute firewall-rules list", "description": "reachability: firewall rules governing traffic to a workload" },
+    { "path": "compute firewall-rules describe", "description": "reachability: a firewall rule's direction, ports, and target tags" },
+    { "path": "compute routes list", "description": "reachability: routes that steer egress out of a network" },
+    { "path": "compute routes describe", "description": "reachability: a single route's next-hop and priority" },
+    { "path": "compute addresses list", "description": "reachability: reserved internal and external IP addresses" },
+    { "path": "compute forwarding-rules list", "description": "reachability: load-balancer forwarding rules and their backends" },
+
+    { "path": "container clusters list", "description": "cluster: GKE clusters and their endpoints in a project" },
+    { "path": "container clusters describe", "description": "cluster: a GKE cluster's networking, workload-identity, and node config" },
+    { "path": "container node-pools list", "description": "cluster: node pools backing a GKE cluster" },
+    { "path": "container node-pools describe", "description": "cluster: a node pool's machine type, autoscaling, and image config" },
+
+    { "path": "iam service-accounts list", "description": "permissions: service accounts defined in a project" },
+    { "path": "iam service-accounts describe", "description": "permissions: a service account's display name and disabled state" },
+    { "path": "iam service-accounts get-iam-policy", "description": "permissions: who can impersonate or manage a service account" },
+    { "path": "iam roles describe", "description": "permissions: the permissions a role grants" },
+
+    { "path": "logging read", "description": "logs: read entries from a project's log buckets with a filter" },
+    { "path": "logging logs list", "description": "audit: enumerate available log streams, including data_access and activity audit logs" },
+    { "path": "logging sinks list", "description": "audit: where log entries are routed for retention" },
+
+    { "path": "monitoring dashboards list", "description": "cluster: monitoring dashboards configured for the project" }
+  ]
+}
diff --git a/pkg/mcp/cloud/providers/gcp/identity.go b/pkg/mcp/cloud/providers/gcp/identity.go
new file mode 100644
index 0000000..b39d298
--- /dev/null
+++ b/pkg/mcp/cloud/providers/gcp/identity.go
@@ -0,0 +1,80 @@
+package gcp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+
+	"github.com/sourcehawk/triagent/pkg/mcp/cloud"
+)
+
+// authAccount is one entry of `gcloud auth list --format=json`. Only the
+// account name and its status are decoded.
+type authAccount struct {
+	Account string `json:"account"`
+	Status  string `json:"status"`
+}
+
+// Identity is the read-only whoami. It is called by cloud.Probe with an
+// unvalidated RunFunc, so it may use the deny-floored `auth` subcommand
+// directly. It asks gcloud for the active account and reports the session
+// valid only when that account equals the pinned impersonation target the
+// launcher set in CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT.
+//
+// A degraded auth state (run failure, unparsable output, no pin, no active
+// account, or a mismatch) surfaces through Valid and Hint; the returned error
+// is always nil.
+//
+// NOTE(review): this assumes `gcloud auth list` reports the impersonated
+// service account as the ACTIVE account. If it reports the source credential
+// instead, a correctly pinned session lands in the mismatch case; confirm
+// against a real impersonated session.
+func (p *Provider) Identity(ctx context.Context, run cloud.RunFunc) (cloud.IdentityStatus, error) {
+	// degraded reports an invalid session with no resolved identity.
+	degraded := func(hint string) (cloud.IdentityStatus, error) {
+		return cloud.IdentityStatus{Provider: "gcp", Valid: false, Hint: hint}, nil
+	}
+
+	pinned := os.Getenv(impersonationEnv)
+
+	out, err := run(ctx, []string{"auth", "list", "--filter=status:ACTIVE", "--format=json"})
+	if err != nil {
+		return degraded(err.Error())
+	}
+
+	var listed []authAccount
+	if err := json.Unmarshal([]byte(out.Stdout), &listed); err != nil {
+		return degraded(fmt.Sprintf("parse gcloud auth list output: %v", err))
+	}
+
+	current := activeAccount(listed)
+	status := cloud.IdentityStatus{Provider: "gcp", AssumedIdentity: current}
+
+	// The first matching case wins: a missing pin outranks a missing account,
+	// which outranks a mismatch. Valid keeps its false zero value unless every
+	// check passes.
+	switch {
+	case pinned == "":
+		status.Hint = "no impersonation target pinned; set " + impersonationEnv + " on the cloud MCP subprocess"
+	case current == "":
+		status.Hint = "no active gcloud account; run: gcloud auth login"
+	case current != pinned:
+		status.Hint = fmt.Sprintf("active account %q is not the pinned identity %q", current, pinned)
+	default:
+		status.Valid = true
+	}
+	return status, nil
+}
+
+// activeAccount returns the first account marked ACTIVE, or "" when none is.
+// The --filter=status:ACTIVE argv already asks gcloud for active accounts only;
+// the status check here is belt and braces in case that filter is not applied.
+func activeAccount(accounts []authAccount) string {
+	for i := range accounts {
+		if acct := accounts[i]; acct.Status == "ACTIVE" {
+			return acct.Account
+		}
+	}
+	return ""
+}
diff --git a/pkg/mcp/cloud/providers/gcp/identity_test.go b/pkg/mcp/cloud/providers/gcp/identity_test.go
new file mode 100644
index 0000000..f737892
--- /dev/null
+++ b/pkg/mcp/cloud/providers/gcp/identity_test.go
@@ -0,0 +1,107 @@
+package gcp
+
+import (
+	"context"
+	"errors"
+	"testing"
+
+	"github.com/sourcehawk/triagent/pkg/mcp/cloud"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// authListJSON is captured `gcloud auth list --format=json` output: an array of
+// accounts, exactly one with status ACTIVE.
+const authListJSON = `[
+  {
+    "account": "ro-sa@proj.iam.gserviceaccount.com",
+    "status": "ACTIVE"
+  },
+  {
+    "account": "operator@example.com",
+    "status": ""
+  }
+]`
+
+func runReturning(out string) cloud.RunFunc {
+	return func(context.Context, []string) (cloud.CLIResult, error) {
+		return cloud.CLIResult{Stdout: out}, nil
+	}
+}
+
+func TestIdentityResolvesActiveAccountAsTarget(t *testing.T) {
+	t.Setenv(impersonationEnv, "ro-sa@proj.iam.gserviceaccount.com")
+	p, err := newWithBinary("/usr/bin/gcloud")
+	require.NoError(t, err)
+
+	st, err := p.Identity(context.Background(), runReturning(authListJSON))
+	require.NoError(t, err)
+	assert.Equal(t, "gcp", st.Provider)
+	assert.Equal(t, "ro-sa@proj.iam.gserviceaccount.com", st.AssumedIdentity)
+	assert.True(t, st.Valid, "active account equals the impersonation target")
+}
+
+func TestIdentityInvalidWhenActiveAccountIsNotTheTarget(t *testing.T) {
+	t.Setenv(impersonationEnv, "ro-sa@proj.iam.gserviceaccount.com")
+	p, err := newWithBinary("/usr/bin/gcloud")
+	require.NoError(t, err)
+
+	mismatch := `[{"account": "operator@example.com", "status": "ACTIVE"}]`
+	st, err := p.Identity(context.Background(),
runReturning(mismatch)) + require.NoError(t, err) + assert.Equal(t, "operator@example.com", st.AssumedIdentity) + assert.False(t, st.Valid, "active account differs from the impersonation target") + assert.NotEmpty(t, st.Hint) +} + +func TestIdentityInvalidWhenNoActiveAccount(t *testing.T) { + t.Setenv(impersonationEnv, "ro-sa@proj.iam.gserviceaccount.com") + p, err := newWithBinary("/usr/bin/gcloud") + require.NoError(t, err) + + st, err := p.Identity(context.Background(), runReturning(`[]`)) + require.NoError(t, err) + assert.Empty(t, st.AssumedIdentity) + assert.False(t, st.Valid) + assert.NotEmpty(t, st.Hint) +} + +func TestIdentityInvalidWhenNoImpersonationTargetPinned(t *testing.T) { + t.Setenv(impersonationEnv, "") + p, err := newWithBinary("/usr/bin/gcloud") + require.NoError(t, err) + + st, err := p.Identity(context.Background(), runReturning(authListJSON)) + require.NoError(t, err) + assert.False(t, st.Valid, "no pinned target means the session is not validly pinned") + assert.NotEmpty(t, st.Hint) +} + +func TestIdentitySurfacesRunErrorAsHint(t *testing.T) { + t.Setenv(impersonationEnv, "ro-sa@proj.iam.gserviceaccount.com") + p, err := newWithBinary("/usr/bin/gcloud") + require.NoError(t, err) + + failing := cloud.RunFunc(func(context.Context, []string) (cloud.CLIResult, error) { + return cloud.CLIResult{}, errors.New("gcloud not authenticated") + }) + st, err := p.Identity(context.Background(), failing) + require.NoError(t, err, "a degraded auth state surfaces through Valid/Hint, not a Go error") + assert.False(t, st.Valid) + assert.Contains(t, st.Hint, "gcloud not authenticated") +} + +func TestIdentityCallsAuthListWithJSONFormat(t *testing.T) { + t.Setenv(impersonationEnv, "ro-sa@proj.iam.gserviceaccount.com") + p, err := newWithBinary("/usr/bin/gcloud") + require.NoError(t, err) + + var gotArgv []string + capturing := cloud.RunFunc(func(_ context.Context, argv []string) (cloud.CLIResult, error) { + gotArgv = argv + return cloud.CLIResult{Stdout: 
authListJSON}, nil
+	})
+	_, err = p.Identity(context.Background(), capturing)
+	require.NoError(t, err)
+	assert.Equal(t, []string{"auth", "list", "--filter=status:ACTIVE", "--format=json"}, gotArgv)
+}
diff --git a/pkg/mcp/cloud/providers/gcp/inventory.go b/pkg/mcp/cloud/providers/gcp/inventory.go
new file mode 100644
index 0000000..bdf1cba
--- /dev/null
+++ b/pkg/mcp/cloud/providers/gcp/inventory.go
@@ -0,0 +1,42 @@
+package gcp
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/sourcehawk/triagent/pkg/mcp/cloud"
+)
+
+// project is one entry of `gcloud projects list --format=json`. Only the fields
+// the inventory projection surfaces are decoded.
+type project struct {
+	ProjectID string `json:"projectId"`
+	Name      string `json:"name"`
+}
+
+// Inventory runs `gcloud projects list --format=json` through run and projects
+// each listed project to a cloud.Scope carrying its id and name. The argv is
+// fixed because run is the server's validated RunFunc, so the verb chain must
+// match the allowlisted `projects list` entry exactly. Unlike the identity
+// probe, which degrades, a run or parse failure here is returned to the caller
+// as a wrapped error alongside a zero Inventory.
+func (p *Provider) Inventory(ctx context.Context, run cloud.RunFunc) (cloud.Inventory, error) {
+	out, runErr := run(ctx, []string{"projects", "list", "--format=json"})
+	if runErr != nil {
+		return cloud.Inventory{}, fmt.Errorf("gcloud projects list: %w", runErr)
+	}
+
+	var listed []project
+	if parseErr := json.Unmarshal([]byte(out.Stdout), &listed); parseErr != nil {
+		return cloud.Inventory{}, fmt.Errorf("parse gcloud projects list output: %w", parseErr)
+	}
+
+	// Size the slice up front so an empty listing still yields a non-nil,
+	// zero-length Scopes slice.
+	scopes := make([]cloud.Scope, len(listed))
+	for i := range listed {
+		scopes[i] = cloud.Scope{ID: listed[i].ProjectID, Name: listed[i].Name}
+	}
+	return cloud.Inventory{Scopes: scopes}, nil
+}
diff --git a/pkg/mcp/cloud/providers/gcp/inventory_test.go b/pkg/mcp/cloud/providers/gcp/inventory_test.go
new file mode 100644
index 0000000..66404f4
--- /dev/null
+++ b/pkg/mcp/cloud/providers/gcp/inventory_test.go
@@ -0,0 +1,77 @@
+package gcp
+
+import (
+	"context"
+	"errors"
+	"testing"
+
+	"github.com/sourcehawk/triagent/pkg/mcp/cloud"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// projectsListJSON is captured `gcloud projects list --format=json` output.
+const projectsListJSON = `[ + { + "projectId": "triage-prod", + "name": "Triage Production", + "projectNumber": "111111111111", + "lifecycleState": "ACTIVE" + }, + { + "projectId": "triage-staging", + "name": "Triage Staging", + "projectNumber": "222222222222", + "lifecycleState": "ACTIVE" + } +]` + +func TestInventoryProjectsIDAndName(t *testing.T) { + t.Parallel() + p, err := newWithBinary("/usr/bin/gcloud") + require.NoError(t, err) + + inv, err := p.Inventory(context.Background(), runReturning(projectsListJSON)) + require.NoError(t, err) + require.Len(t, inv.Scopes, 2) + assert.Equal(t, cloud.Scope{ID: "triage-prod", Name: "Triage Production"}, inv.Scopes[0]) + assert.Equal(t, cloud.Scope{ID: "triage-staging", Name: "Triage Staging"}, inv.Scopes[1]) +} + +func TestInventoryEmptyWhenNoProjects(t *testing.T) { + t.Parallel() + p, err := newWithBinary("/usr/bin/gcloud") + require.NoError(t, err) + + inv, err := p.Inventory(context.Background(), runReturning(`[]`)) + require.NoError(t, err) + assert.Empty(t, inv.Scopes) +} + +func TestInventoryCallsProjectsListWithJSONFormat(t *testing.T) { + t.Parallel() + p, err := newWithBinary("/usr/bin/gcloud") + require.NoError(t, err) + + var gotArgv []string + capturing := cloud.RunFunc(func(_ context.Context, argv []string) (cloud.CLIResult, error) { + gotArgv = argv + return cloud.CLIResult{Stdout: projectsListJSON}, nil + }) + _, err = p.Inventory(context.Background(), capturing) + require.NoError(t, err) + assert.Equal(t, []string{"projects", "list", "--format=json"}, gotArgv, + "the inventory argv must match the allowlisted `projects list` verb chain exactly") +} + +func TestInventoryErrorsWhenRunErrors(t *testing.T) { + t.Parallel() + p, err := newWithBinary("/usr/bin/gcloud") + require.NoError(t, err) + + failing := cloud.RunFunc(func(context.Context, []string) (cloud.CLIResult, error) { + return cloud.CLIResult{}, errors.New("projects list rejected") + }) + _, err = p.Inventory(context.Background(), failing) + 
require.Error(t, err, "a run error is a real failure of the inventory tool, surfaced to the caller")
+}
diff --git a/pkg/mcp/cloud/providers/gcp/provider.go b/pkg/mcp/cloud/providers/gcp/provider.go
new file mode 100644
index 0000000..245d2d3
--- /dev/null
+++ b/pkg/mcp/cloud/providers/gcp/provider.go
@@ -0,0 +1,87 @@
+// Package gcp implements the cloud.Provider contract over the gcloud CLI. It is
+// selected by --provider=gcp and plugged into the cloud-context MCP behind the
+// Provider interface (the teleport DI pattern); it never reaches into the parent
+// cloud package's harness. All cloud access shells gcloud through the injected
+// cloud.RunFunc — there is no cloud.google.com/go SDK dependency.
+package gcp
+
+import (
+	_ "embed"
+	"encoding/json"
+	"fmt"
+	"os/exec"
+
+	"github.com/sourcehawk/triagent/pkg/mcp/cloud"
+)
+
+// defaultCommandsJSON is the embedded read-only gcloud command allowlist. Each
+// entry's description names the investigative axis it serves. The exact-match
+// allowlist requires the complete invariant verb chain per entry.
+//
+//go:embed default_commands.json
+var defaultCommandsJSON []byte
+
+// impersonationEnv is the env var the launcher sets to pin the read-only
+// service account gcloud impersonates. The provider reads it (never sets it) to
+// learn which identity Identity must resolve to; it is on the agent deny floor
+// as a flag, so the agent can never select it.
+const impersonationEnv = "CLOUDSDK_AUTH_IMPERSONATE_SERVICE_ACCOUNT"
+
+var _ cloud.Provider = (*Provider)(nil)
+
+// Provider implements cloud.Provider over the gcloud CLI.
+type Provider struct {
+	binary    string
+	allowlist *cloud.CommandAllowlist
+}
+
+// New constructs the gcp provider. gcloud is resolved once, here, via
+// exec.LookPath, so a PATH poisoned later cannot redirect the binary at run
+// time. A failed lookup is returned as a wrapped error.
+func New() (*Provider, error) {
+	resolved, err := exec.LookPath("gcloud")
+	if err != nil {
+		return nil, fmt.Errorf("gcp: resolve gcloud binary: %w", err)
+	}
+	return newWithBinary(resolved)
+}
+
+// newWithBinary builds the provider around an already-resolved binary path and
+// the allowlist decoded from the embedded default_commands.json. Tests use it to
+// inject a fixed path without going through exec.LookPath.
+func newWithBinary(binary string) (*Provider, error) {
+	allow := new(cloud.CommandAllowlist)
+	if err := json.Unmarshal(defaultCommandsJSON, allow); err != nil {
+		return nil, fmt.Errorf("gcp: parse embedded default_commands.json: %w", err)
+	}
+	return &Provider{binary: binary, allowlist: allow}, nil
+}
+
+// Name reports the provider identifier, "gcp".
+func (p *Provider) Name() string { return "gcp" }
+
+// Binary reports the gcloud path the provider was constructed with.
+func (p *Provider) Binary() string { return p.binary }
+
+// DefaultAllowlist returns the command allowlist decoded from the embedded
+// default_commands.json. The same pointer is returned on every call.
+func (p *Provider) DefaultAllowlist() *cloud.CommandAllowlist { return p.allowlist }
+
+// DenyFloorAdditions contributes gcp-specific subcommands that read credentials,
+// shell into instances, or mutate by side effect, on top of the base floor.
+func (p *Provider) DenyFloorAdditions() cloud.DenyFloor {
+	denied := []string{
+		"compute ssh",
+		"compute scp",
+		"compute reset-windows-password",
+		"functions call",
+	}
+	return cloud.DenyFloor{Subcommands: denied}
+}
+
+// EnvPassthrough names the gcloud env vars the subprocess needs: the pinned
+// impersonation target plus the config and active-project locations. PATH and
+// HOME are forwarded by the harness base set, so they are absent here.
+func (p *Provider) EnvPassthrough() []string {
+	return []string{impersonationEnv, "CLOUDSDK_CONFIG", "CLOUDSDK_CORE_PROJECT"}
+}
diff --git a/pkg/mcp/cloud/providers/gcp/provider_test.go b/pkg/mcp/cloud/providers/gcp/provider_test.go
new file mode 100644
index 0000000..18c1702
--- /dev/null
+++ b/pkg/mcp/cloud/providers/gcp/provider_test.go
@@ -0,0 +1,68 @@
+package gcp
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestNewResolvesBinaryAndName(t *testing.T) {
+	t.Parallel()
+	p, err := newWithBinary("/usr/bin/gcloud")
+	require.NoError(t, err)
+	assert.Equal(t, "gcp", p.Name())
+	assert.Equal(t, "/usr/bin/gcloud", p.Binary())
+}
+
+func TestDefaultAllowlistLoadsEmbeddedJSON(t *testing.T) {
+	t.Parallel()
+	p, err := newWithBinary("/usr/bin/gcloud")
+	require.NoError(t, err)
+	allow := p.DefaultAllowlist()
+	require.NotNil(t, allow)
+	assert.NotEmpty(t, allow.Commands, "embedded default_commands.json should ship read-only commands")
+}
+
+func TestDefaultAllowlistIncludesProjectsList(t *testing.T) {
+	t.Parallel()
+	p, err := newWithBinary("/usr/bin/gcloud")
+	require.NoError(t, err)
+	assert.True(t, p.DefaultAllowlist().Allows([]string{"projects", "list", "--format=json"}),
+		"Inventory needs `projects list` on the allowlist")
+}
+
+func TestDefaultAllowlistCoversInvestigativeAxes(t *testing.T) {
+	t.Parallel()
+	p, err := newWithBinary("/usr/bin/gcloud")
+	require.NoError(t, err)
+	allow := p.DefaultAllowlist()
+	// One representative read-only command per investigative axis. Exact-match
+	// allowlist, so each is the complete invariant verb chain.
+ axes := [][]string{ + {"projects", "list"}, // inventory + {"compute", "firewall-rules", "list"}, // reachability + {"projects", "get-iam-policy"}, // permissions / IAM read + {"container", "clusters", "describe"}, // cluster / GKE describe + {"logging", "read"}, // logs read + {"logging", "logs", "list"}, // audit read + } + for _, argv := range axes { + assert.Truef(t, allow.Allows(argv), "expected %v on the allowlist", argv) + } +} + +func TestDenyFloorAdditionsCoverDangerousGCPSubcommands(t *testing.T) { + t.Parallel() + p, err := newWithBinary("/usr/bin/gcloud") + require.NoError(t, err) + floor := p.DenyFloorAdditions() + for _, want := range []string{ + "compute ssh", + "compute scp", + "functions call", + "compute reset-windows-password", + } { + assert.Containsf(t, floor.Subcommands, want, "expected %q on the gcp deny-floor additions", want) + } +}