Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ The handler filters tools dynamically based on `GetMyPermissions` from Sysdig Se
| `troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota` | `tool_troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota.go` | List Kubernetes pods with memory usage below 25% of the limit. | `promql.exec` | "Show the top 10 underutilized pods by memory quota in cluster 'production'" |
| `troubleshoot_kubernetes_list_top_cpu_consumed_by_workload` | `tool_troubleshoot_kubernetes_list_top_cpu_consumed_by_workload.go` | Identifies the Kubernetes workloads (all containers) consuming the most CPU (in cores). | `promql.exec` | "Show the top 10 workloads consuming the most CPU in cluster 'production'" |
| `troubleshoot_kubernetes_list_top_cpu_consumed_by_container` | `tool_troubleshoot_kubernetes_list_top_cpu_consumed_by_container.go` | Identifies the Kubernetes containers consuming the most CPU (in cores). | `promql.exec` | "Show the top 10 containers consuming the most CPU in cluster 'production'" |
| `troubleshoot_kubernetes_list_top_memory_consumed_by_workload` | `tool_troubleshoot_kubernetes_list_top_memory_consumed_by_workload.go` | Lists memory-intensive workloads (all containers). | `promql.exec` | "Show the top 10 workloads consuming the most memory in cluster 'production'" |

Every tool has a companion `_test.go` file that exercises request validation, permission metadata, and Sysdig client calls through mocks.
Note: whenever you add a new tool, you must also update this file so that the table above stays accurate.
Expand Down
23 changes: 14 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,17 +166,22 @@ The server dynamically filters the available tools based on the permissions asso
- **`troubleshoot_kubernetes_list_underutilized_pods_by_memory_quota`**
- **Description**: List Kubernetes pods with memory usage below 25% of the limit.
- **Required Permission**: `promql.exec`
- **Sample Prompt**: "Show the top 10 underutilized pods by memory quota in cluster 'production'"
- **Sample Prompt**: "Show the top 10 underutilized pods by memory quota in cluster 'production'"

- **`troubleshoot_kubernetes_list_top_cpu_consumed_by_workload`**
- **Description**: Identifies the Kubernetes workloads (all containers) consuming the most CPU (in cores).
- **Required Permission**: `promql.exec`
- **Sample Prompt**: "Show the top 10 workloads consuming the most CPU in cluster 'production'"
- **`troubleshoot_kubernetes_list_top_cpu_consumed_by_workload`**
- **Description**: Identifies the Kubernetes workloads (all containers) consuming the most CPU (in cores).
- **Required Permission**: `promql.exec`
- **Sample Prompt**: "Show the top 10 workloads consuming the most CPU in cluster 'production'"

- **`troubleshoot_kubernetes_list_top_cpu_consumed_by_container`**
- **Description**: Identifies the Kubernetes containers consuming the most CPU (in cores).
- **Required Permission**: `promql.exec`
- **Sample Prompt**: "Show the top 10 containers consuming the most CPU in cluster 'production'"

- **`troubleshoot_kubernetes_list_top_cpu_consumed_by_container`**
- **Description**: Identifies the Kubernetes containers consuming the most CPU (in cores).
- **Required Permission**: `promql.exec`
- **Sample Prompt**: "Show the top 10 containers consuming the most CPU in cluster 'production'"
- **`troubleshoot_kubernetes_list_top_memory_consumed_by_workload`**
- **Description**: Lists memory-intensive workloads (all containers).
- **Required Permission**: `promql.exec`
- **Sample Prompt**: "Show the top 10 workloads consuming the most memory in cluster 'production'"

## Requirements
- [Go](https://go.dev/doc/install) 1.25 or higher (if running without Docker).
Expand Down
1 change: 1 addition & 0 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ func setupHandler(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *mcp
tools.NewTroubleshootKubernetesListTopCPUConsumedByWorkload(sysdigClient),
tools.NewTroubleshootKubernetesListTopCPUConsumedByContainer(sysdigClient),
tools.NewTroubleshootKubernetesListUnderutilizedPodsByMemoryQuota(sysdigClient),
tools.NewTroubleshootKubernetesListTopMemoryConsumedByWorkload(sysdigClient),
)
return handler
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (t *TroubleshootKubernetesListTopCPUConsumedByContainer) RegisterInServer(s
mcp.DefaultNumber(20),
),
mcp.WithOutputSchema[map[string]any](),
WithRequiredPermissions("promql.exec"),
WithRequiredPermissions(), // FIXME(fede): Add the required permissions. It should be `promql.exec` but somehow the token does not have that permission even if you are able to execute queries.
)
s.AddTool(tool, t.handle)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (t *TroubleshootKubernetesListTopCPUConsumedByWorkload) RegisterInServer(s
mcp.DefaultNumber(20),
),
mcp.WithOutputSchema[map[string]any](),
WithRequiredPermissions("promql.exec"),
WithRequiredPermissions(), // FIXME(fede): Add the required permissions. It should be `promql.exec` but somehow the token does not have that permission even if you are able to execute queries.
)
s.AddTool(tool, t.handle)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package tools

import (
"context"
"encoding/json"
"fmt"
"io"
"strings"

"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
)

// TroubleshootKubernetesListTopMemoryConsumedByWorkload is the MCP tool
// registered as "troubleshoot_kubernetes_list_top_memory_consumed_by_workload".
// Its handler runs a PromQL query through SysdigClient and returns the decoded
// query response.
type TroubleshootKubernetesListTopMemoryConsumedByWorkload struct {
// SysdigClient is the client whose GetQueryV1 method the handler calls.
SysdigClient sysdig.ExtendedClientWithResponsesInterface
}

// NewTroubleshootKubernetesListTopMemoryConsumedByWorkload returns a tool whose
// queries are executed through the given sysdigClient.
func NewTroubleshootKubernetesListTopMemoryConsumedByWorkload(sysdigClient sysdig.ExtendedClientWithResponsesInterface) *TroubleshootKubernetesListTopMemoryConsumedByWorkload {
	tool := new(TroubleshootKubernetesListTopMemoryConsumedByWorkload)
	tool.SysdigClient = sysdigClient
	return tool
}

// RegisterInServer declares the tool's schema (four optional string filters, a
// numeric limit defaulting to 20, and a map-shaped output schema) and adds it
// to s with t.handle as its handler.
func (t *TroubleshootKubernetesListTopMemoryConsumedByWorkload) RegisterInServer(s *server.MCPServer) {
	const toolName = "troubleshoot_kubernetes_list_top_memory_consumed_by_workload"

	limitOption := mcp.WithNumber("limit",
		mcp.Description("Maximum number of workloads to return."),
		mcp.DefaultNumber(20),
	)

	definition := mcp.NewTool(toolName,
		mcp.WithDescription("Lists memory-intensive workloads (all containers)."),
		mcp.WithString("cluster_name", mcp.Description("The name of the cluster to filter by.")),
		mcp.WithString("namespace_name", mcp.Description("The name of the namespace to filter by.")),
		mcp.WithString("workload_type", mcp.Description("The type of the workload to filter by.")),
		mcp.WithString("workload_name", mcp.Description("The name of the workload to filter by.")),
		limitOption,
		mcp.WithOutputSchema[map[string]any](),
		// FIXME(fede): Add the required permissions. It should be `promql.exec`
		// but somehow the token does not have that permission even if you are
		// able to execute queries.
		WithRequiredPermissions(),
	)

	s.AddTool(definition, t.handle)
}

// handle serves a call to the tool. It reads the optional cluster_name,
// namespace_name, workload_type and workload_name filters plus the limit
// (default 20) from the request, builds the PromQL query and executes it via
// SysdigClient.GetQueryV1.
//
// Every failure (client error, non-200 status, undecodable body) is reported
// as a tool error result with a nil Go error. On success the decoded
// sysdig.QueryResponseV1 is returned as a JSON tool result.
func (t *TroubleshootKubernetesListTopMemoryConsumedByWorkload) handle(ctx context.Context, request mcp.CallToolRequest) (*mcp.CallToolResult, error) {
	clusterName := mcp.ParseString(request, "cluster_name", "")
	namespaceName := mcp.ParseString(request, "namespace_name", "")
	workloadType := mcp.ParseString(request, "workload_type", "")
	workloadName := mcp.ParseString(request, "workload_name", "")
	limit := mcp.ParseInt(request, "limit", 20)

	query := buildTopMemoryConsumedByWorkloadQuery(clusterName, namespaceName, workloadType, workloadName, limit)

	// The same limit is applied twice: as the k of topk() inside the query and
	// as the Limit parameter of the query request.
	limitQuery := sysdig.LimitQuery(limit)
	params := &sysdig.GetQueryV1Params{
		Query: query,
		Limit: &limitQuery,
	}

	httpResp, err := t.SysdigClient.GetQueryV1(ctx, params)
	if err != nil {
		return mcp.NewToolResultErrorFromErr("failed to get workload list", err), nil
	}
	// Release the response body on every return path so it is not leaked.
	// A close failure is not actionable here, hence the discarded error.
	defer func() { _ = httpResp.Body.Close() }()

	if httpResp.StatusCode != 200 {
		bodyBytes, readErr := io.ReadAll(httpResp.Body)
		if readErr != nil {
			// Still surface the status code when the error body is unreadable.
			return mcp.NewToolResultErrorf("failed to get workload list: status code %d, body could not be read: %v", httpResp.StatusCode, readErr), nil
		}
		return mcp.NewToolResultErrorf("failed to get workload list: status code %d, body: %s", httpResp.StatusCode, string(bodyBytes)), nil
	}

	var queryResponse sysdig.QueryResponseV1
	if err := json.NewDecoder(httpResp.Body).Decode(&queryResponse); err != nil {
		return mcp.NewToolResultErrorFromErr("failed to decode response", err), nil
	}

	return mcp.NewToolResultJSON(queryResponse)
}

// buildTopMemoryConsumedByWorkloadQuery returns the PromQL expression that
// sums sysdig_container_memory_used_bytes per cluster, namespace, workload
// type and workload name, and keeps the top `limit` series.
//
// Each non-empty argument adds an equality matcher on the corresponding label
// (kube_cluster_name, kube_namespace_name, kube_workload_type,
// kube_workload_name), in that order; empty arguments add no matcher. When no
// matcher applies, the metric is queried without a selector.
//
// Label values are caller-supplied, so they are rendered with %q: quotes and
// backslashes are escaped instead of terminating the string literal, which
// would otherwise allow a value to alter the query.
func buildTopMemoryConsumedByWorkloadQuery(clusterName, namespaceName, workloadType, workloadName string, limit int) string {
	labelFilters := []struct {
		label string
		value string
	}{
		{"kube_cluster_name", clusterName},
		{"kube_namespace_name", namespaceName},
		{"kube_workload_type", workloadType},
		{"kube_workload_name", workloadName},
	}

	matchers := make([]string, 0, len(labelFilters))
	for _, f := range labelFilters {
		if f.value == "" {
			continue
		}
		matchers = append(matchers, fmt.Sprintf("%s=%q", f.label, f.value))
	}

	selector := "sysdig_container_memory_used_bytes"
	if len(matchers) > 0 {
		selector += "{" + strings.Join(matchers, ",") + "}"
	}

	return fmt.Sprintf("topk(%d, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (%s))", limit, selector)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package tools_test

import (
"bytes"
"context"
"io"
"net/http"

"github.com/mark3labs/mcp-go/mcp"
"github.com/mark3labs/mcp-go/server"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/mcp/tools"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig"
"github.com/sysdiglabs/sysdig-mcp-server/internal/infra/sysdig/mocks"
"go.uber.org/mock/gomock"
)

// Spec for the troubleshoot_kubernetes_list_top_memory_consumed_by_workload
// tool: it checks that the tool registers in an MCP server and that each set
// of request arguments produces the expected GetQueryV1 parameters.
var _ = Describe("TroubleshootKubernetesListTopMemoryConsumedByWorkload Tool", func() {
var (
tool *tools.TroubleshootKubernetesListTopMemoryConsumedByWorkload
mockSysdig *mocks.MockExtendedClientWithResponsesInterface
mcpServer *server.MCPServer
ctrl *gomock.Controller
)

// Each spec gets a fresh mock client and a fresh server with the tool
// already registered.
BeforeEach(func() {
ctrl = gomock.NewController(GinkgoT())
mockSysdig = mocks.NewMockExtendedClientWithResponsesInterface(ctrl)
tool = tools.NewTroubleshootKubernetesListTopMemoryConsumedByWorkload(mockSysdig)
mcpServer = server.NewMCPServer("test", "test")
tool.RegisterInServer(mcpServer)
})

It("should register successfully in the server", func() {
Expect(mcpServer.GetTool("troubleshoot_kubernetes_list_top_memory_consumed_by_workload")).NotTo(BeNil())
})

When("listing top memory consumed by workload", func() {
// Table body: the mock only accepts a GetQueryV1 call whose params match
// expectedParamsRequested, and answers with a 200 and a minimal JSON
// payload; the handler's text result must be that same JSON.
DescribeTable("it succeeds", func(ctx context.Context, toolName string, request mcp.CallToolRequest, expectedParamsRequested sysdig.GetQueryV1Params) {
mockSysdig.EXPECT().GetQueryV1(gomock.Any(), &expectedParamsRequested).Return(&http.Response{
StatusCode: http.StatusOK,
Body: io.NopCloser(bytes.NewBufferString(`{"status":"success"}`)),
}, nil)

serverTool := mcpServer.GetTool(toolName)
result, err := serverTool.Handler(ctx, request)
Expect(err).NotTo(HaveOccurred())

resultData, ok := result.Content[0].(mcp.TextContent)
Expect(ok).To(BeTrue())
Expect(resultData.Text).To(MatchJSON(`{"status":"success"}`))
},
// No arguments: no label selector and the default limit of 20.
Entry(nil,
"troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
Arguments: map[string]any{},
},
},
sysdig.GetQueryV1Params{
Query: `topk(20, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes))`,
Limit: asPtr(sysdig.LimitQuery(20)),
},
),
// Limit supplied as the string "10": expected both in topk() and in Limit.
Entry(nil,
"troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
Arguments: map[string]any{"limit": "10"},
},
},
sysdig.GetQueryV1Params{
Query: `topk(10, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes))`,
Limit: asPtr(sysdig.LimitQuery(10)),
},
),
// Cluster filter only.
Entry(nil,
"troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
Arguments: map[string]any{"cluster_name": "my_cluster"},
},
},
sysdig.GetQueryV1Params{
Query: `topk(20, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes{kube_cluster_name="my_cluster"}))`,
Limit: asPtr(sysdig.LimitQuery(20)),
},
),
// Cluster and namespace filters: matchers are comma-joined in that order.
Entry(nil,
"troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
Arguments: map[string]any{"cluster_name": "my_cluster", "namespace_name": "my_namespace"},
},
},
sysdig.GetQueryV1Params{
Query: `topk(20, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes{kube_cluster_name="my_cluster",kube_namespace_name="my_namespace"}))`,
Limit: asPtr(sysdig.LimitQuery(20)),
},
),
// All four filters plus a custom limit.
Entry(nil,
"troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
mcp.CallToolRequest{
Params: mcp.CallToolParams{
Name: "troubleshoot_kubernetes_list_top_memory_consumed_by_workload",
Arguments: map[string]any{
"cluster_name": "my_cluster",
"namespace_name": "my_namespace",
"workload_type": "deployment",
"workload_name": "my_workload",
"limit": "5",
},
},
},
sysdig.GetQueryV1Params{
Query: `topk(5, sum by (kube_cluster_name, kube_namespace_name, kube_workload_type, kube_workload_name) (sysdig_container_memory_used_bytes{kube_cluster_name="my_cluster",kube_namespace_name="my_namespace",kube_workload_type="deployment",kube_workload_name="my_workload"}))`,
Limit: asPtr(sysdig.LimitQuery(5)),
},
),
)
})
})