From d37a3a7c065604e1d011904ab6353292e7c21934 Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Thu, 27 Nov 2025 12:32:58 -0600 Subject: [PATCH] Add RetryDelay to ErrorHandling in WorkflowStep CRD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VirtualMCPServer CRD's ErrorHandling struct was missing the RetryDelay field that is supported by the implementation. This adds the field to allow users to configure the delay between retry attempts for workflow steps. Changes: - Add RetryDelay field to ErrorHandling struct with duration pattern validation - Update converter to parse and pass through the retry delay - Add comprehensive tests for error handling conversion Fixes #2773 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../api/v1alpha1/virtualmcpserver_types.go | 6 + cmd/thv-operator/pkg/vmcpconfig/converter.go | 8 +- .../pkg/vmcpconfig/converter_test.go | 220 ++++++++++++++++++ deploy/charts/operator-crds/Chart.yaml | 2 +- deploy/charts/operator-crds/README.md | 2 +- ...ev_virtualmcpcompositetooldefinitions.yaml | 6 + ...olhive.stacklok.dev_virtualmcpservers.yaml | 6 + docs/operator/crd-api.md | 1 + 8 files changed, 248 insertions(+), 3 deletions(-) diff --git a/cmd/thv-operator/api/v1alpha1/virtualmcpserver_types.go b/cmd/thv-operator/api/v1alpha1/virtualmcpserver_types.go index eb5649547..3355bd848 100644 --- a/cmd/thv-operator/api/v1alpha1/virtualmcpserver_types.go +++ b/cmd/thv-operator/api/v1alpha1/virtualmcpserver_types.go @@ -275,6 +275,12 @@ type ErrorHandling struct { // Only used when Action is "retry" // +optional MaxRetries int `json:"maxRetries,omitempty"` + + // RetryDelay is the delay between retry attempts + // Only used when Action is "retry" + // +kubebuilder:validation:Pattern=`^([0-9]+(\.[0-9]+)?(ms|s|m))+$` + // +optional + RetryDelay string `json:"retryDelay,omitempty"` } // TokenCacheConfig configures token caching behavior diff --git a/cmd/thv-operator/pkg/vmcpconfig/converter.go b/cmd/thv-operator/pkg/vmcpconfig/converter.go index e7a9a90db..e4f3ef52b 100644 --- a/cmd/thv-operator/pkg/vmcpconfig/converter.go +++ b/cmd/thv-operator/pkg/vmcpconfig/converter.go @@ -304,10 +304,16 @@ func (*Converter) convertCompositeTools( // Convert error handling if crdStep.OnError != nil { - step.OnError = &vmcpconfig.StepErrorHandling{ + stepError := &vmcpconfig.StepErrorHandling{ Action: crdStep.OnError.Action, RetryCount: crdStep.OnError.MaxRetries, } + if crdStep.OnError.RetryDelay != "" { + if duration, err := time.ParseDuration(crdStep.OnError.RetryDelay); err == nil { + stepError.RetryDelay = vmcpconfig.Duration(duration) + } + } + step.OnError = stepError } tool.Steps = append(tool.Steps, step) diff --git a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go index 6f22ecfaa..7d6aaa865 100644 --- a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go +++ b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go @@ -1,8 +1,10 @@ +// Package vmcpconfig provides conversion logic from VirtualMCPServer CRD to vmcp Config package vmcpconfig import ( "context" "testing" + "time" "github.com/go-logr/logr" "github.com/stretchr/testify/assert" @@ -12,6 +14,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" + vmcpconfig "github.com/stacklok/toolhive/pkg/vmcp/config" ) func TestConvertCompositeTools_Parameters(t *testing.T) { @@ -236,3 +239,220 @@ func TestConvertCompositeTools_Timeout(t *testing.T) { }) } } + +func TestConverter_ConvertCompositeTools_ErrorHandling(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + errorHandling *mcpv1alpha1.ErrorHandling + expectedAction string + expectedRetry int + expectedDelay vmcpconfig.Duration + }{ + { + name: "with retry delay", + errorHandling: &mcpv1alpha1.ErrorHandling{ + Action: "retry", + MaxRetries: 3, + RetryDelay: "5s", + }, + expectedAction: "retry", + expectedRetry: 3, + expectedDelay: vmcpconfig.Duration(5 * time.Second), + }, + { + name: "with millisecond retry delay", + errorHandling: &mcpv1alpha1.ErrorHandling{ + Action: "retry", + MaxRetries: 5, + RetryDelay: "500ms", + }, + expectedAction: "retry", + expectedRetry: 5, + expectedDelay: vmcpconfig.Duration(500 * time.Millisecond), + }, + { + name: "with minute retry delay", + errorHandling: &mcpv1alpha1.ErrorHandling{ + Action: "retry", + MaxRetries: 2, + RetryDelay: "1m", + }, + expectedAction: "retry", + expectedRetry: 2, + expectedDelay: vmcpconfig.Duration(1 * time.Minute), + }, + { + name: "without retry delay", + errorHandling: &mcpv1alpha1.ErrorHandling{ + Action: "retry", + MaxRetries: 3, + }, + expectedAction: "retry", + expectedRetry: 3, + expectedDelay: vmcpconfig.Duration(0), + }, + { + name: "abort action", + errorHandling: &mcpv1alpha1.ErrorHandling{ + Action: "abort", + }, + expectedAction: "abort", + expectedRetry: 0, + expectedDelay: vmcpconfig.Duration(0), + }, + { + name: "continue action", + errorHandling: &mcpv1alpha1.ErrorHandling{ + Action: "continue", + }, + expectedAction: "continue", + expectedRetry: 0, + expectedDelay: vmcpconfig.Duration(0), + }, + { + name: "invalid retry delay format is ignored", + errorHandling: &mcpv1alpha1.ErrorHandling{ + Action: "retry", + MaxRetries: 3, + RetryDelay: "invalid", + }, + expectedAction: "retry", + expectedRetry: 3, + expectedDelay: vmcpconfig.Duration(0), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + vmcpServer := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{Name: "test-group"}, + CompositeTools: []mcpv1alpha1.CompositeToolSpec{ + { + Name: "test-tool", + Description: "A test composite tool", + Steps: []mcpv1alpha1.WorkflowStep{ + { + ID: "step1", + Type: "tool", + Tool: "backend/some-tool", + OnError: tt.errorHandling, + }, + }, + }, + }, + }, + } + + converter := NewConverter() + ctx := log.IntoContext(context.Background(), logr.Discard()) + config, err := converter.Convert(ctx, vmcpServer) + + require.NoError(t, err) + require.NotNil(t, config) + require.Len(t, config.CompositeTools, 1) + require.Len(t, config.CompositeTools[0].Steps, 1) + + step := config.CompositeTools[0].Steps[0] + if tt.errorHandling != nil { + require.NotNil(t, step.OnError) + assert.Equal(t, tt.expectedAction, step.OnError.Action) + assert.Equal(t, tt.expectedRetry, step.OnError.RetryCount) + assert.Equal(t, tt.expectedDelay, step.OnError.RetryDelay) + } else { + assert.Nil(t, step.OnError) + } + }) + } +} + +func TestConverter_ConvertCompositeTools_NoErrorHandling(t *testing.T) { + t.Parallel() + + vmcpServer := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{Name: "test-group"}, + CompositeTools: []mcpv1alpha1.CompositeToolSpec{ + { + Name: "test-tool", + Description: "A test composite tool", + Steps: []mcpv1alpha1.WorkflowStep{ + { + ID: "step1", + Type: "tool", + Tool: "backend/some-tool", + // No OnError specified + }, + }, + }, + }, + }, + } + + converter := NewConverter() + ctx := log.IntoContext(context.Background(), logr.Discard()) + config, err := converter.Convert(ctx, vmcpServer) + + require.NoError(t, err) + require.NotNil(t, config) + require.Len(t, config.CompositeTools, 1) + require.Len(t, config.CompositeTools[0].Steps, 1) + + step := config.CompositeTools[0].Steps[0] + assert.Nil(t, step.OnError) +} + +func TestConverter_ConvertCompositeTools_StepTimeout(t *testing.T) { + t.Parallel() + + vmcpServer := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{Name: "test-group"}, + CompositeTools: []mcpv1alpha1.CompositeToolSpec{ + { + Name: "test-tool", + Description: "A test composite tool", + Timeout: "30s", + Steps: []mcpv1alpha1.WorkflowStep{ + { + ID: "step1", + Type: "tool", + Tool: "backend/some-tool", + Timeout: "10s", + }, + }, + }, + }, + }, + } + + converter := NewConverter() + ctx := log.IntoContext(context.Background(), logr.Discard()) + config, err := converter.Convert(ctx, vmcpServer) + + require.NoError(t, err) + require.NotNil(t, config) + require.Len(t, config.CompositeTools, 1) + + tool := config.CompositeTools[0] + assert.Equal(t, vmcpconfig.Duration(30*time.Second), tool.Timeout) + + require.Len(t, tool.Steps, 1) + assert.Equal(t, vmcpconfig.Duration(10*time.Second), tool.Steps[0].Timeout) +} diff --git a/deploy/charts/operator-crds/Chart.yaml b/deploy/charts/operator-crds/Chart.yaml index d524c9539..51bfc86e5 100644 --- a/deploy/charts/operator-crds/Chart.yaml +++ b/deploy/charts/operator-crds/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: toolhive-operator-crds description: A Helm chart for installing the ToolHive Operator CRDs into Kubernetes. type: application -version: 0.0.67 +version: 0.0.68 appVersion: "0.0.1" diff --git a/deploy/charts/operator-crds/README.md b/deploy/charts/operator-crds/README.md index a3272de5b..3d240bebf 100644 --- a/deploy/charts/operator-crds/README.md +++ b/deploy/charts/operator-crds/README.md @@ -1,6 +1,6 @@ # ToolHive Operator CRDs Helm Chart -![Version: 0.0.67](https://img.shields.io/badge/Version-0.0.67-informational?style=flat-square) +![Version: 0.0.68](https://img.shields.io/badge/Version-0.0.68-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) A Helm chart for installing the ToolHive Operator CRDs into Kubernetes. diff --git a/deploy/charts/operator-crds/crds/toolhive.stacklok.dev_virtualmcpcompositetooldefinitions.yaml b/deploy/charts/operator-crds/crds/toolhive.stacklok.dev_virtualmcpcompositetooldefinitions.yaml index 0ca09ea6a..9ca006f41 100644 --- a/deploy/charts/operator-crds/crds/toolhive.stacklok.dev_virtualmcpcompositetooldefinitions.yaml +++ b/deploy/charts/operator-crds/crds/toolhive.stacklok.dev_virtualmcpcompositetooldefinitions.yaml @@ -155,6 +155,12 @@ spec: MaxRetries is the maximum number of retries Only used when Action is "retry" type: integer + retryDelay: + description: |- + RetryDelay is the delay between retry attempts + Only used when Action is "retry" + pattern: ^([0-9]+(\.[0-9]+)?(ms|s|m))+$ + type: string type: object schema: description: Schema defines the expected response schema for diff --git a/deploy/charts/operator-crds/crds/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/crds/toolhive.stacklok.dev_virtualmcpservers.yaml index 9e67018b5..96c292ad1 100644 --- a/deploy/charts/operator-crds/crds/toolhive.stacklok.dev_virtualmcpservers.yaml +++ b/deploy/charts/operator-crds/crds/toolhive.stacklok.dev_virtualmcpservers.yaml @@ -247,6 +247,12 @@ spec: MaxRetries is the maximum number of retries Only used when Action is "retry" type: integer + retryDelay: + description: |- + RetryDelay is the delay between retry attempts + Only used when Action is "retry" + pattern: ^([0-9]+(\.[0-9]+)?(ms|s|m))+$ + type: string type: object schema: description: Schema defines the expected response schema diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index b56f24377..5825ccfbb 100644 --- a/docs/operator/crd-api.md +++ b/docs/operator/crd-api.md @@ -322,6 +322,7 @@ _Appears in:_ | --- | --- | --- | --- | | `action` _string_ | Action defines the action to take on error | abort | Enum: [abort continue retry]
| | `maxRetries` _integer_ | MaxRetries is the maximum number of retries
Only used when Action is "retry" | | | +| `retryDelay` _string_ | RetryDelay is the delay between retry attempts
Only used when Action is "retry" | | Pattern: `^([0-9]+(\.[0-9]+)?(ms\|s\|m))+$`
| #### ExternalAuthConfigRef