diff --git a/admin/server/server.go b/admin/server/server.go index 24066a0e22b..a1ff73bd9ff 100644 --- a/admin/server/server.go +++ b/admin/server/server.go @@ -411,7 +411,8 @@ func (s *Server) jwtAttributesForService(ctx context.Context, serviceID string, func timeoutSelector(fullMethodName string) time.Duration { if strings.HasPrefix(fullMethodName, "/rill.admin.v1.AIService") { - return time.Minute * 2 + // NOTE: The runtime usually sets a lower timeout through its AILLMTimeoutSeconds config, so this is more of a hard upper bound. + return time.Minute * 10 } if fullMethodName == "/rill.admin.v1.AdminService/DeleteProject" { return time.Minute * 4 diff --git a/docs/docs/reference/project-files/rill-yaml.md b/docs/docs/reference/project-files/rill-yaml.md index 6a86977b7f6..87758c0c139 100644 --- a/docs/docs/reference/project-files/rill-yaml.md +++ b/docs/docs/reference/project-files/rill-yaml.md @@ -164,7 +164,7 @@ _[object]_ - A map of key-value pairs for setting variables on your project. It - **`rill.ai.completion_timeout_seconds`** - _[integer]_ - Maximum duration of a full AI completion request (which may include multiple LLM calls and tool uses), in seconds. Default: 300. - - **`rill.ai.llm_timeout_seconds`** - _[integer]_ - Maximum duration of a single LLM completion request, in seconds. Default: 180. + - **`rill.ai.llm_timeout_seconds`** - _[integer]_ - Maximum duration of a single LLM completion request, in seconds. Default: 180. Note: when using Rill's hosted AI service (i.e. not a self-configured LLM), the admin server enforces a hard upper bound of 10 minutes, so values above that have no effect. - **`rill.ai.default_query_limit`** - _[integer]_ - Default row limit applied to AI tool queries when no limit is specified. Default: 25. 
diff --git a/runtime/ai/ai.go b/runtime/ai/ai.go index 00c27a5b425..a401608776c 100644 --- a/runtime/ai/ai.go +++ b/runtime/ai/ai.go @@ -27,6 +27,8 @@ import ( semconv "go.opentelemetry.io/otel/semconv/v1.17.0" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" "google.golang.org/protobuf/types/known/structpb" ) @@ -1278,9 +1280,15 @@ func (s *Session) Complete(ctx context.Context, name string, out any, opts *Comp // Handle LLM completion error if err != nil { - if errors.Is(err, llmCtx.Err()) && errors.Is(err, context.DeadlineExceeded) { + if errors.Is(err, llmCtx.Err()) && errors.Is(err, context.DeadlineExceeded) { // Timeout from local ctx. return nil, fmt.Errorf("LLM request timed out after %s: %w", llmRequestTimeout, err) } + if status.Code(err) == codes.DeadlineExceeded { // Timeout from admin service. + return nil, fmt.Errorf("LLM request timed out: %w", err) + } + if errors.Is(err, ctx.Err()) { + return nil, ctx.Err() + } return nil, fmt.Errorf("completion failed: %w (stack: %s)", err, string(debug.Stack())) } diff --git a/runtime/ai/router_agent.go b/runtime/ai/router_agent.go index 590c9a158ae..1224fe8693f 100644 --- a/runtime/ai/router_agent.go +++ b/runtime/ai/router_agent.go @@ -2,6 +2,7 @@ package ai import ( "context" + "errors" "fmt" "regexp" "slices" @@ -165,7 +166,7 @@ func (t *RouterAgent) Handler(ctx context.Context, args *RouterAgentArgs) (*Rout Args: analystAgentArgs, }) if err != nil { - return nil, err + return nil, mapAgentErr(err) } return &RouterAgentResult{Response: res.Response, Agent: args.Agent}, nil @@ -184,7 +185,7 @@ func (t *RouterAgent) Handler(ctx context.Context, args *RouterAgentArgs) (*Rout Args: developerAgentArgs, }) if err != nil { - return nil, err + return nil, mapAgentErr(err) } return &RouterAgentResult{Response: res.Response, Agent: args.Agent}, nil @@ -197,7 +198,7 @@ func (t *RouterAgent) Handler(ctx context.Context, args *RouterAgentArgs) (*Rout 
// mapAgentErr maps common agent errors to more user-friendly messages.
//
// It returns:
//   - nil when err is nil, so the helper never manufactures an error out of a success;
//   - "agent canceled" when err wraps context.Canceled;
//   - "agent timed out" when err wraps context.DeadlineExceeded;
//   - "agent error: <err>" (wrapping err with %w) for everything else.
//
// NOTE: For context errors, it does not include the underlying error to keep messages clean.
// As a consequence, errors.Is(result, context.Canceled) and
// errors.Is(result, context.DeadlineExceeded) report false on the returned error.
// The actual error is still available in the message containing the sub-agent's result.
func mapAgentErr(err error) error {
	switch {
	case err == nil:
		// Without this guard the default branch would yield "agent error: %!w(<nil>)".
		return nil
	case errors.Is(err, context.Canceled):
		// Constant message with no format verbs: errors.New rather than fmt.Errorf.
		return errors.New("agent canceled")
	case errors.Is(err, context.DeadlineExceeded):
		return errors.New("agent timed out")
	default:
		// Keep the original error in the chain so callers can still inspect it.
		return fmt.Errorf("agent error: %w", err)
	}
}
AIDefaultQueryLimit int64 `mapstructure:"rill.ai.default_query_limit"` diff --git a/runtime/parser/schema/rillyaml.schema.yaml b/runtime/parser/schema/rillyaml.schema.yaml index f1f8ba77863..73c99642f3f 100644 --- a/runtime/parser/schema/rillyaml.schema.yaml +++ b/runtime/parser/schema/rillyaml.schema.yaml @@ -171,7 +171,7 @@ allOf: description: "Maximum duration of a full AI completion request (which may include multiple LLM calls and tool uses), in seconds. Default: 300." rill.ai.llm_timeout_seconds: type: integer - description: "Maximum duration of a single LLM completion request, in seconds. Default: 180." + description: "Maximum duration of a single LLM completion request, in seconds. Default: 180. Note: when using Rill's hosted AI service (i.e. not a self-configured LLM), the admin server enforces a hard upper bound of 10 minutes, so values above that have no effect." rill.ai.default_query_limit: type: integer description: "Default row limit applied to AI tool queries when no limit is specified. Default: 25."