From 251046d0703dc290836fd7c0abdd7b2e7704fb21 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 27 Mar 2026 13:09:49 -0700 Subject: [PATCH 01/65] fix(import): dedup workflow name (#3813) --- apps/sim/app/api/workflows/route.ts | 76 ++++++++++--------- .../app/workspace/[workspaceId]/home/home.tsx | 1 + .../w/hooks/use-import-workflow.ts | 1 + .../w/hooks/use-import-workspace.ts | 1 + apps/sim/hooks/queries/workflows.ts | 5 +- 5 files changed, 49 insertions(+), 35 deletions(-) diff --git a/apps/sim/app/api/workflows/route.ts b/apps/sim/app/api/workflows/route.ts index 3181185b75e..4dc1d85a9c0 100644 --- a/apps/sim/app/api/workflows/route.ts +++ b/apps/sim/app/api/workflows/route.ts @@ -8,7 +8,7 @@ import { AuditAction, AuditResourceType, recordAudit } from '@/lib/audit/log' import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid' import { generateRequestId } from '@/lib/core/utils/request' import { getNextWorkflowColor } from '@/lib/workflows/colors' -import { listWorkflows, type WorkflowScope } from '@/lib/workflows/utils' +import { deduplicateWorkflowName, listWorkflows, type WorkflowScope } from '@/lib/workflows/utils' import { getUserEntityPermissions, workspaceExists } from '@/lib/workspaces/permissions/utils' import { verifyWorkspaceMembership } from '@/app/api/workflows/utils' @@ -25,6 +25,7 @@ const CreateWorkflowSchema = z.object({ workspaceId: z.string().optional(), folderId: z.string().nullable().optional(), sortOrder: z.number().int().optional(), + deduplicate: z.boolean().optional(), }) // GET /api/workflows - Get workflows for user (optionally filtered by workspaceId) @@ -126,12 +127,13 @@ export async function POST(req: NextRequest) { const body = await req.json() const { id: clientId, - name, + name: requestedName, description, color, workspaceId, folderId, sortOrder: providedSortOrder, + deduplicate, } = CreateWorkflowSchema.parse(body) if (!workspaceId) { @@ -162,19 +164,6 @@ export async function POST(req: NextRequest) { 
logger.info(`[${requestId}] Creating workflow ${workflowId} for user ${userId}`) - import('@/lib/core/telemetry') - .then(({ PlatformEvents }) => { - PlatformEvents.workflowCreated({ - workflowId, - name, - workspaceId: workspaceId || undefined, - folderId: folderId || undefined, - }) - }) - .catch(() => { - // Silently fail - }) - let sortOrder: number if (providedSortOrder !== undefined) { sortOrder = providedSortOrder @@ -214,31 +203,50 @@ export async function POST(req: NextRequest) { sortOrder = minSortOrder != null ? minSortOrder - 1 : 0 } - const duplicateConditions = [ - eq(workflow.workspaceId, workspaceId), - isNull(workflow.archivedAt), - eq(workflow.name, name), - ] + let name = requestedName - if (folderId) { - duplicateConditions.push(eq(workflow.folderId, folderId)) + if (deduplicate) { + name = await deduplicateWorkflowName(requestedName, workspaceId, folderId) } else { - duplicateConditions.push(isNull(workflow.folderId)) - } + const duplicateConditions = [ + eq(workflow.workspaceId, workspaceId), + isNull(workflow.archivedAt), + eq(workflow.name, requestedName), + ] + + if (folderId) { + duplicateConditions.push(eq(workflow.folderId, folderId)) + } else { + duplicateConditions.push(isNull(workflow.folderId)) + } - const [duplicateWorkflow] = await db - .select({ id: workflow.id }) - .from(workflow) - .where(and(...duplicateConditions)) - .limit(1) + const [duplicateWorkflow] = await db + .select({ id: workflow.id }) + .from(workflow) + .where(and(...duplicateConditions)) + .limit(1) - if (duplicateWorkflow) { - return NextResponse.json( - { error: `A workflow named "${name}" already exists in this folder` }, - { status: 409 } - ) + if (duplicateWorkflow) { + return NextResponse.json( + { error: `A workflow named "${requestedName}" already exists in this folder` }, + { status: 409 } + ) + } } + import('@/lib/core/telemetry') + .then(({ PlatformEvents }) => { + PlatformEvents.workflowCreated({ + workflowId, + name, + workspaceId: workspaceId || 
undefined, + folderId: folderId || undefined, + }) + }) + .catch(() => { + // Silently fail + }) + await db.insert(workflow).values({ id: workflowId, userId, diff --git a/apps/sim/app/workspace/[workspaceId]/home/home.tsx b/apps/sim/app/workspace/[workspaceId]/home/home.tsx index 001489783ae..87e8f126b80 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/home.tsx +++ b/apps/sim/app/workspace/[workspaceId]/home/home.tsx @@ -54,6 +54,7 @@ export function Home({ chatId }: HomeProps = {}) { description, color, workspaceId, + deduplicate: true, }), }) diff --git a/apps/sim/app/workspace/[workspaceId]/w/hooks/use-import-workflow.ts b/apps/sim/app/workspace/[workspaceId]/w/hooks/use-import-workflow.ts index 5be5a661268..c7f461b71f6 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/hooks/use-import-workflow.ts +++ b/apps/sim/app/workspace/[workspaceId]/w/hooks/use-import-workflow.ts @@ -56,6 +56,7 @@ export function useImportWorkflow({ workspaceId }: UseImportWorkflowProps) { workspaceId, folderId, sortOrder, + deduplicate: true, }), }) diff --git a/apps/sim/app/workspace/[workspaceId]/w/hooks/use-import-workspace.ts b/apps/sim/app/workspace/[workspaceId]/w/hooks/use-import-workspace.ts index 24f8d35c4ae..6b6b5df81ac 100644 --- a/apps/sim/app/workspace/[workspaceId]/w/hooks/use-import-workspace.ts +++ b/apps/sim/app/workspace/[workspaceId]/w/hooks/use-import-workspace.ts @@ -176,6 +176,7 @@ export function useImportWorkspace({ onSuccess }: UseImportWorkspaceProps = {}) color: workflowColor, workspaceId: newWorkspace.id, folderId: targetFolderId, + deduplicate: true, }), }) diff --git a/apps/sim/hooks/queries/workflows.ts b/apps/sim/hooks/queries/workflows.ts index 16ceb2c9822..c2ae3a40363 100644 --- a/apps/sim/hooks/queries/workflows.ts +++ b/apps/sim/hooks/queries/workflows.ts @@ -164,6 +164,7 @@ interface CreateWorkflowVariables { folderId?: string | null sortOrder?: number id?: string + deduplicate?: boolean } interface CreateWorkflowResult { @@ -300,7 +301,8 @@ 
export function useCreateWorkflow() { return useMutation({ mutationFn: async (variables: CreateWorkflowVariables): Promise => { - const { workspaceId, name, description, color, folderId, sortOrder, id } = variables + const { workspaceId, name, description, color, folderId, sortOrder, id, deduplicate } = + variables logger.info(`Creating new workflow in workspace: ${workspaceId}`) @@ -315,6 +317,7 @@ export function useCreateWorkflow() { workspaceId, folderId: folderId || null, sortOrder, + deduplicate, }), }) From f0adc5e98a3e65c30227ab63f437c7d254545fcb Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 27 Mar 2026 13:11:35 -0700 Subject: [PATCH 02/65] feat(concurrency): bullmq based concurrency control system (#3605) * feat(concurrency): bullmq based queueing system * fix bun lock * remove manual execs off queues * address comments * fix legacy team limits * cleanup enterprise typing code * inline child triggers * fix status check * address more comments * optimize reconciler scan * remove dead code * add to landing page * Add load testing framework * update bullmq * fix * fix headless path --------- Co-authored-by: Theodore Li --- apps/docs/content/docs/en/execution/costs.mdx | 11 + .../app/(home)/components/pricing/pricing.tsx | 4 + apps/sim/app/api/copilot/chat/route.ts | 17 + apps/sim/app/api/jobs/[jobId]/route.test.ts | 160 +++++ apps/sim/app/api/jobs/[jobId]/route.ts | 56 +- apps/sim/app/api/mcp/copilot/route.ts | 16 + apps/sim/app/api/mothership/execute/route.ts | 15 + .../app/api/schedules/execute/route.test.ts | 85 ++- apps/sim/app/api/schedules/execute/route.ts | 62 +- apps/sim/app/api/v1/copilot/chat/route.ts | 15 + .../app/api/webhooks/trigger/[path]/route.ts | 50 +- .../[id]/execute/route.async.test.ts | 36 +- .../app/api/workflows/[id]/execute/route.ts | 301 ++++++++- .../home/components/message-content/utils.ts | 1 + .../app/workspace/[workspaceId]/home/types.ts | 2 + apps/sim/background/schedule-execution.ts | 1 + 
apps/sim/background/workflow-execution.ts | 1 + .../workspace-notification-delivery.ts | 218 ++++++- apps/sim/lib/auth/hybrid.ts | 14 + apps/sim/lib/billing/subscriptions/utils.ts | 20 +- apps/sim/lib/billing/types/index.ts | 43 +- apps/sim/lib/billing/webhooks/enterprise.ts | 47 +- .../lib/billing/workspace-concurrency.test.ts | 146 +++++ apps/sim/lib/billing/workspace-concurrency.ts | 170 ++++++ apps/sim/lib/core/admission/gate.ts | 62 ++ .../lib/core/async-jobs/backends/bullmq.ts | 106 ++++ .../sim/lib/core/async-jobs/backends/index.ts | 2 +- .../core/async-jobs/backends/redis.test.ts | 176 ------ .../sim/lib/core/async-jobs/backends/redis.ts | 146 ----- apps/sim/lib/core/async-jobs/config.ts | 46 +- apps/sim/lib/core/async-jobs/index.ts | 1 + apps/sim/lib/core/async-jobs/types.ts | 6 +- apps/sim/lib/core/bullmq/connection.ts | 29 + apps/sim/lib/core/bullmq/index.ts | 16 + apps/sim/lib/core/bullmq/queues.ts | 196 ++++++ apps/sim/lib/core/config/env.ts | 9 + .../lib/core/workspace-dispatch/adapter.ts | 80 +++ .../workspace-dispatch/dispatcher.test.ts | 175 ++++++ .../lib/core/workspace-dispatch/dispatcher.ts | 156 +++++ .../lib/core/workspace-dispatch/factory.ts | 42 ++ apps/sim/lib/core/workspace-dispatch/index.ts | 32 + .../workspace-dispatch/memory-store.test.ts | 65 ++ .../core/workspace-dispatch/memory-store.ts | 505 +++++++++++++++ .../lib/core/workspace-dispatch/planner.ts | 154 +++++ .../workspace-dispatch/reconciler.test.ts | 225 +++++++ .../lib/core/workspace-dispatch/reconciler.ts | 226 +++++++ .../core/workspace-dispatch/redis-store.ts | 577 ++++++++++++++++++ .../core/workspace-dispatch/status.test.ts | 102 ++++ .../sim/lib/core/workspace-dispatch/status.ts | 110 ++++ apps/sim/lib/core/workspace-dispatch/store.ts | 193 ++++++ apps/sim/lib/core/workspace-dispatch/types.ts | 107 ++++ .../core/workspace-dispatch/worker.test.ts | 98 +++ .../sim/lib/core/workspace-dispatch/worker.ts | 104 ++++ apps/sim/lib/execution/buffered-stream.ts | 111 ++++ 
.../lib/knowledge/connectors/sync-engine.ts | 37 +- .../knowledge/documents/document-processor.ts | 17 +- .../documents/parser-extension.test.ts | 27 + .../knowledge/documents/parser-extension.ts | 56 ++ apps/sim/lib/knowledge/documents/queue.ts | 227 ------- apps/sim/lib/knowledge/documents/service.ts | 156 +++-- apps/sim/lib/logs/events.ts | 6 + apps/sim/lib/mothership/inbox/executor.ts | 18 + .../lib/notifications/inactivity-polling.ts | 3 + .../contexts/copilot/copilot-file-manager.ts | 2 +- .../workspace/workspace-file-manager.ts | 30 +- .../uploads/utils/user-file-base64.server.ts | 8 +- apps/sim/lib/webhooks/processor.test.ts | 26 +- apps/sim/lib/webhooks/processor.ts | 112 ++-- .../workflows/executor/execution-events.ts | 61 +- .../executor/queued-workflow-execution.ts | 339 ++++++++++ apps/sim/lib/workflows/utils.ts | 8 +- apps/sim/package.json | 9 +- apps/sim/scripts/load/README.md | 113 ++++ .../sim/scripts/load/workflow-concurrency.yml | 24 + apps/sim/scripts/load/workflow-isolation.yml | 35 ++ apps/sim/scripts/load/workflow-waves.yml | 33 + apps/sim/worker/health.ts | 77 +++ apps/sim/worker/index.ts | 190 ++++++ .../processors/knowledge-connector-sync.ts | 22 + .../knowledge-document-processing.ts | 26 + .../processors/mothership-job-execution.ts | 20 + apps/sim/worker/processors/schedule.ts | 21 + apps/sim/worker/processors/webhook.ts | 21 + apps/sim/worker/processors/workflow.ts | 51 ++ .../workspace-notification-delivery.ts | 32 + bun.lock | 32 +- docker-compose.prod.yml | 40 ++ helm/sim/values.yaml | 5 + 88 files changed, 6273 insertions(+), 959 deletions(-) create mode 100644 apps/sim/app/api/jobs/[jobId]/route.test.ts create mode 100644 apps/sim/lib/billing/workspace-concurrency.test.ts create mode 100644 apps/sim/lib/billing/workspace-concurrency.ts create mode 100644 apps/sim/lib/core/admission/gate.ts create mode 100644 apps/sim/lib/core/async-jobs/backends/bullmq.ts delete mode 100644 apps/sim/lib/core/async-jobs/backends/redis.test.ts delete 
mode 100644 apps/sim/lib/core/async-jobs/backends/redis.ts create mode 100644 apps/sim/lib/core/bullmq/connection.ts create mode 100644 apps/sim/lib/core/bullmq/index.ts create mode 100644 apps/sim/lib/core/bullmq/queues.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/adapter.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/dispatcher.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/factory.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/index.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/memory-store.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/memory-store.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/planner.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/reconciler.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/reconciler.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/redis-store.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/status.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/status.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/store.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/types.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/worker.test.ts create mode 100644 apps/sim/lib/core/workspace-dispatch/worker.ts create mode 100644 apps/sim/lib/execution/buffered-stream.ts create mode 100644 apps/sim/lib/knowledge/documents/parser-extension.test.ts create mode 100644 apps/sim/lib/knowledge/documents/parser-extension.ts delete mode 100644 apps/sim/lib/knowledge/documents/queue.ts create mode 100644 apps/sim/lib/workflows/executor/queued-workflow-execution.ts create mode 100644 apps/sim/scripts/load/README.md create mode 100644 apps/sim/scripts/load/workflow-concurrency.yml create mode 100644 apps/sim/scripts/load/workflow-isolation.yml create mode 100644 apps/sim/scripts/load/workflow-waves.yml 
create mode 100644 apps/sim/worker/health.ts create mode 100644 apps/sim/worker/index.ts create mode 100644 apps/sim/worker/processors/knowledge-connector-sync.ts create mode 100644 apps/sim/worker/processors/knowledge-document-processing.ts create mode 100644 apps/sim/worker/processors/mothership-job-execution.ts create mode 100644 apps/sim/worker/processors/schedule.ts create mode 100644 apps/sim/worker/processors/webhook.ts create mode 100644 apps/sim/worker/processors/workflow.ts create mode 100644 apps/sim/worker/processors/workspace-notification-delivery.ts diff --git a/apps/docs/content/docs/en/execution/costs.mdx b/apps/docs/content/docs/en/execution/costs.mdx index 25f4cc05adf..9f7af19f3d0 100644 --- a/apps/docs/content/docs/en/execution/costs.mdx +++ b/apps/docs/content/docs/en/execution/costs.mdx @@ -195,6 +195,17 @@ By default, your usage is capped at the credits included in your plan. To allow Max (individual) shares the same rate limits as team plans. Team plans (Pro or Max for Teams) use the Max-tier rate limits. +### Concurrent Execution Limits + +| Plan | Concurrent Executions | +|------|----------------------| +| **Free** | 5 | +| **Pro** | 50 | +| **Max / Team** | 200 | +| **Enterprise** | 200 (customizable) | + +Concurrent execution limits control how many workflow executions can run simultaneously within a workspace. When the limit is reached, new executions are queued and admitted as running executions complete. Manual runs from the editor are not subject to these limits. 
+ ### File Storage | Plan | Storage | diff --git a/apps/sim/app/(home)/components/pricing/pricing.tsx b/apps/sim/app/(home)/components/pricing/pricing.tsx index 0c244a398e2..509da377d69 100644 --- a/apps/sim/app/(home)/components/pricing/pricing.tsx +++ b/apps/sim/app/(home)/components/pricing/pricing.tsx @@ -25,6 +25,7 @@ const PRICING_TIERS: PricingTier[] = [ '5GB file storage', '3 tables · 1,000 rows each', '5 min execution limit', + '5 concurrent/workspace', '7-day log retention', 'CLI/SDK/MCP Access', ], @@ -42,6 +43,7 @@ const PRICING_TIERS: PricingTier[] = [ '50GB file storage', '25 tables · 5,000 rows each', '50 min execution · 150 runs/min', + '50 concurrent/workspace', 'Unlimited log retention', 'CLI/SDK/MCP Access', ], @@ -59,6 +61,7 @@ const PRICING_TIERS: PricingTier[] = [ '500GB file storage', '25 tables · 5,000 rows each', '50 min execution · 300 runs/min', + '200 concurrent/workspace', 'Unlimited log retention', 'CLI/SDK/MCP Access', ], @@ -75,6 +78,7 @@ const PRICING_TIERS: PricingTier[] = [ 'Custom file storage', '10,000 tables · 1M rows each', 'Custom execution limits', + 'Custom concurrency limits', 'Unlimited log retention', 'SSO & SCIM · SOC2 & HIPAA', 'Self hosting · Dedicated support', diff --git a/apps/sim/app/api/copilot/chat/route.ts b/apps/sim/app/api/copilot/chat/route.ts index dc9c443fb70..e14b3d715eb 100644 --- a/apps/sim/app/api/copilot/chat/route.ts +++ b/apps/sim/app/api/copilot/chat/route.ts @@ -5,6 +5,7 @@ import { and, desc, eq, sql } from 'drizzle-orm' import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' import { getSession } from '@/lib/auth' +import { createRunSegment } from '@/lib/copilot/async-runs/repository' import { getAccessibleCopilotChat, resolveOrCreateChat } from '@/lib/copilot/chat-lifecycle' import { buildCopilotRequestPayload } from '@/lib/copilot/chat-payload' import { @@ -539,10 +540,26 @@ export async function POST(req: NextRequest) { return new Response(sseStream, { headers: 
SSE_RESPONSE_HEADERS }) } + const nsExecutionId = crypto.randomUUID() + const nsRunId = crypto.randomUUID() + + if (actualChatId) { + await createRunSegment({ + id: nsRunId, + executionId: nsExecutionId, + chatId: actualChatId, + userId: authenticatedUserId, + workflowId, + streamId: userMessageIdToUse, + }).catch(() => {}) + } + const nonStreamingResult = await orchestrateCopilotStream(requestPayload, { userId: authenticatedUserId, workflowId, chatId: actualChatId, + executionId: nsExecutionId, + runId: nsRunId, goRoute: '/api/copilot', autoExecuteTools: true, interactive: true, diff --git a/apps/sim/app/api/jobs/[jobId]/route.test.ts b/apps/sim/app/api/jobs/[jobId]/route.test.ts new file mode 100644 index 00000000000..050c0bee2c5 --- /dev/null +++ b/apps/sim/app/api/jobs/[jobId]/route.test.ts @@ -0,0 +1,160 @@ +/** + * @vitest-environment node + */ +import type { NextRequest } from 'next/server' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + mockCheckHybridAuth, + mockGetDispatchJobRecord, + mockGetJobQueue, + mockVerifyWorkflowAccess, + mockGetWorkflowById, +} = vi.hoisted(() => ({ + mockCheckHybridAuth: vi.fn(), + mockGetDispatchJobRecord: vi.fn(), + mockGetJobQueue: vi.fn(), + mockVerifyWorkflowAccess: vi.fn(), + mockGetWorkflowById: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/auth/hybrid', () => ({ + checkHybridAuth: mockCheckHybridAuth, +})) + +vi.mock('@/lib/core/async-jobs', () => ({ + JOB_STATUS: { + PENDING: 'pending', + PROCESSING: 'processing', + COMPLETED: 'completed', + FAILED: 'failed', + }, + getJobQueue: mockGetJobQueue, +})) + +vi.mock('@/lib/core/workspace-dispatch/store', () => ({ + getDispatchJobRecord: mockGetDispatchJobRecord, +})) + +vi.mock('@/lib/core/utils/request', () => ({ + generateRequestId: vi.fn().mockReturnValue('request-1'), +})) + +vi.mock('@/socket/middleware/permissions', 
() => ({ + verifyWorkflowAccess: mockVerifyWorkflowAccess, +})) + +vi.mock('@/lib/workflows/utils', () => ({ + getWorkflowById: mockGetWorkflowById, +})) + +import { GET } from './route' + +function createMockRequest(): NextRequest { + return { + headers: { + get: () => null, + }, + } as NextRequest +} + +describe('GET /api/jobs/[jobId]', () => { + beforeEach(() => { + vi.clearAllMocks() + + mockCheckHybridAuth.mockResolvedValue({ + success: true, + userId: 'user-1', + apiKeyType: undefined, + workspaceId: undefined, + }) + + mockVerifyWorkflowAccess.mockResolvedValue({ hasAccess: true }) + mockGetWorkflowById.mockResolvedValue({ + id: 'workflow-1', + workspaceId: 'workspace-1', + }) + + mockGetJobQueue.mockResolvedValue({ + getJob: vi.fn().mockResolvedValue(null), + }) + }) + + it('returns dispatcher-aware waiting status with metadata', async () => { + mockGetDispatchJobRecord.mockResolvedValue({ + id: 'dispatch-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { + workflowId: 'workflow-1', + }, + priority: 10, + status: 'waiting', + createdAt: 1000, + admittedAt: 2000, + }) + + const response = await GET(createMockRequest(), { + params: Promise.resolve({ jobId: 'dispatch-1' }), + }) + const body = await response.json() + + expect(response.status).toBe(200) + expect(body.status).toBe('waiting') + expect(body.metadata.queueName).toBe('workflow-execution') + expect(body.metadata.lane).toBe('runtime') + expect(body.metadata.workspaceId).toBe('workspace-1') + }) + + it('returns completed output from dispatch state', async () => { + mockGetDispatchJobRecord.mockResolvedValue({ + id: 'dispatch-2', + workspaceId: 'workspace-1', + lane: 'interactive', + queueName: 'workflow-execution', + bullmqJobName: 'direct-workflow-execution', + bullmqPayload: {}, + metadata: { + workflowId: 'workflow-1', + }, + priority: 1, + status: 'completed', + createdAt: 1000, + 
startedAt: 2000, + completedAt: 7000, + output: { success: true }, + }) + + const response = await GET(createMockRequest(), { + params: Promise.resolve({ jobId: 'dispatch-2' }), + }) + const body = await response.json() + + expect(response.status).toBe(200) + expect(body.status).toBe('completed') + expect(body.output).toEqual({ success: true }) + expect(body.metadata.duration).toBe(5000) + }) + + it('returns 404 when neither dispatch nor BullMQ job exists', async () => { + mockGetDispatchJobRecord.mockResolvedValue(null) + + const response = await GET(createMockRequest(), { + params: Promise.resolve({ jobId: 'missing-job' }), + }) + + expect(response.status).toBe(404) + }) +}) diff --git a/apps/sim/app/api/jobs/[jobId]/route.ts b/apps/sim/app/api/jobs/[jobId]/route.ts index cb8a43a80de..0ce749fa828 100644 --- a/apps/sim/app/api/jobs/[jobId]/route.ts +++ b/apps/sim/app/api/jobs/[jobId]/route.ts @@ -1,8 +1,10 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { checkHybridAuth } from '@/lib/auth/hybrid' -import { getJobQueue, JOB_STATUS } from '@/lib/core/async-jobs' +import { getJobQueue } from '@/lib/core/async-jobs' import { generateRequestId } from '@/lib/core/utils/request' +import { presentDispatchOrJobStatus } from '@/lib/core/workspace-dispatch/status' +import { getDispatchJobRecord } from '@/lib/core/workspace-dispatch/store' import { createErrorResponse } from '@/app/api/workflows/utils' const logger = createLogger('TaskStatusAPI') @@ -23,68 +25,54 @@ export async function GET( const authenticatedUserId = authResult.userId + const dispatchJob = await getDispatchJobRecord(taskId) const jobQueue = await getJobQueue() - const job = await jobQueue.getJob(taskId) + const job = dispatchJob ? 
null : await jobQueue.getJob(taskId) - if (!job) { + if (!job && !dispatchJob) { return createErrorResponse('Task not found', 404) } - if (job.metadata?.workflowId) { + const metadataToCheck = dispatchJob?.metadata ?? job?.metadata + + if (metadataToCheck?.workflowId) { const { verifyWorkflowAccess } = await import('@/socket/middleware/permissions') const accessCheck = await verifyWorkflowAccess( authenticatedUserId, - job.metadata.workflowId as string + metadataToCheck.workflowId as string ) if (!accessCheck.hasAccess) { - logger.warn(`[${requestId}] Access denied to workflow ${job.metadata.workflowId}`) + logger.warn(`[${requestId}] Access denied to workflow ${metadataToCheck.workflowId}`) return createErrorResponse('Access denied', 403) } if (authResult.apiKeyType === 'workspace' && authResult.workspaceId) { const { getWorkflowById } = await import('@/lib/workflows/utils') - const workflow = await getWorkflowById(job.metadata.workflowId as string) + const workflow = await getWorkflowById(metadataToCheck.workflowId as string) if (!workflow?.workspaceId || workflow.workspaceId !== authResult.workspaceId) { return createErrorResponse('API key is not authorized for this workspace', 403) } } - } else if (job.metadata?.userId && job.metadata.userId !== authenticatedUserId) { - logger.warn(`[${requestId}] Access denied to user ${job.metadata.userId}`) + } else if (metadataToCheck?.userId && metadataToCheck.userId !== authenticatedUserId) { + logger.warn(`[${requestId}] Access denied to user ${metadataToCheck.userId}`) return createErrorResponse('Access denied', 403) - } else if (!job.metadata?.userId && !job.metadata?.workflowId) { + } else if (!metadataToCheck?.userId && !metadataToCheck?.workflowId) { logger.warn(`[${requestId}] Access denied to job ${taskId}`) return createErrorResponse('Access denied', 403) } - const mappedStatus = job.status === JOB_STATUS.PENDING ? 
'queued' : job.status - + const presented = presentDispatchOrJobStatus(dispatchJob, job) const response: any = { success: true, taskId, - status: mappedStatus, - metadata: { - startedAt: job.startedAt, - }, - } - - if (job.status === JOB_STATUS.COMPLETED) { - response.output = job.output - response.metadata.completedAt = job.completedAt - if (job.startedAt && job.completedAt) { - response.metadata.duration = job.completedAt.getTime() - job.startedAt.getTime() - } - } - - if (job.status === JOB_STATUS.FAILED) { - response.error = job.error - response.metadata.completedAt = job.completedAt - if (job.startedAt && job.completedAt) { - response.metadata.duration = job.completedAt.getTime() - job.startedAt.getTime() - } + status: presented.status, + metadata: presented.metadata, } - if (job.status === JOB_STATUS.PROCESSING || job.status === JOB_STATUS.PENDING) { - response.estimatedDuration = 300000 + if (presented.output !== undefined) response.output = presented.output + if (presented.error !== undefined) response.error = presented.error + if (presented.estimatedDuration !== undefined) { + response.estimatedDuration = presented.estimatedDuration } return NextResponse.json(response) diff --git a/apps/sim/app/api/mcp/copilot/route.ts b/apps/sim/app/api/mcp/copilot/route.ts index bb3e2e856d5..b61dbc39806 100644 --- a/apps/sim/app/api/mcp/copilot/route.ts +++ b/apps/sim/app/api/mcp/copilot/route.ts @@ -18,6 +18,7 @@ import { eq, sql } from 'drizzle-orm' import { type NextRequest, NextResponse } from 'next/server' import { validateOAuthAccessToken } from '@/lib/auth/oauth-token' import { getHighestPrioritySubscription } from '@/lib/billing/core/subscription' +import { createRunSegment } from '@/lib/copilot/async-runs/repository' import { ORCHESTRATION_TIMEOUT_MS, SIM_AGENT_API_URL } from '@/lib/copilot/constants' import { orchestrateCopilotStream } from '@/lib/copilot/orchestrator' import { orchestrateSubagentStream } from '@/lib/copilot/orchestrator/subagent' @@ -727,10 
+728,25 @@ async function handleBuildToolCall( chatId, } + const executionId = crypto.randomUUID() + const runId = crypto.randomUUID() + const messageId = requestPayload.messageId as string + + await createRunSegment({ + id: runId, + executionId, + chatId, + userId, + workflowId: resolved.workflowId, + streamId: messageId, + }).catch(() => {}) + const result = await orchestrateCopilotStream(requestPayload, { userId, workflowId: resolved.workflowId, chatId, + executionId, + runId, goRoute: '/api/mcp', autoExecuteTools: true, timeout: ORCHESTRATION_TIMEOUT_MS, diff --git a/apps/sim/app/api/mothership/execute/route.ts b/apps/sim/app/api/mothership/execute/route.ts index 1632b028d12..b0fc3a82d08 100644 --- a/apps/sim/app/api/mothership/execute/route.ts +++ b/apps/sim/app/api/mothership/execute/route.ts @@ -2,6 +2,7 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' import { checkInternalAuth } from '@/lib/auth/hybrid' +import { createRunSegment } from '@/lib/copilot/async-runs/repository' import { buildIntegrationToolSchemas } from '@/lib/copilot/chat-payload' import { appendCopilotLogContext } from '@/lib/copilot/logging' import { orchestrateCopilotStream } from '@/lib/copilot/orchestrator' @@ -71,10 +72,24 @@ export async function POST(req: NextRequest) { ...(userPermission ? 
{ userPermission } : {}), } + const executionId = crypto.randomUUID() + const runId = crypto.randomUUID() + + await createRunSegment({ + id: runId, + executionId, + chatId: effectiveChatId, + userId, + workspaceId, + streamId: messageId, + }).catch(() => {}) + const result = await orchestrateCopilotStream(requestPayload, { userId, workspaceId, chatId: effectiveChatId, + executionId, + runId, goRoute: '/api/mothership/execute', autoExecuteTools: true, interactive: false, diff --git a/apps/sim/app/api/schedules/execute/route.test.ts b/apps/sim/app/api/schedules/execute/route.test.ts index cfdf6c3877b..80c59e537d1 100644 --- a/apps/sim/app/api/schedules/execute/route.test.ts +++ b/apps/sim/app/api/schedules/execute/route.test.ts @@ -9,10 +9,12 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' const { mockVerifyCronAuth, mockExecuteScheduleJob, + mockExecuteJobInline, mockFeatureFlags, mockDbReturning, mockDbUpdate, mockEnqueue, + mockEnqueueWorkspaceDispatch, mockStartJob, mockCompleteJob, mockMarkJobFailed, @@ -22,6 +24,7 @@ const { const mockDbSet = vi.fn().mockReturnValue({ where: mockDbWhere }) const mockDbUpdate = vi.fn().mockReturnValue({ set: mockDbSet }) const mockEnqueue = vi.fn().mockResolvedValue('job-id-1') + const mockEnqueueWorkspaceDispatch = vi.fn().mockResolvedValue('job-id-1') const mockStartJob = vi.fn().mockResolvedValue(undefined) const mockCompleteJob = vi.fn().mockResolvedValue(undefined) const mockMarkJobFailed = vi.fn().mockResolvedValue(undefined) @@ -29,6 +32,7 @@ const { return { mockVerifyCronAuth: vi.fn().mockReturnValue(null), mockExecuteScheduleJob: vi.fn().mockResolvedValue(undefined), + mockExecuteJobInline: vi.fn().mockResolvedValue(undefined), mockFeatureFlags: { isTriggerDevEnabled: false, isHosted: false, @@ -38,6 +42,7 @@ const { mockDbReturning, mockDbUpdate, mockEnqueue, + mockEnqueueWorkspaceDispatch, mockStartJob, mockCompleteJob, mockMarkJobFailed, @@ -50,6 +55,8 @@ vi.mock('@/lib/auth/internal', () => ({ 
vi.mock('@/background/schedule-execution', () => ({ executeScheduleJob: mockExecuteScheduleJob, + executeJobInline: mockExecuteJobInline, + releaseScheduleLock: vi.fn().mockResolvedValue(undefined), })) vi.mock('@/lib/core/config/feature-flags', () => mockFeatureFlags) @@ -68,6 +75,22 @@ vi.mock('@/lib/core/async-jobs', () => ({ shouldExecuteInline: vi.fn().mockReturnValue(false), })) +vi.mock('@/lib/core/bullmq', () => ({ + isBullMQEnabled: vi.fn().mockReturnValue(true), + createBullMQJobData: vi.fn((payload: unknown) => ({ payload })), +})) + +vi.mock('@/lib/core/workspace-dispatch', () => ({ + enqueueWorkspaceDispatch: mockEnqueueWorkspaceDispatch, +})) + +vi.mock('@/lib/workflows/utils', () => ({ + getWorkflowById: vi.fn().mockResolvedValue({ + id: 'workflow-1', + workspaceId: 'workspace-1', + }), +})) + vi.mock('drizzle-orm', () => ({ and: vi.fn((...conditions: unknown[]) => ({ type: 'and', conditions })), eq: vi.fn((field: unknown, value: unknown) => ({ field, value, type: 'eq' })), @@ -142,6 +165,18 @@ const MULTIPLE_SCHEDULES = [ }, ] +const SINGLE_JOB = [ + { + id: 'job-1', + cronExpression: '0 * * * *', + failedCount: 0, + lastQueuedAt: undefined, + sourceUserId: 'user-1', + sourceWorkspaceId: 'workspace-1', + sourceType: 'job', + }, +] + function createMockRequest(): NextRequest { const mockHeaders = new Map([ ['authorization', 'Bearer test-cron-secret'], @@ -211,30 +246,44 @@ describe('Scheduled Workflow Execution API Route', () => { expect(data).toHaveProperty('executedCount', 2) }) + it('should queue mothership jobs to BullMQ when available', async () => { + mockDbReturning.mockReturnValueOnce([]).mockReturnValueOnce(SINGLE_JOB) + + const response = await GET(createMockRequest()) + + expect(response.status).toBe(200) + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( + expect.objectContaining({ + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'mothership-job-execution', + bullmqJobName: 'mothership-job-execution', + 
bullmqPayload: { + payload: { + scheduleId: 'job-1', + cronExpression: '0 * * * *', + failedCount: 0, + now: expect.any(String), + }, + }, + }) + ) + expect(mockExecuteJobInline).not.toHaveBeenCalled() + }) + it('should enqueue preassigned correlation metadata for schedules', async () => { mockDbReturning.mockReturnValue(SINGLE_SCHEDULE) const response = await GET(createMockRequest()) expect(response.status).toBe(200) - expect(mockEnqueue).toHaveBeenCalledWith( - 'schedule-execution', + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( expect.objectContaining({ - scheduleId: 'schedule-1', - workflowId: 'workflow-1', - executionId: 'schedule-execution-1', - requestId: 'test-request-id', - correlation: { - executionId: 'schedule-execution-1', - requestId: 'test-request-id', - source: 'schedule', - workflowId: 'workflow-1', - scheduleId: 'schedule-1', - triggerType: 'schedule', - scheduledFor: '2025-01-01T00:00:00.000Z', - }, - }), - { + id: 'schedule-execution-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'schedule-execution', + bullmqJobName: 'schedule-execution', metadata: { workflowId: 'workflow-1', correlation: { @@ -247,7 +296,7 @@ describe('Scheduled Workflow Execution API Route', () => { scheduledFor: '2025-01-01T00:00:00.000Z', }, }, - } + }) ) }) }) diff --git a/apps/sim/app/api/schedules/execute/route.ts b/apps/sim/app/api/schedules/execute/route.ts index cef36bfb25b..d739f3aa67b 100644 --- a/apps/sim/app/api/schedules/execute/route.ts +++ b/apps/sim/app/api/schedules/execute/route.ts @@ -5,7 +5,9 @@ import { type NextRequest, NextResponse } from 'next/server' import { v4 as uuidv4 } from 'uuid' import { verifyCronAuth } from '@/lib/auth/internal' import { getJobQueue, shouldExecuteInline } from '@/lib/core/async-jobs' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { generateRequestId } from '@/lib/core/utils/request' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' 
import { executeJobInline, executeScheduleJob, @@ -73,6 +75,8 @@ export async function GET(request: NextRequest) { cronExpression: workflowSchedule.cronExpression, failedCount: workflowSchedule.failedCount, lastQueuedAt: workflowSchedule.lastQueuedAt, + sourceWorkspaceId: workflowSchedule.sourceWorkspaceId, + sourceUserId: workflowSchedule.sourceUserId, sourceType: workflowSchedule.sourceType, }) @@ -111,9 +115,40 @@ export async function GET(request: NextRequest) { } try { - const jobId = await jobQueue.enqueue('schedule-execution', payload, { - metadata: { workflowId: schedule.workflowId ?? undefined, correlation }, - }) + const { getWorkflowById } = await import('@/lib/workflows/utils') + const resolvedWorkflow = schedule.workflowId + ? await getWorkflowById(schedule.workflowId) + : null + const resolvedWorkspaceId = resolvedWorkflow?.workspaceId + + let jobId: string + if (isBullMQEnabled()) { + if (!resolvedWorkspaceId) { + throw new Error( + `Missing workspace for scheduled workflow ${schedule.workflowId}; refusing to bypass workspace admission` + ) + } + + jobId = await enqueueWorkspaceDispatch({ + id: executionId, + workspaceId: resolvedWorkspaceId, + lane: 'runtime', + queueName: 'schedule-execution', + bullmqJobName: 'schedule-execution', + bullmqPayload: createBullMQJobData(payload, { + workflowId: schedule.workflowId ?? undefined, + correlation, + }), + metadata: { + workflowId: schedule.workflowId ?? undefined, + correlation, + }, + }) + } else { + jobId = await jobQueue.enqueue('schedule-execution', payload, { + metadata: { workflowId: schedule.workflowId ?? undefined, correlation }, + }) + } logger.info( `[${requestId}] Queued schedule execution task ${jobId} for workflow ${schedule.workflowId}` ) @@ -165,7 +200,7 @@ export async function GET(request: NextRequest) { } }) - // Jobs always execute inline (no TriggerDev) + // Mothership jobs use BullMQ when available, otherwise direct inline execution. 
const jobPromises = dueJobs.map(async (job) => { const queueTime = job.lastQueuedAt ?? queuedAt const payload = { @@ -176,7 +211,24 @@ export async function GET(request: NextRequest) { } try { - await executeJobInline(payload) + if (isBullMQEnabled()) { + if (!job.sourceWorkspaceId || !job.sourceUserId) { + throw new Error(`Mothership job ${job.id} is missing workspace/user ownership`) + } + + await enqueueWorkspaceDispatch({ + workspaceId: job.sourceWorkspaceId!, + lane: 'runtime', + queueName: 'mothership-job-execution', + bullmqJobName: 'mothership-job-execution', + bullmqPayload: createBullMQJobData(payload), + metadata: { + userId: job.sourceUserId, + }, + }) + } else { + await executeJobInline(payload) + } } catch (error) { logger.error(`[${requestId}] Job execution failed for ${job.id}`, { error: error instanceof Error ? error.message : String(error), diff --git a/apps/sim/app/api/v1/copilot/chat/route.ts b/apps/sim/app/api/v1/copilot/chat/route.ts index a222510bb55..dafb1baf0e4 100644 --- a/apps/sim/app/api/v1/copilot/chat/route.ts +++ b/apps/sim/app/api/v1/copilot/chat/route.ts @@ -1,6 +1,7 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' +import { createRunSegment } from '@/lib/copilot/async-runs/repository' import { appendCopilotLogContext } from '@/lib/copilot/logging' import { COPILOT_REQUEST_MODES } from '@/lib/copilot/models' import { orchestrateCopilotStream } from '@/lib/copilot/orchestrator' @@ -104,10 +105,24 @@ export async function POST(req: NextRequest) { chatId, } + const executionId = crypto.randomUUID() + const runId = crypto.randomUUID() + + await createRunSegment({ + id: runId, + executionId, + chatId, + userId: auth.userId, + workflowId: resolved.workflowId, + streamId: messageId, + }).catch(() => {}) + const result = await orchestrateCopilotStream(requestPayload, { userId: auth.userId, workflowId: resolved.workflowId, chatId, + executionId, + runId, 
goRoute: '/api/mcp', autoExecuteTools: parsed.autoExecuteTools, timeout: parsed.timeout, diff --git a/apps/sim/app/api/webhooks/trigger/[path]/route.ts b/apps/sim/app/api/webhooks/trigger/[path]/route.ts index 56304c3e850..2c283b72fdb 100644 --- a/apps/sim/app/api/webhooks/trigger/[path]/route.ts +++ b/apps/sim/app/api/webhooks/trigger/[path]/route.ts @@ -1,6 +1,8 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' +import { admissionRejectedResponse, tryAdmit } from '@/lib/core/admission/gate' import { generateRequestId } from '@/lib/core/utils/request' +import { DispatchQueueFullError } from '@/lib/core/workspace-dispatch' import { checkWebhookPreprocessing, findAllWebhooksForPath, @@ -41,10 +43,25 @@ export async function POST( request: NextRequest, { params }: { params: Promise<{ path: string }> } ) { + const ticket = tryAdmit() + if (!ticket) { + return admissionRejectedResponse() + } + + try { + return await handleWebhookPost(request, params) + } finally { + ticket.release() + } +} + +async function handleWebhookPost( + request: NextRequest, + params: Promise<{ path: string }> +): Promise { const requestId = generateRequestId() const { path } = await params - // Handle provider challenges before body parsing (Microsoft Graph validationToken, etc.) 
const earlyChallenge = await handleProviderChallenges({}, request, requestId, path) if (earlyChallenge) { return earlyChallenge @@ -140,17 +157,30 @@ export async function POST( continue } - const response = await queueWebhookExecution(foundWebhook, foundWorkflow, body, request, { - requestId, - path, - actorUserId: preprocessResult.actorUserId, - executionId: preprocessResult.executionId, - correlation: preprocessResult.correlation, - }) - responses.push(response) + try { + const response = await queueWebhookExecution(foundWebhook, foundWorkflow, body, request, { + requestId, + path, + actorUserId: preprocessResult.actorUserId, + executionId: preprocessResult.executionId, + correlation: preprocessResult.correlation, + }) + responses.push(response) + } catch (error) { + if (error instanceof DispatchQueueFullError) { + return NextResponse.json( + { + error: 'Service temporarily at capacity', + message: error.message, + retryAfterSeconds: 10, + }, + { status: 503, headers: { 'Retry-After': '10' } } + ) + } + throw error + } } - // Return the last successful response, or a combined response for multiple webhooks if (responses.length === 0) { return new NextResponse('No webhooks processed successfully', { status: 500 }) } diff --git a/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts b/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts index 7d6c599dcfd..355ae6ddf06 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.async.test.ts @@ -10,15 +10,18 @@ const { mockAuthorizeWorkflowByWorkspacePermission, mockPreprocessExecution, mockEnqueue, + mockEnqueueWorkspaceDispatch, } = vi.hoisted(() => ({ mockCheckHybridAuth: vi.fn(), mockAuthorizeWorkflowByWorkspacePermission: vi.fn(), mockPreprocessExecution: vi.fn(), mockEnqueue: vi.fn().mockResolvedValue('job-123'), + mockEnqueueWorkspaceDispatch: vi.fn().mockResolvedValue('job-123'), })) vi.mock('@/lib/auth/hybrid', () => ({ 
checkHybridAuth: mockCheckHybridAuth, + hasExternalApiCredentials: vi.fn().mockReturnValue(true), AuthType: { SESSION: 'session', API_KEY: 'api_key', @@ -44,6 +47,16 @@ vi.mock('@/lib/core/async-jobs', () => ({ markJobFailed: vi.fn(), }), shouldExecuteInline: vi.fn().mockReturnValue(false), + shouldUseBullMQ: vi.fn().mockReturnValue(true), +})) + +vi.mock('@/lib/core/bullmq', () => ({ + createBullMQJobData: vi.fn((payload: unknown, metadata?: unknown) => ({ payload, metadata })), +})) + +vi.mock('@/lib/core/workspace-dispatch', () => ({ + enqueueWorkspaceDispatch: mockEnqueueWorkspaceDispatch, + waitForDispatchJob: vi.fn(), })) vi.mock('@/lib/core/utils/request', () => ({ @@ -132,22 +145,13 @@ describe('workflow execute async route', () => { expect(response.status).toBe(202) expect(body.executionId).toBe('execution-123') expect(body.jobId).toBe('job-123') - expect(mockEnqueue).toHaveBeenCalledWith( - 'workflow-execution', + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( expect.objectContaining({ - workflowId: 'workflow-1', - userId: 'actor-1', - executionId: 'execution-123', - requestId: 'req-12345678', - correlation: { - executionId: 'execution-123', - requestId: 'req-12345678', - source: 'workflow', - workflowId: 'workflow-1', - triggerType: 'manual', - }, - }), - { + id: 'execution-123', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', metadata: { workflowId: 'workflow-1', userId: 'actor-1', @@ -159,7 +163,7 @@ describe('workflow execute async route', () => { triggerType: 'manual', }, }, - } + }) ) }) }) diff --git a/apps/sim/app/api/workflows/[id]/execute/route.ts b/apps/sim/app/api/workflows/[id]/execute/route.ts index fa837f43a9e..df3fc41d434 100644 --- a/apps/sim/app/api/workflows/[id]/execute/route.ts +++ b/apps/sim/app/api/workflows/[id]/execute/route.ts @@ -2,8 +2,10 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 
'next/server' import { validate as uuidValidate, v4 as uuidv4 } from 'uuid' import { z } from 'zod' -import { AuthType, checkHybridAuth } from '@/lib/auth/hybrid' -import { getJobQueue, shouldExecuteInline } from '@/lib/core/async-jobs' +import { AuthType, checkHybridAuth, hasExternalApiCredentials } from '@/lib/auth/hybrid' +import { admissionRejectedResponse, tryAdmit } from '@/lib/core/admission/gate' +import { getJobQueue, shouldExecuteInline, shouldUseBullMQ } from '@/lib/core/async-jobs' +import { createBullMQJobData } from '@/lib/core/bullmq' import { createTimeoutAbortController, getTimeoutErrorMessage, @@ -12,6 +14,13 @@ import { import { generateRequestId } from '@/lib/core/utils/request' import { SSE_HEADERS } from '@/lib/core/utils/sse' import { getBaseUrl } from '@/lib/core/utils/urls' +import { + DispatchQueueFullError, + enqueueWorkspaceDispatch, + type WorkspaceDispatchLane, + waitForDispatchJob, +} from '@/lib/core/workspace-dispatch' +import { createBufferedExecutionStream } from '@/lib/execution/buffered-stream' import { buildNextCallChain, parseCallChain, @@ -33,6 +42,11 @@ import { import { executeWorkflowCore } from '@/lib/workflows/executor/execution-core' import { type ExecutionEvent, encodeSSEEvent } from '@/lib/workflows/executor/execution-events' import { PauseResumeManager } from '@/lib/workflows/executor/human-in-the-loop-manager' +import { + DIRECT_WORKFLOW_JOB_NAME, + type QueuedWorkflowExecutionPayload, + type QueuedWorkflowExecutionResult, +} from '@/lib/workflows/executor/queued-workflow-execution' import { loadDeployedWorkflowState, loadWorkflowFromNormalizedTables, @@ -104,6 +118,8 @@ const ExecuteWorkflowSchema = z.object({ export const runtime = 'nodejs' export const dynamic = 'force-dynamic' +const INLINE_TRIGGER_TYPES = new Set(['manual', 'workflow']) + function resolveOutputIds( selectedOutputs: string[] | undefined, blocks: Record @@ -161,6 +177,7 @@ type AsyncExecutionParams = { requestId: string workflowId: string userId: 
string + workspaceId: string input: any triggerType: CoreTriggerType executionId: string @@ -168,7 +185,8 @@ type AsyncExecutionParams = { } async function handleAsyncExecution(params: AsyncExecutionParams): Promise { - const { requestId, workflowId, userId, input, triggerType, executionId, callChain } = params + const { requestId, workflowId, userId, workspaceId, input, triggerType, executionId, callChain } = + params const correlation = { executionId, @@ -181,6 +199,7 @@ async function handleAsyncExecution(params: AsyncExecutionParams): Promise { try { - await jobQueue.startJob(jobId) + await inlineJobQueue.startJob(jobId) const output = await executeWorkflowJob(payload) - await jobQueue.completeJob(jobId, output) + await inlineJobQueue.completeJob(jobId, output) } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error) logger.error(`[${requestId}] Async workflow execution failed`, { @@ -213,7 +252,7 @@ async function handleAsyncExecution(params: AsyncExecutionParams): Promise }) { + const isSessionRequest = req.headers.has('cookie') && !hasExternalApiCredentials(req.headers) + if (isSessionRequest) { + return handleExecutePost(req, params) + } + + const ticket = tryAdmit() + if (!ticket) { + return admissionRejectedResponse() + } + + try { + return await handleExecutePost(req, params) + } finally { + ticket.release() + } +} + +async function handleExecutePost( + req: NextRequest, + params: Promise<{ id: string }> +): Promise { const requestId = generateRequestId() const { id: workflowId } = await params @@ -584,6 +680,7 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: requestId, workflowId, userId: actorUserId, + workspaceId, input, triggerType: loggingTriggerType, executionId, @@ -676,30 +773,116 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: if (!enableSSE) { logger.info(`[${requestId}] Using non-SSE execution (direct JSON response)`) + const metadata: 
ExecutionMetadata = { + requestId, + executionId, + workflowId, + workspaceId, + userId: actorUserId, + sessionUserId: isClientSession ? userId : undefined, + workflowUserId: workflow.userId, + triggerType, + useDraftState: shouldUseDraftState, + startTime: new Date().toISOString(), + isClientSession, + enforceCredentialAccess: useAuthenticatedUserAsActor, + workflowStateOverride: effectiveWorkflowStateOverride, + callChain, + } + + const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? {} + + if (shouldUseBullMQ() && !INLINE_TRIGGER_TYPES.has(triggerType)) { + try { + const dispatchJobId = await enqueueDirectWorkflowExecution( + { + workflow, + metadata, + input: processedInput, + variables: executionVariables, + selectedOutputs, + includeFileBase64, + base64MaxBytes, + stopAfterBlockId, + timeoutMs: preprocessResult.executionTimeout?.sync, + runFromBlock: resolvedRunFromBlock, + }, + 5, + 'interactive' + ) + + const resultRecord = await waitForDispatchJob( + dispatchJobId, + (preprocessResult.executionTimeout?.sync ?? 300000) + 30000 + ) + + if (resultRecord.status === 'failed') { + return NextResponse.json( + { + success: false, + executionId, + error: resultRecord.error ?? 'Workflow execution failed', + }, + { status: 500 } + ) + } + + const result = resultRecord.output as QueuedWorkflowExecutionResult + + const resultForResponseBlock = { + success: result.success, + logs: result.logs, + output: result.output, + } + + if ( + auth.authType !== AuthType.INTERNAL_JWT && + workflowHasResponseBlock(resultForResponseBlock) + ) { + return createHttpResponseFromBlock(resultForResponseBlock) + } + + return NextResponse.json( + { + success: result.success, + executionId, + output: result.output, + error: result.error, + metadata: result.metadata, + }, + { status: result.statusCode ?? 
200 } + ) + } catch (error: unknown) { + if (error instanceof DispatchQueueFullError) { + return NextResponse.json( + { + error: 'Service temporarily at capacity', + message: error.message, + retryAfterSeconds: 10, + }, + { status: 503, headers: { 'Retry-After': '10' } } + ) + } + + const errorMessage = error instanceof Error ? error.message : 'Unknown error' + + logger.error(`[${requestId}] Queued non-SSE execution failed: ${errorMessage}`) + + return NextResponse.json( + { + success: false, + error: errorMessage, + }, + { status: 500 } + ) + } + } + const timeoutController = createTimeoutAbortController( preprocessResult.executionTimeout?.sync ) try { - const metadata: ExecutionMetadata = { - requestId, - executionId, - workflowId, - workspaceId, - userId: actorUserId, - sessionUserId: isClientSession ? userId : undefined, - workflowUserId: workflow.userId, - triggerType, - useDraftState: shouldUseDraftState, - startTime: new Date().toISOString(), - isClientSession, - enforceCredentialAccess: useAuthenticatedUserAsActor, - workflowStateOverride: effectiveWorkflowStateOverride, - callChain, - } - - const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? {} - const snapshot = new ExecutionSnapshot( metadata, workflow, @@ -809,6 +992,53 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: } if (shouldUseDraftState) { + const shouldDispatchViaQueue = shouldUseBullMQ() && !INLINE_TRIGGER_TYPES.has(triggerType) + if (shouldDispatchViaQueue) { + const metadata: ExecutionMetadata = { + requestId, + executionId, + workflowId, + workspaceId, + userId: actorUserId, + sessionUserId: isClientSession ? 
userId : undefined, + workflowUserId: workflow.userId, + triggerType, + useDraftState: shouldUseDraftState, + startTime: new Date().toISOString(), + isClientSession, + enforceCredentialAccess: useAuthenticatedUserAsActor, + workflowStateOverride: effectiveWorkflowStateOverride, + callChain, + } + + const executionVariables = cachedWorkflowData?.variables ?? workflow.variables ?? {} + + await enqueueDirectWorkflowExecution( + { + workflow, + metadata, + input: processedInput, + variables: executionVariables, + selectedOutputs, + includeFileBase64, + base64MaxBytes, + stopAfterBlockId, + timeoutMs: preprocessResult.executionTimeout?.sync, + runFromBlock: resolvedRunFromBlock, + streamEvents: true, + }, + 1, + 'interactive' + ) + + return new NextResponse(createBufferedExecutionStream(executionId), { + headers: { + ...SSE_HEADERS, + 'X-Execution-Id': executionId, + }, + }) + } + logger.info(`[${requestId}] Using SSE console log streaming (manual execution)`) } else { logger.info(`[${requestId}] Using streaming API response`) @@ -1277,6 +1507,17 @@ export async function POST(req: NextRequest, { params }: { params: Promise<{ id: }, }) } catch (error: any) { + if (error instanceof DispatchQueueFullError) { + return NextResponse.json( + { + error: 'Service temporarily at capacity', + message: error.message, + retryAfterSeconds: 10, + }, + { status: 503, headers: { 'Retry-After': '10' } } + ) + } + logger.error(`[${requestId}] Failed to start workflow execution:`, error) return NextResponse.json( { error: error.message || 'Failed to start workflow execution' }, diff --git a/apps/sim/app/workspace/[workspaceId]/home/components/message-content/utils.ts b/apps/sim/app/workspace/[workspaceId]/home/components/message-content/utils.ts index 2ef81f298b4..0c7807a576d 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/components/message-content/utils.ts +++ b/apps/sim/app/workspace/[workspaceId]/home/components/message-content/utils.ts @@ -82,6 +82,7 @@ const TOOL_ICONS: Record 
= { create_job: { title: 'Creating job', phaseLabel: 'Resource', phase: 'resource' }, manage_job: { title: 'Updating job', phaseLabel: 'Management', phase: 'management' }, update_job_history: { title: 'Updating job', phaseLabel: 'Management', phase: 'management' }, + job_respond: { title: 'Explaining job scheduled', phaseLabel: 'Execution', phase: 'execution' }, // Management manage_mcp_tool: { title: 'Updating integration', phaseLabel: 'Management', phase: 'management' }, manage_skill: { title: 'Updating skill', phaseLabel: 'Management', phase: 'management' }, diff --git a/apps/sim/background/schedule-execution.ts b/apps/sim/background/schedule-execution.ts index 6aa3a306044..38f17af1db0 100644 --- a/apps/sim/background/schedule-execution.ts +++ b/apps/sim/background/schedule-execution.ts @@ -303,6 +303,7 @@ async function runWorkflowExecution({ export type ScheduleExecutionPayload = { scheduleId: string workflowId: string + workspaceId?: string executionId?: string requestId?: string correlation?: AsyncExecutionCorrelation diff --git a/apps/sim/background/workflow-execution.ts b/apps/sim/background/workflow-execution.ts index 49756d82c27..aa2411a2580 100644 --- a/apps/sim/background/workflow-execution.ts +++ b/apps/sim/background/workflow-execution.ts @@ -36,6 +36,7 @@ export function buildWorkflowCorrelation( export type WorkflowExecutionPayload = { workflowId: string userId: string + workspaceId?: string input?: any triggerType?: CoreTriggerType executionId?: string diff --git a/apps/sim/background/workspace-notification-delivery.ts b/apps/sim/background/workspace-notification-delivery.ts index 1886d5462e3..41d08f80cbd 100644 --- a/apps/sim/background/workspace-notification-delivery.ts +++ b/apps/sim/background/workspace-notification-delivery.ts @@ -1,5 +1,5 @@ import { createHmac } from 'crypto' -import { db } from '@sim/db' +import { db, workflowExecutionLogs } from '@sim/db' import { account, workspaceNotificationDelivery, @@ -17,11 +17,14 @@ import { import 
{ checkUsageStatus } from '@/lib/billing/calculations/usage-monitor' import { getHighestPrioritySubscription } from '@/lib/billing/core/subscription' import { dollarsToCredits } from '@/lib/billing/credits/conversion' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' +import { acquireLock } from '@/lib/core/config/redis' import { RateLimiter } from '@/lib/core/rate-limiter' import { decryptSecret } from '@/lib/core/security/encryption' import { secureFetchWithValidation } from '@/lib/core/security/input-validation.server' import { formatDuration } from '@/lib/core/utils/formatting' import { getBaseUrl } from '@/lib/core/utils/urls' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import type { TraceSpan, WorkflowExecutionLog } from '@/lib/logs/types' import { sendEmail } from '@/lib/messaging/email/mailer' import type { AlertConfig } from '@/lib/notifications/alert-rules' @@ -32,6 +35,7 @@ const logger = createLogger('WorkspaceNotificationDelivery') const MAX_ATTEMPTS = 5 const RETRY_DELAYS = [5 * 1000, 15 * 1000, 60 * 1000, 3 * 60 * 1000, 10 * 60 * 1000] +const NOTIFICATION_DISPATCH_LOCK_TTL_SECONDS = 3 function getRetryDelayWithJitter(baseDelay: number): number { const jitter = Math.random() * 0.1 * baseDelay @@ -486,12 +490,170 @@ async function updateDeliveryStatus( export interface NotificationDeliveryParams { deliveryId: string subscriptionId: string + workspaceId: string notificationType: 'webhook' | 'email' | 'slack' log: WorkflowExecutionLog alertConfig?: AlertConfig } -export async function executeNotificationDelivery(params: NotificationDeliveryParams) { +export type NotificationDeliveryResult = + | { status: 'success' | 'skipped' | 'failed' } + | { status: 'retry'; retryDelayMs: number } + +async function buildRetryLog(params: NotificationDeliveryParams): Promise { + const conditions = [eq(workflowExecutionLogs.executionId, params.log.executionId)] + if (params.log.workflowId) { + 
conditions.push(eq(workflowExecutionLogs.workflowId, params.log.workflowId)) + } + + const [storedLog] = await db + .select() + .from(workflowExecutionLogs) + .where(and(...conditions)) + .limit(1) + + if (storedLog) { + return storedLog as unknown as WorkflowExecutionLog + } + + const now = new Date().toISOString() + return { + id: `retry_log_${params.deliveryId}`, + workflowId: params.log.workflowId, + executionId: params.log.executionId, + stateSnapshotId: '', + level: 'info', + trigger: 'system', + startedAt: now, + endedAt: now, + totalDurationMs: 0, + executionData: {}, + cost: { total: 0 }, + createdAt: now, + } +} + +export async function enqueueNotificationDeliveryDispatch( + params: NotificationDeliveryParams +): Promise { + if (!isBullMQEnabled()) { + return false + } + + const lockAcquired = await acquireLock( + `workspace-notification-dispatch:${params.deliveryId}`, + params.deliveryId, + NOTIFICATION_DISPATCH_LOCK_TTL_SECONDS + ) + if (!lockAcquired) { + return false + } + + await enqueueWorkspaceDispatch({ + workspaceId: params.workspaceId, + lane: 'lightweight', + queueName: 'workspace-notification-delivery', + bullmqJobName: 'workspace-notification-delivery', + bullmqPayload: createBullMQJobData(params), + metadata: { + workflowId: params.log.workflowId ?? 
undefined, + }, + }) + + return true +} + +const STUCK_IN_PROGRESS_THRESHOLD_MS = 5 * 60 * 1000 + +export async function sweepPendingNotificationDeliveries(limit = 50): Promise { + if (!isBullMQEnabled()) { + return 0 + } + + const stuckThreshold = new Date(Date.now() - STUCK_IN_PROGRESS_THRESHOLD_MS) + + await db + .update(workspaceNotificationDelivery) + .set({ + status: 'pending', + updatedAt: new Date(), + }) + .where( + and( + eq(workspaceNotificationDelivery.status, 'in_progress'), + lte(workspaceNotificationDelivery.lastAttemptAt, stuckThreshold) + ) + ) + + const dueDeliveries = await db + .select({ + deliveryId: workspaceNotificationDelivery.id, + subscriptionId: workspaceNotificationDelivery.subscriptionId, + workflowId: workspaceNotificationDelivery.workflowId, + executionId: workspaceNotificationDelivery.executionId, + workspaceId: workspaceNotificationSubscription.workspaceId, + alertConfig: workspaceNotificationSubscription.alertConfig, + notificationType: workspaceNotificationSubscription.notificationType, + }) + .from(workspaceNotificationDelivery) + .innerJoin( + workspaceNotificationSubscription, + eq(workspaceNotificationDelivery.subscriptionId, workspaceNotificationSubscription.id) + ) + .where( + and( + eq(workspaceNotificationDelivery.status, 'pending'), + or( + isNull(workspaceNotificationDelivery.nextAttemptAt), + lte(workspaceNotificationDelivery.nextAttemptAt, new Date()) + ) + ) + ) + .limit(limit) + + let enqueued = 0 + + for (const delivery of dueDeliveries) { + const params: NotificationDeliveryParams = { + deliveryId: delivery.deliveryId, + subscriptionId: delivery.subscriptionId, + workspaceId: delivery.workspaceId, + notificationType: delivery.notificationType, + log: await buildRetryLog({ + deliveryId: delivery.deliveryId, + subscriptionId: delivery.subscriptionId, + workspaceId: delivery.workspaceId, + notificationType: delivery.notificationType, + log: { + id: '', + workflowId: delivery.workflowId, + executionId: 
delivery.executionId, + stateSnapshotId: '', + level: 'info', + trigger: 'system', + startedAt: '', + endedAt: '', + totalDurationMs: 0, + executionData: {}, + cost: { total: 0 }, + createdAt: '', + }, + alertConfig: (delivery.alertConfig as AlertConfig | null) ?? undefined, + }), + alertConfig: (delivery.alertConfig as AlertConfig | null) ?? undefined, + } + + if (await enqueueNotificationDeliveryDispatch(params)) { + enqueued += 1 + } + } + + return enqueued +} + +export async function executeNotificationDelivery( + params: NotificationDeliveryParams +): Promise { const { deliveryId, subscriptionId, notificationType, log, alertConfig } = params try { @@ -504,7 +666,7 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (!subscription || !subscription.active) { logger.warn(`Subscription ${subscriptionId} not found or inactive`) await updateDeliveryStatus(deliveryId, 'failed', 'Subscription not found or inactive') - return + return { status: 'failed' } } const claimed = await db @@ -529,7 +691,7 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (claimed.length === 0) { logger.info(`Delivery ${deliveryId} not claimable`) - return + return { status: 'skipped' } } const attempts = claimed[0].attempts @@ -539,7 +701,7 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (!payload) { await updateDeliveryStatus(deliveryId, 'failed', 'Workflow was archived or deleted') logger.info(`Skipping delivery ${deliveryId} - workflow was archived or deleted`) - return + return { status: 'failed' } } let result: { success: boolean; status?: number; error?: string } @@ -561,39 +723,35 @@ export async function executeNotificationDelivery(params: NotificationDeliveryPa if (result.success) { await updateDeliveryStatus(deliveryId, 'success', undefined, result.status) logger.info(`${notificationType} notification delivered successfully`, { deliveryId }) - } else { - if (attempts < 
MAX_ATTEMPTS) { - const retryDelay = getRetryDelayWithJitter( - RETRY_DELAYS[attempts - 1] || RETRY_DELAYS[RETRY_DELAYS.length - 1] - ) - const nextAttemptAt = new Date(Date.now() + retryDelay) + return { status: 'success' } + } + if (attempts < MAX_ATTEMPTS) { + const retryDelay = getRetryDelayWithJitter( + RETRY_DELAYS[attempts - 1] || RETRY_DELAYS[RETRY_DELAYS.length - 1] + ) + const nextAttemptAt = new Date(Date.now() + retryDelay) - await updateDeliveryStatus( - deliveryId, - 'pending', - result.error, - result.status, - nextAttemptAt - ) + await updateDeliveryStatus(deliveryId, 'pending', result.error, result.status, nextAttemptAt) - logger.info( - `${notificationType} notification failed, scheduled retry ${attempts}/${MAX_ATTEMPTS}`, - { - deliveryId, - error: result.error, - } - ) - } else { - await updateDeliveryStatus(deliveryId, 'failed', result.error, result.status) - logger.error(`${notificationType} notification failed after ${MAX_ATTEMPTS} attempts`, { + logger.info( + `${notificationType} notification failed, scheduled retry ${attempts}/${MAX_ATTEMPTS}`, + { deliveryId, error: result.error, - }) - } + } + ) + return { status: 'retry', retryDelayMs: retryDelay } } + await updateDeliveryStatus(deliveryId, 'failed', result.error, result.status) + logger.error(`${notificationType} notification failed after ${MAX_ATTEMPTS} attempts`, { + deliveryId, + error: result.error, + }) + return { status: 'failed' } } catch (error) { logger.error('Notification delivery failed', { deliveryId, error }) await updateDeliveryStatus(deliveryId, 'failed', 'Internal error') + return { status: 'failed' } } } diff --git a/apps/sim/lib/auth/hybrid.ts b/apps/sim/lib/auth/hybrid.ts index c9a9262ebc6..3f2311e7927 100644 --- a/apps/sim/lib/auth/hybrid.ts +++ b/apps/sim/lib/auth/hybrid.ts @@ -14,6 +14,20 @@ export const AuthType = { export type AuthTypeValue = (typeof AuthType)[keyof typeof AuthType] +const API_KEY_HEADER = 'x-api-key' +const BEARER_PREFIX = 'Bearer ' + +/** + * 
Lightweight header-only check for whether a request carries external API credentials. + * Does NOT validate the credentials — only inspects headers to classify the request + * as programmatic API traffic vs interactive session traffic. + */ +export function hasExternalApiCredentials(headers: Headers): boolean { + if (headers.has(API_KEY_HEADER)) return true + const auth = headers.get('authorization') + return auth !== null && auth.startsWith(BEARER_PREFIX) +} + export interface AuthResult { success: boolean userId?: string diff --git a/apps/sim/lib/billing/subscriptions/utils.ts b/apps/sim/lib/billing/subscriptions/utils.ts index 3b26fe2a4e8..078bc77294d 100644 --- a/apps/sim/lib/billing/subscriptions/utils.ts +++ b/apps/sim/lib/billing/subscriptions/utils.ts @@ -13,7 +13,7 @@ import { isPro, isTeam, } from '@/lib/billing/plan-helpers' -import type { EnterpriseSubscriptionMetadata } from '@/lib/billing/types' +import { parseEnterpriseSubscriptionMetadata } from '@/lib/billing/types' import { env } from '@/lib/core/config/env' export const ENTITLED_SUBSCRIPTION_STATUSES = ['active', 'past_due'] as const @@ -80,27 +80,15 @@ export function checkEnterprisePlan(subscription: any): boolean { return isEnterprise(subscription?.plan) && hasPaidSubscriptionStatus(subscription?.status) } -/** - * Type guard to check if metadata is valid EnterpriseSubscriptionMetadata - */ -function isEnterpriseMetadata(metadata: unknown): metadata is EnterpriseSubscriptionMetadata { - return ( - !!metadata && - typeof metadata === 'object' && - 'seats' in metadata && - typeof (metadata as EnterpriseSubscriptionMetadata).seats === 'string' - ) -} - export function getEffectiveSeats(subscription: any): number { if (!subscription) { return 0 } if (isEnterprise(subscription.plan)) { - const metadata = subscription.metadata as EnterpriseSubscriptionMetadata | null - if (isEnterpriseMetadata(metadata)) { - return Number.parseInt(metadata.seats, 10) + const metadata = 
parseEnterpriseSubscriptionMetadata(subscription.metadata) + if (metadata) { + return metadata.seats } return 0 } diff --git a/apps/sim/lib/billing/types/index.ts b/apps/sim/lib/billing/types/index.ts index e3c3f2de559..3c3f846fc8f 100644 --- a/apps/sim/lib/billing/types/index.ts +++ b/apps/sim/lib/billing/types/index.ts @@ -2,18 +2,47 @@ * Billing System Types * Centralized type definitions for the billing system */ +import { z } from 'zod' -export interface EnterpriseSubscriptionMetadata { - plan: 'enterprise' +export const enterpriseSubscriptionMetadataSchema = z.object({ + plan: z + .string() + .transform((v) => v.toLowerCase()) + .pipe(z.literal('enterprise')), // The referenceId must be provided in Stripe metadata to link to the organization // This gets stored in the subscription.referenceId column - referenceId: string + referenceId: z.string().min(1), // The fixed monthly price for this enterprise customer (as string from Stripe metadata) // This will be used to set the organization's usage limit - monthlyPrice: string - // Number of seats for invitation limits (not for billing) (as string from Stripe metadata) - // We set Stripe quantity to 1 and use this for actual seat count - seats: string + monthlyPrice: z.coerce.number().positive(), + // Number of seats for invitation limits (not for billing) + seats: z.coerce.number().int().positive(), + // Optional custom workspace concurrency limit for enterprise workspaces + workspaceConcurrencyLimit: z.coerce.number().int().positive().optional(), +}) + +export type EnterpriseSubscriptionMetadata = z.infer + +const enterpriseWorkspaceConcurrencyMetadataSchema = z.object({ + workspaceConcurrencyLimit: z.coerce.number().int().positive().optional(), +}) + +export type EnterpriseWorkspaceConcurrencyMetadata = z.infer< + typeof enterpriseWorkspaceConcurrencyMetadataSchema +> + +export function parseEnterpriseSubscriptionMetadata( + value: unknown +): EnterpriseSubscriptionMetadata | null { + const result = 
enterpriseSubscriptionMetadataSchema.safeParse(value) + return result.success ? result.data : null +} + +export function parseEnterpriseWorkspaceConcurrencyMetadata( + value: unknown +): EnterpriseWorkspaceConcurrencyMetadata | null { + const result = enterpriseWorkspaceConcurrencyMetadataSchema.safeParse(value) + return result.success ? result.data : null } export interface UsageData { diff --git a/apps/sim/lib/billing/webhooks/enterprise.ts b/apps/sim/lib/billing/webhooks/enterprise.ts index cf20b52b395..92c3bcf459f 100644 --- a/apps/sim/lib/billing/webhooks/enterprise.ts +++ b/apps/sim/lib/billing/webhooks/enterprise.ts @@ -6,26 +6,10 @@ import type Stripe from 'stripe' import { getEmailSubject, renderEnterpriseSubscriptionEmail } from '@/components/emails' import { sendEmail } from '@/lib/messaging/email/mailer' import { getFromEmailAddress } from '@/lib/messaging/email/utils' -import type { EnterpriseSubscriptionMetadata } from '../types' +import { parseEnterpriseSubscriptionMetadata } from '../types' const logger = createLogger('BillingEnterprise') -function isEnterpriseMetadata(value: unknown): value is EnterpriseSubscriptionMetadata { - return ( - !!value && - typeof value === 'object' && - 'plan' in value && - 'referenceId' in value && - 'monthlyPrice' in value && - 'seats' in value && - typeof value.plan === 'string' && - value.plan.toLowerCase() === 'enterprise' && - typeof value.referenceId === 'string' && - typeof value.monthlyPrice === 'string' && - typeof value.seats === 'string' - ) -} - export async function handleManualEnterpriseSubscription(event: Stripe.Event) { const stripeSubscription = event.data.object as Stripe.Subscription @@ -63,37 +47,16 @@ export async function handleManualEnterpriseSubscription(event: Stripe.Event) { throw new Error('Unable to resolve referenceId for subscription') } - if (!isEnterpriseMetadata(metadata)) { + const enterpriseMetadata = parseEnterpriseSubscriptionMetadata(metadata) + if (!enterpriseMetadata) { 
logger.error('[subscription.created] Invalid enterprise metadata shape', { subscriptionId: stripeSubscription.id, metadata, }) throw new Error('Invalid enterprise metadata for subscription') } - const enterpriseMetadata = metadata - const metadataJson: Record = { ...enterpriseMetadata } - - // Extract and parse seats and monthly price from metadata (they come as strings from Stripe) - const seats = Number.parseInt(enterpriseMetadata.seats, 10) - const monthlyPrice = Number.parseFloat(enterpriseMetadata.monthlyPrice) - - if (!seats || seats <= 0 || Number.isNaN(seats)) { - logger.error('[subscription.created] Invalid or missing seats in enterprise metadata', { - subscriptionId: stripeSubscription.id, - seatsRaw: enterpriseMetadata.seats, - seatsParsed: seats, - }) - throw new Error('Enterprise subscription must include valid seats in metadata') - } - if (!monthlyPrice || monthlyPrice <= 0 || Number.isNaN(monthlyPrice)) { - logger.error('[subscription.created] Invalid or missing monthlyPrice in enterprise metadata', { - subscriptionId: stripeSubscription.id, - monthlyPriceRaw: enterpriseMetadata.monthlyPrice, - monthlyPriceParsed: monthlyPrice, - }) - throw new Error('Enterprise subscription must include valid monthlyPrice in metadata') - } + const { seats, monthlyPrice } = enterpriseMetadata // Get the first subscription item which contains the period information const referenceItem = stripeSubscription.items?.data?.[0] @@ -117,7 +80,7 @@ export async function handleManualEnterpriseSubscription(event: Stripe.Event) { ? new Date(stripeSubscription.trial_start * 1000) : null, trialEnd: stripeSubscription.trial_end ? 
new Date(stripeSubscription.trial_end * 1000) : null, - metadata: metadataJson, + metadata: metadata as Record, } const existing = await db diff --git a/apps/sim/lib/billing/workspace-concurrency.test.ts b/apps/sim/lib/billing/workspace-concurrency.test.ts new file mode 100644 index 00000000000..462e24a8e06 --- /dev/null +++ b/apps/sim/lib/billing/workspace-concurrency.test.ts @@ -0,0 +1,146 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + mockGetHighestPrioritySubscription, + mockGetWorkspaceBilledAccountUserId, + mockFeatureFlags, + mockRedisGet, + mockRedisSet, + mockRedisDel, + mockRedisKeys, + mockGetRedisClient, +} = vi.hoisted(() => ({ + mockGetHighestPrioritySubscription: vi.fn(), + mockGetWorkspaceBilledAccountUserId: vi.fn(), + mockFeatureFlags: { + isBillingEnabled: true, + }, + mockRedisGet: vi.fn(), + mockRedisSet: vi.fn(), + mockRedisDel: vi.fn(), + mockRedisKeys: vi.fn(), + mockGetRedisClient: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/billing/core/plan', () => ({ + getHighestPrioritySubscription: mockGetHighestPrioritySubscription, +})) + +vi.mock('@/lib/workspaces/utils', () => ({ + getWorkspaceBilledAccountUserId: mockGetWorkspaceBilledAccountUserId, +})) + +vi.mock('@/lib/core/config/redis', () => ({ + getRedisClient: mockGetRedisClient, +})) + +vi.mock('@/lib/core/config/feature-flags', () => mockFeatureFlags) + +import { + getWorkspaceConcurrencyLimit, + resetWorkspaceConcurrencyLimitCache, +} from '@/lib/billing/workspace-concurrency' + +describe('workspace concurrency billing', () => { + beforeEach(() => { + vi.clearAllMocks() + mockFeatureFlags.isBillingEnabled = true + + mockRedisGet.mockResolvedValue(null) + mockRedisSet.mockResolvedValue('OK') + mockRedisDel.mockResolvedValue(1) + mockRedisKeys.mockResolvedValue([]) + 
mockGetRedisClient.mockReturnValue({ + get: mockRedisGet, + set: mockRedisSet, + del: mockRedisDel, + keys: mockRedisKeys, + }) + }) + + it('returns free tier when no billed account exists', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue(null) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(5) + }) + + it('returns pro limit for pro billing accounts', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'pro_6000', + metadata: null, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(50) + }) + + it('returns max limit for max plan tiers', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'pro_25000', + metadata: null, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(200) + }) + + it('returns max limit for legacy team plans', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'team', + metadata: null, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(200) + }) + + it('returns enterprise metadata override when present', async () => { + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'enterprise', + metadata: { + workspaceConcurrencyLimit: '350', + }, + }) + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(350) + }) + + it('uses free-tier limit when billing is disabled', async () => { + mockFeatureFlags.isBillingEnabled = false + mockGetWorkspaceBilledAccountUserId.mockResolvedValue('user-1') + mockGetHighestPrioritySubscription.mockResolvedValue({ + plan: 'pro_25000', + metadata: { + workspaceConcurrencyLimit: 999, + }, + }) + + await 
expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(5) + }) + + it('uses redis cache when available', async () => { + mockRedisGet.mockResolvedValueOnce('123') + + await expect(getWorkspaceConcurrencyLimit('workspace-1')).resolves.toBe(123) + expect(mockGetWorkspaceBilledAccountUserId).not.toHaveBeenCalled() + }) + + it('can clear a specific workspace cache entry', async () => { + await resetWorkspaceConcurrencyLimitCache('workspace-1') + + expect(mockRedisDel).toHaveBeenCalledWith('workspace-concurrency-limit:workspace-1') + }) +}) diff --git a/apps/sim/lib/billing/workspace-concurrency.ts b/apps/sim/lib/billing/workspace-concurrency.ts new file mode 100644 index 00000000000..acb07169f91 --- /dev/null +++ b/apps/sim/lib/billing/workspace-concurrency.ts @@ -0,0 +1,170 @@ +import { createLogger } from '@sim/logger' +import { getHighestPrioritySubscription } from '@/lib/billing/core/plan' +import { getPlanTierCredits, isEnterprise, isPro, isTeam } from '@/lib/billing/plan-helpers' +import { parseEnterpriseWorkspaceConcurrencyMetadata } from '@/lib/billing/types' +import { env } from '@/lib/core/config/env' +import { isBillingEnabled } from '@/lib/core/config/feature-flags' +import { getRedisClient } from '@/lib/core/config/redis' +import { getWorkspaceBilledAccountUserId } from '@/lib/workspaces/utils' + +const logger = createLogger('WorkspaceConcurrencyBilling') + +const CACHE_TTL_MS = 60_000 +const CACHE_TTL_SECONDS = Math.floor(CACHE_TTL_MS / 1000) + +interface CacheEntry { + value: number + expiresAt: number +} + +const inMemoryConcurrencyCache = new Map() + +function cacheKey(workspaceId: string): string { + return `workspace-concurrency-limit:${workspaceId}` +} + +function parsePositiveLimit(value: unknown): number | null { + if (typeof value === 'number' && Number.isFinite(value) && value > 0) { + return Math.floor(value) + } + + if (typeof value === 'string') { + const parsed = Number.parseInt(value, 10) + if (Number.isFinite(parsed) && parsed > 
0) { + return parsed + } + } + + return null +} + +function getFreeConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_FREE, 10) || 5 +} + +function getProConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_PRO, 10) || 50 +} + +function getTeamConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_TEAM, 10) || 200 +} + +function getEnterpriseDefaultConcurrencyLimit(): number { + return Number.parseInt(env.WORKSPACE_CONCURRENCY_ENTERPRISE, 10) || 200 +} + +function getEnterpriseConcurrencyLimit(metadata: unknown): number { + const enterpriseMetadata = parseEnterpriseWorkspaceConcurrencyMetadata(metadata) + return enterpriseMetadata?.workspaceConcurrencyLimit ?? getEnterpriseDefaultConcurrencyLimit() +} + +function getPlanConcurrencyLimit(plan: string | null | undefined, metadata: unknown): number { + if (!isBillingEnabled) { + return getFreeConcurrencyLimit() + } + + if (!plan) { + return getFreeConcurrencyLimit() + } + + if (isEnterprise(plan)) { + return getEnterpriseConcurrencyLimit(metadata) + } + + if (isTeam(plan)) { + return getTeamConcurrencyLimit() + } + + const credits = getPlanTierCredits(plan) + if (credits >= 25_000) { + return getTeamConcurrencyLimit() + } + + if (isPro(plan)) { + return getProConcurrencyLimit() + } + + return getFreeConcurrencyLimit() +} + +export async function getWorkspaceConcurrencyLimit(workspaceId: string): Promise { + const redis = getRedisClient() + + if (redis) { + const cached = await redis.get(cacheKey(workspaceId)) + const cachedValue = parsePositiveLimit(cached) + if (cachedValue !== null) { + return cachedValue + } + } else { + const cached = inMemoryConcurrencyCache.get(workspaceId) + if (cached && cached.expiresAt > Date.now()) { + return cached.value + } + } + + try { + const billedAccountUserId = await getWorkspaceBilledAccountUserId(workspaceId) + if (!billedAccountUserId) { + if (redis) { + await redis.set( + cacheKey(workspaceId), + 
String(getFreeConcurrencyLimit()), + 'EX', + CACHE_TTL_SECONDS + ) + } else { + inMemoryConcurrencyCache.set(workspaceId, { + value: getFreeConcurrencyLimit(), + expiresAt: Date.now() + CACHE_TTL_MS, + }) + } + return getFreeConcurrencyLimit() + } + + const subscription = await getHighestPrioritySubscription(billedAccountUserId) + const limit = getPlanConcurrencyLimit(subscription?.plan, subscription?.metadata) + + if (redis) { + await redis.set(cacheKey(workspaceId), String(limit), 'EX', CACHE_TTL_SECONDS) + } else { + inMemoryConcurrencyCache.set(workspaceId, { + value: limit, + expiresAt: Date.now() + CACHE_TTL_MS, + }) + } + + return limit + } catch (error) { + logger.error('Failed to resolve workspace concurrency limit, using free tier', { + workspaceId, + error, + }) + + return getFreeConcurrencyLimit() + } +} + +export async function resetWorkspaceConcurrencyLimitCache(workspaceId?: string): Promise { + if (!workspaceId) { + inMemoryConcurrencyCache.clear() + } else { + inMemoryConcurrencyCache.delete(workspaceId) + } + + const redis = getRedisClient() + if (!redis) { + return + } + + if (workspaceId) { + await redis.del(cacheKey(workspaceId)) + return + } + + const keys = await redis.keys('workspace-concurrency-limit:*') + if (keys.length > 0) { + await redis.del(...keys) + } +} diff --git a/apps/sim/lib/core/admission/gate.ts b/apps/sim/lib/core/admission/gate.ts new file mode 100644 index 00000000000..0e6b7552df3 --- /dev/null +++ b/apps/sim/lib/core/admission/gate.ts @@ -0,0 +1,62 @@ +import { createLogger } from '@sim/logger' +import { NextResponse } from 'next/server' +import { env } from '@/lib/core/config/env' + +const logger = createLogger('AdmissionGate') + +const MAX_INFLIGHT = Number.parseInt(env.ADMISSION_GATE_MAX_INFLIGHT ?? '') || 500 + +let inflight = 0 + +export interface AdmissionTicket { + release: () => void +} + +/** + * Attempts to admit a request through the in-process gate. 
+ * Returns a ticket with a release() handle on success, or null if at capacity. + * Zero external calls — purely in-process atomic counter. Each pod maintains its + * own counter, so the effective aggregate limit across N pods is N × MAX_INFLIGHT. + * Configure ADMISSION_GATE_MAX_INFLIGHT per pod based on what each pod can sustain. + */ +export function tryAdmit(): AdmissionTicket | null { + if (inflight >= MAX_INFLIGHT) { + return null + } + + inflight++ + let released = false + + return { + release() { + if (released) return + released = true + inflight-- + }, + } +} + +/** + * Returns a 429 response for requests rejected by the admission gate. + */ +export function admissionRejectedResponse(): NextResponse { + logger.warn('Admission gate rejecting request', { inflight, maxInflight: MAX_INFLIGHT }) + return NextResponse.json( + { + error: 'Too many requests', + message: 'Server is at capacity. Please retry shortly.', + retryAfterSeconds: 5, + }, + { + status: 429, + headers: { 'Retry-After': '5' }, + } + ) +} + +/** + * Returns the current gate metrics for observability. 
+ */ +export function getAdmissionGateStatus(): { inflight: number; maxInflight: number } { + return { inflight, maxInflight: MAX_INFLIGHT } +} diff --git a/apps/sim/lib/core/async-jobs/backends/bullmq.ts b/apps/sim/lib/core/async-jobs/backends/bullmq.ts new file mode 100644 index 00000000000..a7bb4647ef4 --- /dev/null +++ b/apps/sim/lib/core/async-jobs/backends/bullmq.ts @@ -0,0 +1,106 @@ +import { createLogger } from '@sim/logger' +import type { Job as BullMQJob } from 'bullmq' +import { + type EnqueueOptions, + JOB_STATUS, + type Job, + type JobQueueBackend, + type JobStatus, + type JobType, +} from '@/lib/core/async-jobs/types' +import { type BullMQJobData, createBullMQJobData, getBullMQQueue } from '@/lib/core/bullmq' + +const logger = createLogger('BullMQJobQueue') + +function mapBullMQStatus(status: string): JobStatus { + switch (status) { + case 'active': + return JOB_STATUS.PROCESSING + case 'completed': + return JOB_STATUS.COMPLETED + case 'failed': + return JOB_STATUS.FAILED + default: + return JOB_STATUS.PENDING + } +} + +async function toJob( + queueType: JobType, + bullJob: BullMQJob> | null +): Promise { + if (!bullJob) { + return null + } + + const status = mapBullMQStatus(await bullJob.getState()) + + return { + id: bullJob.id ?? '', + type: queueType, + payload: bullJob.data.payload, + status, + createdAt: new Date(bullJob.timestamp), + startedAt: bullJob.processedOn ? new Date(bullJob.processedOn) : undefined, + completedAt: bullJob.finishedOn ? new Date(bullJob.finishedOn) : undefined, + attempts: bullJob.attemptsMade, + maxAttempts: bullJob.opts.attempts ?? 1, + error: bullJob.failedReason || undefined, + output: bullJob.returnvalue, + metadata: bullJob.data.metadata ?? {}, + } +} + +export class BullMQJobQueue implements JobQueueBackend { + async enqueue( + type: JobType, + payload: TPayload, + options?: EnqueueOptions + ): Promise { + const queue = getBullMQQueue(type) + + const job = await queue.add( + options?.name ?? 
type, + createBullMQJobData(payload, options?.metadata), + { + jobId: options?.jobId, + attempts: options?.maxAttempts, + priority: options?.priority, + delay: options?.delayMs, + } + ) + + logger.debug('Enqueued job via BullMQ', { + jobId: job.id, + type, + name: options?.name ?? type, + }) + + return String(job.id) + } + + async getJob(jobId: string): Promise { + const workflowJob = await getBullMQQueue('workflow-execution').getJob(jobId) + if (workflowJob) { + return toJob('workflow-execution', workflowJob) + } + + const webhookJob = await getBullMQQueue('webhook-execution').getJob(jobId) + if (webhookJob) { + return toJob('webhook-execution', webhookJob) + } + + const scheduleJob = await getBullMQQueue('schedule-execution').getJob(jobId) + if (scheduleJob) { + return toJob('schedule-execution', scheduleJob) + } + + return null + } + + async startJob(_jobId: string): Promise {} + + async completeJob(_jobId: string, _output: unknown): Promise {} + + async markJobFailed(_jobId: string, _error: string): Promise {} +} diff --git a/apps/sim/lib/core/async-jobs/backends/index.ts b/apps/sim/lib/core/async-jobs/backends/index.ts index 144094e6407..0abb55d6af5 100644 --- a/apps/sim/lib/core/async-jobs/backends/index.ts +++ b/apps/sim/lib/core/async-jobs/backends/index.ts @@ -1,3 +1,3 @@ +export { BullMQJobQueue } from './bullmq' export { DatabaseJobQueue } from './database' -export { RedisJobQueue } from './redis' export { TriggerDevJobQueue } from './trigger-dev' diff --git a/apps/sim/lib/core/async-jobs/backends/redis.test.ts b/apps/sim/lib/core/async-jobs/backends/redis.test.ts deleted file mode 100644 index ea0a5df6078..00000000000 --- a/apps/sim/lib/core/async-jobs/backends/redis.test.ts +++ /dev/null @@ -1,176 +0,0 @@ -/** - * @vitest-environment node - */ -import { createMockRedis, loggerMock, type MockRedis } from '@sim/testing' -import { beforeEach, describe, expect, it, vi } from 'vitest' - -vi.mock('@sim/logger', () => loggerMock) - -import { - 
JOB_MAX_LIFETIME_SECONDS, - JOB_RETENTION_SECONDS, - JOB_STATUS, -} from '@/lib/core/async-jobs/types' -import { RedisJobQueue } from './redis' - -describe('RedisJobQueue', () => { - let mockRedis: MockRedis - let queue: RedisJobQueue - - beforeEach(() => { - vi.clearAllMocks() - mockRedis = createMockRedis() - queue = new RedisJobQueue(mockRedis as never) - }) - - describe('enqueue', () => { - it.concurrent('should create a job with pending status', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - const jobId = await localQueue.enqueue('workflow-execution', { test: 'data' }) - - expect(jobId).toMatch(/^run_/) - expect(localRedis.hset).toHaveBeenCalledTimes(1) - - const [key, data] = localRedis.hset.mock.calls[0] - expect(key).toBe(`async-jobs:job:${jobId}`) - expect(data.status).toBe(JOB_STATUS.PENDING) - expect(data.type).toBe('workflow-execution') - }) - - it.concurrent('should set max lifetime TTL on enqueue', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - const jobId = await localQueue.enqueue('workflow-execution', { test: 'data' }) - - expect(localRedis.expire).toHaveBeenCalledWith( - `async-jobs:job:${jobId}`, - JOB_MAX_LIFETIME_SECONDS - ) - }) - }) - - describe('completeJob', () => { - it.concurrent('should set status to completed and set TTL', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - const jobId = 'run_test123' - - await localQueue.completeJob(jobId, { result: 'success' }) - - expect(localRedis.hset).toHaveBeenCalledWith(`async-jobs:job:${jobId}`, { - status: JOB_STATUS.COMPLETED, - completedAt: expect.any(String), - output: JSON.stringify({ result: 'success' }), - updatedAt: expect.any(String), - }) - expect(localRedis.expire).toHaveBeenCalledWith( - `async-jobs:job:${jobId}`, - JOB_RETENTION_SECONDS - ) - }) - - it.concurrent('should set TTL to 
24 hours (86400 seconds)', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - await localQueue.completeJob('run_test123', {}) - - expect(localRedis.expire).toHaveBeenCalledWith(expect.any(String), 86400) - }) - }) - - describe('markJobFailed', () => { - it.concurrent('should set status to failed and set TTL', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - const jobId = 'run_test456' - const error = 'Something went wrong' - - await localQueue.markJobFailed(jobId, error) - - expect(localRedis.hset).toHaveBeenCalledWith(`async-jobs:job:${jobId}`, { - status: JOB_STATUS.FAILED, - completedAt: expect.any(String), - error, - updatedAt: expect.any(String), - }) - expect(localRedis.expire).toHaveBeenCalledWith( - `async-jobs:job:${jobId}`, - JOB_RETENTION_SECONDS - ) - }) - - it.concurrent('should set TTL to 24 hours (86400 seconds)', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - await localQueue.markJobFailed('run_test456', 'error') - - expect(localRedis.expire).toHaveBeenCalledWith(expect.any(String), 86400) - }) - }) - - describe('startJob', () => { - it.concurrent('should not set TTL when starting a job', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - - await localQueue.startJob('run_test789') - - expect(localRedis.hset).toHaveBeenCalled() - expect(localRedis.expire).not.toHaveBeenCalled() - }) - }) - - describe('getJob', () => { - it.concurrent('should return null for non-existent job', async () => { - const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - localRedis.hgetall.mockResolvedValue({}) - - const job = await localQueue.getJob('run_nonexistent') - - expect(job).toBeNull() - }) - - it.concurrent('should deserialize job data correctly', async () => { 
- const localRedis = createMockRedis() - const localQueue = new RedisJobQueue(localRedis as never) - const now = new Date() - localRedis.hgetall.mockResolvedValue({ - id: 'run_test', - type: 'workflow-execution', - payload: JSON.stringify({ foo: 'bar' }), - status: JOB_STATUS.COMPLETED, - createdAt: now.toISOString(), - startedAt: now.toISOString(), - completedAt: now.toISOString(), - attempts: '1', - maxAttempts: '3', - error: '', - output: JSON.stringify({ result: 'ok' }), - metadata: JSON.stringify({ workflowId: 'wf_123' }), - }) - - const job = await localQueue.getJob('run_test') - - expect(job).not.toBeNull() - expect(job?.id).toBe('run_test') - expect(job?.type).toBe('workflow-execution') - expect(job?.payload).toEqual({ foo: 'bar' }) - expect(job?.status).toBe(JOB_STATUS.COMPLETED) - expect(job?.output).toEqual({ result: 'ok' }) - expect(job?.metadata.workflowId).toBe('wf_123') - }) - }) -}) - -describe('JOB_RETENTION_SECONDS', () => { - it.concurrent('should be 24 hours in seconds', async () => { - expect(JOB_RETENTION_SECONDS).toBe(24 * 60 * 60) - expect(JOB_RETENTION_SECONDS).toBe(86400) - }) -}) diff --git a/apps/sim/lib/core/async-jobs/backends/redis.ts b/apps/sim/lib/core/async-jobs/backends/redis.ts deleted file mode 100644 index 6a361d0e9c0..00000000000 --- a/apps/sim/lib/core/async-jobs/backends/redis.ts +++ /dev/null @@ -1,146 +0,0 @@ -import { createLogger } from '@sim/logger' -import type Redis from 'ioredis' -import { - type EnqueueOptions, - JOB_MAX_LIFETIME_SECONDS, - JOB_RETENTION_SECONDS, - JOB_STATUS, - type Job, - type JobMetadata, - type JobQueueBackend, - type JobStatus, - type JobType, -} from '@/lib/core/async-jobs/types' - -const logger = createLogger('RedisJobQueue') - -const KEYS = { - job: (id: string) => `async-jobs:job:${id}`, -} as const - -function serializeJob(job: Job): Record { - return { - id: job.id, - type: job.type, - payload: JSON.stringify(job.payload), - status: job.status, - createdAt: job.createdAt.toISOString(), - 
startedAt: job.startedAt?.toISOString() ?? '', - completedAt: job.completedAt?.toISOString() ?? '', - attempts: job.attempts.toString(), - maxAttempts: job.maxAttempts.toString(), - error: job.error ?? '', - output: job.output !== undefined ? JSON.stringify(job.output) : '', - metadata: JSON.stringify(job.metadata), - updatedAt: new Date().toISOString(), - } -} - -function deserializeJob(data: Record): Job | null { - if (!data || !data.id) return null - - try { - return { - id: data.id, - type: data.type as JobType, - payload: JSON.parse(data.payload), - status: data.status as JobStatus, - createdAt: new Date(data.createdAt), - startedAt: data.startedAt ? new Date(data.startedAt) : undefined, - completedAt: data.completedAt ? new Date(data.completedAt) : undefined, - attempts: Number.parseInt(data.attempts, 10), - maxAttempts: Number.parseInt(data.maxAttempts, 10), - error: data.error || undefined, - output: data.output ? JSON.parse(data.output) : undefined, - metadata: JSON.parse(data.metadata) as JobMetadata, - } - } catch (error) { - logger.error('Failed to deserialize job', { error, data }) - return null - } -} - -export class RedisJobQueue implements JobQueueBackend { - private redis: Redis - - constructor(redis: Redis) { - this.redis = redis - } - - async enqueue( - type: JobType, - payload: TPayload, - options?: EnqueueOptions - ): Promise { - const jobId = `run_${crypto.randomUUID().replace(/-/g, '').slice(0, 20)}` - const now = new Date() - - const job: Job = { - id: jobId, - type, - payload, - status: JOB_STATUS.PENDING, - createdAt: now, - attempts: 0, - maxAttempts: options?.maxAttempts ?? 3, - metadata: options?.metadata ?? 
{}, - } - - const key = KEYS.job(jobId) - const serialized = serializeJob(job as Job) - await this.redis.hset(key, serialized) - await this.redis.expire(key, JOB_MAX_LIFETIME_SECONDS) - - logger.debug('Enqueued job', { jobId, type }) - return jobId - } - - async getJob(jobId: string): Promise { - const data = await this.redis.hgetall(KEYS.job(jobId)) - return deserializeJob(data) - } - - async startJob(jobId: string): Promise { - const now = new Date() - const key = KEYS.job(jobId) - - await this.redis.hset(key, { - status: JOB_STATUS.PROCESSING, - startedAt: now.toISOString(), - updatedAt: now.toISOString(), - }) - await this.redis.hincrby(key, 'attempts', 1) - - logger.debug('Started job', { jobId }) - } - - async completeJob(jobId: string, output: unknown): Promise { - const now = new Date() - const key = KEYS.job(jobId) - - await this.redis.hset(key, { - status: JOB_STATUS.COMPLETED, - completedAt: now.toISOString(), - output: JSON.stringify(output), - updatedAt: now.toISOString(), - }) - await this.redis.expire(key, JOB_RETENTION_SECONDS) - - logger.debug('Completed job', { jobId }) - } - - async markJobFailed(jobId: string, error: string): Promise { - const now = new Date() - const key = KEYS.job(jobId) - - await this.redis.hset(key, { - status: JOB_STATUS.FAILED, - completedAt: now.toISOString(), - error, - updatedAt: now.toISOString(), - }) - await this.redis.expire(key, JOB_RETENTION_SECONDS) - - logger.debug('Marked job as failed', { jobId }) - } -} diff --git a/apps/sim/lib/core/async-jobs/config.ts b/apps/sim/lib/core/async-jobs/config.ts index 0537a6a8ef9..c4f0a4dcf83 100644 --- a/apps/sim/lib/core/async-jobs/config.ts +++ b/apps/sim/lib/core/async-jobs/config.ts @@ -1,7 +1,7 @@ import { createLogger } from '@sim/logger' import type { AsyncBackendType, JobQueueBackend } from '@/lib/core/async-jobs/types' +import { isBullMQEnabled } from '@/lib/core/bullmq' import { isTriggerDevEnabled } from '@/lib/core/config/feature-flags' -import { getRedisClient } 
from '@/lib/core/config/redis' const logger = createLogger('AsyncJobsConfig') @@ -11,16 +11,15 @@ let cachedInlineBackend: JobQueueBackend | null = null /** * Determines which async backend to use based on environment configuration. - * Follows the fallback chain: trigger.dev → redis → database + * Follows the fallback chain: trigger.dev → bullmq → database */ export function getAsyncBackendType(): AsyncBackendType { if (isTriggerDevEnabled) { return 'trigger-dev' } - const redis = getRedisClient() - if (redis) { - return 'redis' + if (isBullMQEnabled()) { + return 'bullmq' } return 'database' @@ -43,13 +42,9 @@ export async function getJobQueue(): Promise { cachedBackend = new TriggerDevJobQueue() break } - case 'redis': { - const redis = getRedisClient() - if (!redis) { - throw new Error('Redis client not available but redis backend was selected') - } - const { RedisJobQueue } = await import('@/lib/core/async-jobs/backends/redis') - cachedBackend = new RedisJobQueue(redis) + case 'bullmq': { + const { BullMQJobQueue } = await import('@/lib/core/async-jobs/backends/bullmq') + cachedBackend = new BullMQJobQueue() break } case 'database': { @@ -62,6 +57,10 @@ export async function getJobQueue(): Promise { cachedBackendType = type logger.info(`Async job backend initialized: ${type}`) + if (!cachedBackend) { + throw new Error(`Failed to initialize async backend: ${type}`) + } + return cachedBackend } @@ -73,20 +72,19 @@ export function getCurrentBackendType(): AsyncBackendType | null { } /** - * Gets a job queue backend that bypasses Trigger.dev (Redis -> Database). - * Used for non-polling webhooks that should always execute inline. + * Gets a job queue backend that bypasses Trigger.dev (BullMQ -> Database). + * Used for execution paths that must avoid Trigger.dev cold starts. 
*/ export async function getInlineJobQueue(): Promise { if (cachedInlineBackend) { return cachedInlineBackend } - const redis = getRedisClient() let type: string - if (redis) { - const { RedisJobQueue } = await import('@/lib/core/async-jobs/backends/redis') - cachedInlineBackend = new RedisJobQueue(redis) - type = 'redis' + if (isBullMQEnabled()) { + const { BullMQJobQueue } = await import('@/lib/core/async-jobs/backends/bullmq') + cachedInlineBackend = new BullMQJobQueue() + type = 'bullmq' } else { const { DatabaseJobQueue } = await import('@/lib/core/async-jobs/backends/database') cachedInlineBackend = new DatabaseJobQueue() @@ -98,11 +96,15 @@ export async function getInlineJobQueue(): Promise { } /** - * Checks if jobs should be executed inline (fire-and-forget). - * For Redis/DB backends, we execute inline. Trigger.dev handles execution itself. + * Checks if jobs should be executed inline in-process. + * Database fallback is the only mode that still relies on inline execution. */ export function shouldExecuteInline(): boolean { - return getAsyncBackendType() !== 'trigger-dev' + return getAsyncBackendType() === 'database' +} + +export function shouldUseBullMQ(): boolean { + return isBullMQEnabled() } /** diff --git a/apps/sim/lib/core/async-jobs/index.ts b/apps/sim/lib/core/async-jobs/index.ts index 24e6f1e526f..76ec7072207 100644 --- a/apps/sim/lib/core/async-jobs/index.ts +++ b/apps/sim/lib/core/async-jobs/index.ts @@ -5,6 +5,7 @@ export { getJobQueue, resetJobQueueCache, shouldExecuteInline, + shouldUseBullMQ, } from './config' export type { AsyncBackendType, diff --git a/apps/sim/lib/core/async-jobs/types.ts b/apps/sim/lib/core/async-jobs/types.ts index 27137ddadc3..c4bdc27c5ef 100644 --- a/apps/sim/lib/core/async-jobs/types.ts +++ b/apps/sim/lib/core/async-jobs/types.ts @@ -62,6 +62,10 @@ export interface JobMetadata { export interface EnqueueOptions { maxAttempts?: number metadata?: JobMetadata + jobId?: string + priority?: number + name?: string + 
delayMs?: number } /** @@ -95,4 +99,4 @@ export interface JobQueueBackend { markJobFailed(jobId: string, error: string): Promise } -export type AsyncBackendType = 'trigger-dev' | 'redis' | 'database' +export type AsyncBackendType = 'trigger-dev' | 'bullmq' | 'database' diff --git a/apps/sim/lib/core/bullmq/connection.ts b/apps/sim/lib/core/bullmq/connection.ts new file mode 100644 index 00000000000..80def9d5cb5 --- /dev/null +++ b/apps/sim/lib/core/bullmq/connection.ts @@ -0,0 +1,29 @@ +import type { ConnectionOptions } from 'bullmq' +import { env } from '@/lib/core/config/env' + +export function isBullMQEnabled(): boolean { + return Boolean(env.REDIS_URL) +} + +export function getBullMQConnectionOptions(): ConnectionOptions { + if (!env.REDIS_URL) { + throw new Error('BullMQ requires REDIS_URL') + } + + const redisUrl = new URL(env.REDIS_URL) + const isTls = redisUrl.protocol === 'rediss:' + const port = redisUrl.port ? Number.parseInt(redisUrl.port, 10) : 6379 + const dbPath = redisUrl.pathname.replace('/', '') + const db = dbPath ? Number.parseInt(dbPath, 10) : undefined + + return { + host: redisUrl.hostname, + port, + username: redisUrl.username || undefined, + password: redisUrl.password || undefined, + db: Number.isFinite(db) ? db : undefined, + maxRetriesPerRequest: null, + enableReadyCheck: false, + ...(isTls ? 
{ tls: {} } : {}), + } +} diff --git a/apps/sim/lib/core/bullmq/index.ts b/apps/sim/lib/core/bullmq/index.ts new file mode 100644 index 00000000000..efe937aa476 --- /dev/null +++ b/apps/sim/lib/core/bullmq/index.ts @@ -0,0 +1,16 @@ +export { getBullMQConnectionOptions, isBullMQEnabled } from './connection' +export { + type BullMQJobData, + createBullMQJobData, + getBullMQQueue, + getBullMQQueueByName, + getKnowledgeConnectorSyncQueue, + getKnowledgeDocumentProcessingQueue, + getMothershipJobExecutionQueue, + getWorkflowQueueEvents, + getWorkspaceNotificationDeliveryQueue, + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + MOTHERSHIP_JOB_EXECUTION_QUEUE, + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, +} from './queues' diff --git a/apps/sim/lib/core/bullmq/queues.ts b/apps/sim/lib/core/bullmq/queues.ts new file mode 100644 index 00000000000..2278a309d95 --- /dev/null +++ b/apps/sim/lib/core/bullmq/queues.ts @@ -0,0 +1,196 @@ +import { Queue, QueueEvents } from 'bullmq' +import type { JobMetadata, JobType } from '@/lib/core/async-jobs/types' +import { getBullMQConnectionOptions } from '@/lib/core/bullmq/connection' +import type { WorkspaceDispatchQueueName } from '@/lib/core/workspace-dispatch/types' + +export const KNOWLEDGE_CONNECTOR_SYNC_QUEUE = 'knowledge-connector-sync' as const +export const KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE = 'knowledge-process-document' as const +export const MOTHERSHIP_JOB_EXECUTION_QUEUE = 'mothership-job-execution' as const +export const WORKSPACE_NOTIFICATION_DELIVERY_QUEUE = 'workspace-notification-delivery' as const + +export interface BullMQJobData { + payload: TPayload + metadata?: JobMetadata +} + +let workflowQueueInstance: Queue | null = null +let webhookQueueInstance: Queue | null = null +let scheduleQueueInstance: Queue | null = null +let knowledgeConnectorSyncQueueInstance: Queue | null = null +let knowledgeDocumentProcessingQueueInstance: Queue | null = null +let mothershipJobExecutionQueueInstance: Queue | 
null = null +let workspaceNotificationDeliveryQueueInstance: Queue | null = null +let workflowQueueEventsInstance: QueueEvents | null = null + +function getQueueDefaultOptions(type: JobType) { + switch (type) { + case 'workflow-execution': + return { + attempts: 3, + backoff: { type: 'exponential' as const, delay: 1000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + } + case 'webhook-execution': + return { + attempts: 2, + backoff: { type: 'exponential' as const, delay: 2000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 3 * 24 * 60 * 60 }, + } + case 'schedule-execution': + return { + attempts: 2, + backoff: { type: 'exponential' as const, delay: 5000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 3 * 24 * 60 * 60 }, + } + } +} + +function createQueue(type: JobType): Queue { + return new Queue(type, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: getQueueDefaultOptions(type), + }) +} + +function createNamedQueue( + name: + | typeof KNOWLEDGE_CONNECTOR_SYNC_QUEUE + | typeof KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE + | typeof MOTHERSHIP_JOB_EXECUTION_QUEUE + | typeof WORKSPACE_NOTIFICATION_DELIVERY_QUEUE +): Queue { + switch (name) { + case KNOWLEDGE_CONNECTOR_SYNC_QUEUE: + return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 3, + backoff: { type: 'exponential', delay: 5000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + case KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE: + return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 3, + backoff: { type: 'exponential', delay: 1000 }, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + case MOTHERSHIP_JOB_EXECUTION_QUEUE: + return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 1, + 
removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + case WORKSPACE_NOTIFICATION_DELIVERY_QUEUE: + return new Queue(name, { + connection: getBullMQConnectionOptions(), + defaultJobOptions: { + attempts: 1, + removeOnComplete: { age: 24 * 60 * 60 }, + removeOnFail: { age: 7 * 24 * 60 * 60 }, + }, + }) + } +} + +export function getBullMQQueue(type: JobType): Queue { + switch (type) { + case 'workflow-execution': + if (!workflowQueueInstance) { + workflowQueueInstance = createQueue(type) + } + return workflowQueueInstance + case 'webhook-execution': + if (!webhookQueueInstance) { + webhookQueueInstance = createQueue(type) + } + return webhookQueueInstance + case 'schedule-execution': + if (!scheduleQueueInstance) { + scheduleQueueInstance = createQueue(type) + } + return scheduleQueueInstance + } +} + +export function getBullMQQueueByName(queueName: WorkspaceDispatchQueueName): Queue { + switch (queueName) { + case 'workflow-execution': + case 'webhook-execution': + case 'schedule-execution': + return getBullMQQueue(queueName) + case KNOWLEDGE_CONNECTOR_SYNC_QUEUE: + return getKnowledgeConnectorSyncQueue() + case KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE: + return getKnowledgeDocumentProcessingQueue() + case MOTHERSHIP_JOB_EXECUTION_QUEUE: + return getMothershipJobExecutionQueue() + case WORKSPACE_NOTIFICATION_DELIVERY_QUEUE: + return getWorkspaceNotificationDeliveryQueue() + } +} + +export function getWorkflowQueueEvents(): QueueEvents { + if (!workflowQueueEventsInstance) { + workflowQueueEventsInstance = new QueueEvents('workflow-execution', { + connection: getBullMQConnectionOptions(), + }) + } + + return workflowQueueEventsInstance +} + +export function getKnowledgeConnectorSyncQueue(): Queue { + if (!knowledgeConnectorSyncQueueInstance) { + knowledgeConnectorSyncQueueInstance = createNamedQueue(KNOWLEDGE_CONNECTOR_SYNC_QUEUE) + } + + return knowledgeConnectorSyncQueueInstance +} + +export function 
getKnowledgeDocumentProcessingQueue(): Queue { + if (!knowledgeDocumentProcessingQueueInstance) { + knowledgeDocumentProcessingQueueInstance = createNamedQueue(KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE) + } + + return knowledgeDocumentProcessingQueueInstance +} + +export function getMothershipJobExecutionQueue(): Queue { + if (!mothershipJobExecutionQueueInstance) { + mothershipJobExecutionQueueInstance = createNamedQueue(MOTHERSHIP_JOB_EXECUTION_QUEUE) + } + + return mothershipJobExecutionQueueInstance +} + +export function getWorkspaceNotificationDeliveryQueue(): Queue { + if (!workspaceNotificationDeliveryQueueInstance) { + workspaceNotificationDeliveryQueueInstance = createNamedQueue( + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE + ) + } + + return workspaceNotificationDeliveryQueueInstance +} + +export function createBullMQJobData( + payload: TPayload, + metadata?: JobMetadata +): BullMQJobData { + return { + payload, + metadata: metadata ?? {}, + } +} diff --git a/apps/sim/lib/core/config/env.ts b/apps/sim/lib/core/config/env.ts index 65ac812ec86..4e9bd27feda 100644 --- a/apps/sim/lib/core/config/env.ts +++ b/apps/sim/lib/core/config/env.ts @@ -183,6 +183,11 @@ export const env = createEnv({ // Data Retention FREE_PLAN_LOG_RETENTION_DAYS: z.string().optional(), // Log retention days for free plan users + // Admission & Burst Protection + ADMISSION_GATE_MAX_INFLIGHT: z.string().optional().default('500'), // Max concurrent in-flight execution requests per pod + DISPATCH_MAX_QUEUE_PER_WORKSPACE: z.string().optional().default('1000'), // Max queued dispatch jobs per workspace + DISPATCH_MAX_QUEUE_GLOBAL: z.string().optional().default('50000'), // Max queued dispatch jobs globally + // Rate Limiting Configuration RATE_LIMIT_WINDOW_MS: z.string().optional().default('60000'), // Rate limit window duration in milliseconds (default: 1 minute) MANUAL_EXECUTION_LIMIT: z.string().optional().default('999999'),// Manual execution bypass value (effectively unlimited) @@ -194,6 +199,10 
@@ export const env = createEnv({ RATE_LIMIT_TEAM_ASYNC: z.string().optional().default('2500'), // Team tier async API executions per minute RATE_LIMIT_ENTERPRISE_SYNC: z.string().optional().default('600'), // Enterprise tier sync API executions per minute RATE_LIMIT_ENTERPRISE_ASYNC: z.string().optional().default('5000'), // Enterprise tier async API executions per minute + WORKSPACE_CONCURRENCY_FREE: z.string().optional().default('5'), // Free tier concurrent workspace executions + WORKSPACE_CONCURRENCY_PRO: z.string().optional().default('50'), // Pro tier concurrent workspace executions + WORKSPACE_CONCURRENCY_TEAM: z.string().optional().default('200'), // Team/Max tier concurrent workspace executions + WORKSPACE_CONCURRENCY_ENTERPRISE: z.string().optional().default('200'), // Enterprise default concurrent workspace executions // Timeout Configuration EXECUTION_TIMEOUT_FREE: z.string().optional().default('300'), // 5 minutes diff --git a/apps/sim/lib/core/workspace-dispatch/adapter.ts b/apps/sim/lib/core/workspace-dispatch/adapter.ts new file mode 100644 index 00000000000..6d55a70ff3c --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/adapter.ts @@ -0,0 +1,80 @@ +import type { + WorkspaceDispatchClaimResult, + WorkspaceDispatchEnqueueInput, + WorkspaceDispatchJobRecord, + WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +export interface WorkspaceDispatchStorageAdapter { + saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise + getDispatchJobRecord(jobId: string): Promise + listDispatchJobsByStatuses( + statuses: readonly WorkspaceDispatchJobRecord['status'][] + ): Promise + updateDispatchJobRecord( + jobId: string, + updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord + ): Promise + enqueueWorkspaceDispatchJob( + input: WorkspaceDispatchEnqueueInput + ): Promise + restoreWorkspaceDispatchJob(record: WorkspaceDispatchJobRecord): Promise + claimWorkspaceJob( + workspaceId: string, + options: { + lanes: 
readonly WorkspaceDispatchLane[] + concurrencyLimit: number + leaseId: string + now: number + leaseTtlMs: number + } + ): Promise + getWorkspaceQueueDepth( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise + getGlobalQueueDepth(): Promise + reconcileGlobalQueueDepth(knownCount: number): Promise + popNextWorkspaceId(): Promise + getQueuedWorkspaceCount(): Promise + hasActiveWorkspace(workspaceId: string): Promise + ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise + requeueWorkspaceId(workspaceId: string): Promise + workspaceHasPendingJobs( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise + getNextWorkspaceJob( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise + removeWorkspaceJobFromLane( + workspaceId: string, + lane: WorkspaceDispatchLane, + jobId: string + ): Promise + cleanupExpiredWorkspaceLeases(workspaceId: string): Promise + countActiveWorkspaceLeases(workspaceId: string): Promise + hasWorkspaceLease(workspaceId: string, leaseId: string): Promise + createWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: number): Promise + refreshWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: number): Promise + releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise + removeWorkspaceIfIdle(workspaceId: string, lanes: readonly WorkspaceDispatchLane[]): Promise + markDispatchJobAdmitted( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise + markDispatchJobAdmitting( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise + markDispatchJobRunning(jobId: string): Promise + markDispatchJobCompleted(jobId: string, output: unknown): Promise + markDispatchJobFailed(jobId: string, error: string): Promise + clear(): Promise + dispose(): void +} diff --git a/apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts 
b/apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts new file mode 100644 index 00000000000..6daa485f918 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/dispatcher.test.ts @@ -0,0 +1,175 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { mockGetWorkspaceConcurrencyLimit, mockAcquireLock, mockReleaseLock } = vi.hoisted(() => ({ + mockGetWorkspaceConcurrencyLimit: vi.fn(), + mockAcquireLock: vi.fn(), + mockReleaseLock: vi.fn(), +})) + +vi.mock('@/lib/billing/workspace-concurrency', () => ({ + getWorkspaceConcurrencyLimit: mockGetWorkspaceConcurrencyLimit, +})) + +vi.mock('@/lib/core/config/redis', () => ({ + acquireLock: mockAcquireLock, + releaseLock: mockReleaseLock, + getRedisClient: vi.fn().mockReturnValue(null), +})) + +vi.mock('@/lib/core/bullmq', () => ({ + getBullMQQueueByName: vi.fn().mockReturnValue({ + add: vi.fn().mockResolvedValue({ id: 'bullmq-1' }), + }), +})) + +import { MemoryWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/memory-store' +import { + DISPATCH_SCAN_RESULTS, + dispatchNextAdmissibleWorkspaceJob, +} from '@/lib/core/workspace-dispatch/planner' +import { + enqueueWorkspaceDispatchJob, + setWorkspaceDispatchStorageAdapter, +} from '@/lib/core/workspace-dispatch/store' + +describe('workspace dispatch integration (memory-backed)', () => { + let store: MemoryWorkspaceDispatchStorage + + beforeEach(async () => { + vi.clearAllMocks() + store = new MemoryWorkspaceDispatchStorage() + setWorkspaceDispatchStorageAdapter(store) + + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(5) + mockAcquireLock.mockResolvedValue(true) + mockReleaseLock.mockResolvedValue(true) + }) + + async function enqueue( + workspaceId: string, + overrides: { lane?: string; delayMs?: number; priority?: number } = {} + ) { + return enqueueWorkspaceDispatchJob({ + workspaceId, + lane: (overrides.lane ?? 
'runtime') as 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: { payload: { workflowId: 'wf-1' } }, + metadata: { workflowId: 'wf-1' }, + delayMs: overrides.delayMs, + priority: overrides.priority, + }) + } + + it('admits jobs round-robin across workspaces', async () => { + await enqueue('ws-a') + await enqueue('ws-b') + await enqueue('ws-a') + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + const r2 = await dispatchNextAdmissibleWorkspaceJob() + const r3 = await dispatchNextAdmissibleWorkspaceJob() + + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + expect(r2).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + expect(r3).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('respects workspace concurrency limits', async () => { + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(1) + + await enqueue('ws-a') + await enqueue('ws-a') + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + + const r2 = await dispatchNextAdmissibleWorkspaceJob() + expect(r2).toBe(DISPATCH_SCAN_RESULTS.NO_PROGRESS) + }) + + it('skips delayed jobs and admits ready ones in same lane', async () => { + await enqueue('ws-a', { delayMs: 60_000 }) + await enqueue('ws-a', { delayMs: 0 }) + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('returns delayed when all jobs are delayed', async () => { + await enqueue('ws-a', { delayMs: 60_000 }) + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.NO_PROGRESS) + }) + + it('returns no_workspace when queue is empty', async () => { + const result = await dispatchNextAdmissibleWorkspaceJob() + expect(result).toBe(DISPATCH_SCAN_RESULTS.NO_WORKSPACE) + }) + + it('lease cleanup frees capacity for new admissions', async () => { + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(1) + + const record = await enqueue('ws-a') + await 
enqueue('ws-a') + + const r1 = await dispatchNextAdmissibleWorkspaceJob() + expect(r1).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + + const updated = await store.getDispatchJobRecord(record.id) + if (updated?.lease) { + await store.releaseWorkspaceLease('ws-a', updated.lease.leaseId) + } + + const r2 = await dispatchNextAdmissibleWorkspaceJob() + expect(r2).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('expired leases are cleaned up during claim', async () => { + mockGetWorkspaceConcurrencyLimit.mockResolvedValue(1) + + await enqueue('ws-a') + await enqueue('ws-a') + + const claimResult = await store.claimWorkspaceJob('ws-a', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'old-lease', + now: Date.now(), + leaseTtlMs: 1, + }) + expect(claimResult.type).toBe('admitted') + + await new Promise((resolve) => setTimeout(resolve, 10)) + + const r2 = await dispatchNextAdmissibleWorkspaceJob() + expect(r2).toBe(DISPATCH_SCAN_RESULTS.ADMITTED) + }) + + it('recovers job to waiting via restoreWorkspaceDispatchJob', async () => { + const record = await enqueue('ws-a') + + await store.claimWorkspaceJob('ws-a', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'lease-1', + now: Date.now(), + leaseTtlMs: 1000, + }) + + await store.markDispatchJobAdmitted(record.id, 'ws-a', 'lease-1', Date.now() + 10000) + + const admitted = await store.getDispatchJobRecord(record.id) + expect(admitted).toBeDefined() + const resetRecord = { ...admitted!, status: 'waiting' as const, lease: undefined } + await store.restoreWorkspaceDispatchJob(resetRecord) + + const restored = await store.getDispatchJobRecord(record.id) + expect(restored?.status).toBe('waiting') + expect(restored?.lease).toBeUndefined() + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/dispatcher.ts b/apps/sim/lib/core/workspace-dispatch/dispatcher.ts new file mode 100644 index 00000000000..1122107ea4b --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/dispatcher.ts @@ -0,0 +1,156 @@ +import { 
createLogger } from '@sim/logger' +import { env } from '@/lib/core/config/env' +import { + enqueueWorkspaceDispatchJob, + getDispatchJobRecord, + getGlobalQueueDepth, + getQueuedWorkspaceCount, + getWorkspaceQueueDepth, +} from '@/lib/core/workspace-dispatch/store' +import { + WORKSPACE_DISPATCH_LANES, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobRecord, +} from '@/lib/core/workspace-dispatch/types' +import { DISPATCH_SCAN_RESULTS, dispatchNextAdmissibleWorkspaceJob } from './planner' +import { reconcileWorkspaceDispatchState } from './reconciler' + +const logger = createLogger('WorkspaceDispatcher') +const WAIT_POLL_INTERVAL_MS = 250 +const RECONCILE_INTERVAL_MS = 30_000 +const MAX_QUEUE_PER_WORKSPACE = Number.parseInt(env.DISPATCH_MAX_QUEUE_PER_WORKSPACE ?? '') || 1000 +const MAX_QUEUE_GLOBAL = Number.parseInt(env.DISPATCH_MAX_QUEUE_GLOBAL ?? '') || 50_000 + +let dispatcherRunning = false +let dispatcherWakePending = false +let lastReconcileAt = 0 + +async function runDispatcherLoop(): Promise { + if (dispatcherRunning) { + dispatcherWakePending = true + return + } + + dispatcherRunning = true + + try { + const now = Date.now() + if (now - lastReconcileAt >= RECONCILE_INTERVAL_MS) { + await reconcileWorkspaceDispatchState() + lastReconcileAt = now + } + + do { + dispatcherWakePending = false + const queuedWorkspaces = await getQueuedWorkspaceCount() + if (queuedWorkspaces === 0) { + continue + } + + let admitted = 0 + let scanned = 0 + const loopStartMs = Date.now() + + for (let index = 0; index < queuedWorkspaces; index++) { + scanned++ + const result = await dispatchNextAdmissibleWorkspaceJob() + if (result === DISPATCH_SCAN_RESULTS.ADMITTED) { + admitted++ + } + if (result === DISPATCH_SCAN_RESULTS.NO_WORKSPACE) { + break + } + } + + if (admitted > 0) { + dispatcherWakePending = true + } + + if (admitted > 0 || scanned > 0) { + logger.info('Dispatcher pass', { + admitted, + scanned, + queuedWorkspaces, + durationMs: Date.now() - 
loopStartMs, + }) + } + } while (dispatcherWakePending) + } catch (error) { + logger.error('Workspace dispatcher loop failed', { error }) + } finally { + dispatcherRunning = false + } +} + +export class DispatchQueueFullError extends Error { + readonly statusCode = 503 + + constructor( + readonly scope: 'workspace' | 'global', + readonly depth: number, + readonly limit: number + ) { + super( + scope === 'workspace' + ? `Workspace queue is at capacity (${depth}/${limit})` + : `Global dispatch queue is at capacity (${depth}/${limit})` + ) + this.name = 'DispatchQueueFullError' + } +} + +export async function enqueueWorkspaceDispatch( + input: WorkspaceDispatchEnqueueInput +): Promise { + const [workspaceDepth, globalDepth] = await Promise.all([ + getWorkspaceQueueDepth(input.workspaceId, WORKSPACE_DISPATCH_LANES), + getGlobalQueueDepth(), + ]) + + if (workspaceDepth >= MAX_QUEUE_PER_WORKSPACE) { + logger.warn('Workspace dispatch queue at capacity', { + workspaceId: input.workspaceId, + depth: workspaceDepth, + limit: MAX_QUEUE_PER_WORKSPACE, + }) + throw new DispatchQueueFullError('workspace', workspaceDepth, MAX_QUEUE_PER_WORKSPACE) + } + + if (globalDepth >= MAX_QUEUE_GLOBAL) { + logger.warn('Global dispatch queue at capacity', { + depth: globalDepth, + limit: MAX_QUEUE_GLOBAL, + }) + throw new DispatchQueueFullError('global', globalDepth, MAX_QUEUE_GLOBAL) + } + + const record = await enqueueWorkspaceDispatchJob(input) + void runDispatcherLoop() + return record.id +} + +export async function wakeWorkspaceDispatcher(): Promise { + await runDispatcherLoop() +} + +export async function waitForDispatchJob( + dispatchJobId: string, + timeoutMs: number +): Promise { + const deadline = Date.now() + timeoutMs + + while (Date.now() < deadline) { + const record = await getDispatchJobRecord(dispatchJobId) + if (!record) { + throw new Error(`Dispatch job not found: ${dispatchJobId}`) + } + + if (record.status === 'completed' || record.status === 'failed') { + return record + 
} + + await new Promise((resolve) => setTimeout(resolve, WAIT_POLL_INTERVAL_MS)) + } + + throw new Error(`Timed out waiting for dispatch job ${dispatchJobId}`) +} diff --git a/apps/sim/lib/core/workspace-dispatch/factory.ts b/apps/sim/lib/core/workspace-dispatch/factory.ts new file mode 100644 index 00000000000..3a07c68cf01 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/factory.ts @@ -0,0 +1,42 @@ +import { createLogger } from '@sim/logger' +import { getRedisClient } from '@/lib/core/config/redis' +import type { WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { MemoryWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/memory-store' +import { RedisWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/redis-store' + +const logger = createLogger('WorkspaceDispatchFactory') + +let cachedAdapter: WorkspaceDispatchStorageAdapter | null = null + +export function createWorkspaceDispatchStorageAdapter(): WorkspaceDispatchStorageAdapter { + if (cachedAdapter) { + return cachedAdapter + } + + const redis = getRedisClient() + + if (redis) { + logger.info('Workspace dispatcher: Using Redis storage') + const adapter = new RedisWorkspaceDispatchStorage(redis) + cachedAdapter = adapter + return adapter + } + + logger.warn( + 'Workspace dispatcher: Using in-memory storage; distributed fairness is disabled in multi-process deployments' + ) + const adapter = new MemoryWorkspaceDispatchStorage() + cachedAdapter = adapter + return adapter +} + +export function setWorkspaceDispatchStorageAdapter(adapter: WorkspaceDispatchStorageAdapter): void { + cachedAdapter = adapter +} + +export function resetWorkspaceDispatchStorageAdapter(): void { + if (cachedAdapter) { + cachedAdapter.dispose() + cachedAdapter = null + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/index.ts b/apps/sim/lib/core/workspace-dispatch/index.ts new file mode 100644 index 00000000000..74645372c9f --- /dev/null +++ 
b/apps/sim/lib/core/workspace-dispatch/index.ts @@ -0,0 +1,32 @@ +export type { WorkspaceDispatchStorageAdapter } from './adapter' +export { + DispatchQueueFullError, + enqueueWorkspaceDispatch, + waitForDispatchJob, + wakeWorkspaceDispatcher, +} from './dispatcher' +export { + createWorkspaceDispatchStorageAdapter, + resetWorkspaceDispatchStorageAdapter, +} from './factory' +export { + markDispatchJobAdmitted, + markDispatchJobAdmitting, + markDispatchJobCompleted, + markDispatchJobFailed, + markDispatchJobRunning, + refreshWorkspaceLease, + releaseWorkspaceLease, +} from './store' +export { + WORKSPACE_DISPATCH_LANES, + WORKSPACE_DISPATCH_STATUSES, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobContext, + type WorkspaceDispatchJobRecord, + type WorkspaceDispatchLane, + type WorkspaceDispatchLeaseInfo, + type WorkspaceDispatchQueueName, + type WorkspaceDispatchStatus, +} from './types' +export { getDispatchRuntimeMetadata, runDispatchedJob } from './worker' diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.test.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.test.ts new file mode 100644 index 00000000000..87a54de26d1 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.test.ts @@ -0,0 +1,65 @@ +/** + * @vitest-environment node + */ +import { afterEach, describe, expect, it } from 'vitest' +import { MemoryWorkspaceDispatchStorage } from '@/lib/core/workspace-dispatch/memory-store' + +describe('memory workspace dispatch storage', () => { + const store = new MemoryWorkspaceDispatchStorage() + + afterEach(async () => { + await store.clear() + }) + + it('claims a runnable job and marks it admitting with a lease', async () => { + const record = await store.enqueueWorkspaceDispatchJob({ + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: { payload: { workflowId: 'workflow-1' } }, + metadata: { + workflowId: 'workflow-1', + }, + }) 
+ + const result = await store.claimWorkspaceJob('workspace-1', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'lease-1', + now: Date.now(), + leaseTtlMs: 1000, + }) + + expect(result.type).toBe('admitted') + if (result.type === 'admitted') { + expect(result.record.id).toBe(record.id) + expect(result.record.status).toBe('admitting') + expect(result.record.lease?.leaseId).toBe('lease-1') + } + }) + + it('returns delayed when only delayed jobs exist', async () => { + await store.enqueueWorkspaceDispatchJob({ + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: { payload: { workflowId: 'workflow-1' } }, + metadata: { + workflowId: 'workflow-1', + }, + delayMs: 5000, + }) + + const result = await store.claimWorkspaceJob('workspace-1', { + lanes: ['runtime'], + concurrencyLimit: 1, + leaseId: 'lease-2', + now: Date.now(), + leaseTtlMs: 1000, + }) + + expect(result.type).toBe('delayed') + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/memory-store.ts b/apps/sim/lib/core/workspace-dispatch/memory-store.ts new file mode 100644 index 00000000000..1c874d091be --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/memory-store.ts @@ -0,0 +1,505 @@ +import { createLogger } from '@sim/logger' +import type { WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { + WORKSPACE_DISPATCH_CLAIM_RESULTS, + type WorkspaceDispatchClaimResult, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobRecord, + type WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +const logger = createLogger('WorkspaceDispatchMemoryStore') +const JOB_TTL_MS = 48 * 60 * 60 * 1000 + +export class MemoryWorkspaceDispatchStorage implements WorkspaceDispatchStorageAdapter { + private jobs = new Map() + private workspaceOrder: string[] = [] + private laneQueues = new Map() + private leases = new Map>() + private cleanupInterval: 
NodeJS.Timeout | null = null + + constructor() { + this.cleanupInterval = setInterval(() => { + void this.clearExpiredState() + }, 60_000) + this.cleanupInterval.unref() + } + + private queueKey(workspaceId: string, lane: WorkspaceDispatchLane): string { + return `${workspaceId}:${lane}` + } + + private ensureWorkspaceQueued(workspaceId: string): void { + if (!this.workspaceOrder.includes(workspaceId)) { + this.workspaceOrder.push(workspaceId) + } + } + + private getLaneQueue(workspaceId: string, lane: WorkspaceDispatchLane): string[] { + const key = this.queueKey(workspaceId, lane) + const existing = this.laneQueues.get(key) + if (existing) { + return existing + } + + const queue: string[] = [] + this.laneQueues.set(key, queue) + return queue + } + + private sortQueue(queue: string[]): void { + queue.sort((leftId, rightId) => { + const left = this.jobs.get(leftId) + const right = this.jobs.get(rightId) + if (!left || !right) { + return 0 + } + + if (left.priority !== right.priority) { + return left.priority - right.priority + } + + return left.createdAt - right.createdAt + }) + } + + private getLeaseMap(workspaceId: string): Map { + const existing = this.leases.get(workspaceId) + if (existing) { + return existing + } + + const leaseMap = new Map() + this.leases.set(workspaceId, leaseMap) + return leaseMap + } + + private async clearExpiredState(): Promise { + const now = Date.now() + + for (const [jobId, record] of this.jobs.entries()) { + if ( + (record.status === 'completed' || record.status === 'failed') && + record.completedAt && + now - record.completedAt > JOB_TTL_MS + ) { + this.jobs.delete(jobId) + } + } + + for (const [workspaceId, leaseMap] of this.leases.entries()) { + for (const [leaseId, expiresAt] of leaseMap.entries()) { + if (expiresAt <= now) { + leaseMap.delete(leaseId) + } + } + if (leaseMap.size === 0) { + this.leases.delete(workspaceId) + } + } + } + + async saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + 
this.jobs.set(record.id, record) + } + + async getDispatchJobRecord(jobId: string): Promise { + return this.jobs.get(jobId) ?? null + } + + async listDispatchJobsByStatuses( + statuses: readonly WorkspaceDispatchJobRecord['status'][] + ): Promise { + return Array.from(this.jobs.values()).filter((record) => statuses.includes(record.status)) + } + + private static readonly TERMINAL_STATUSES = new Set(['completed', 'failed']) + + async updateDispatchJobRecord( + jobId: string, + updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord + ): Promise { + const current = this.jobs.get(jobId) + if (!current) { + return null + } + + const updated = updater(current) + if ( + MemoryWorkspaceDispatchStorage.TERMINAL_STATUSES.has(current.status) && + !MemoryWorkspaceDispatchStorage.TERMINAL_STATUSES.has(updated.status) + ) { + return current + } + + this.jobs.set(jobId, updated) + return updated + } + + async enqueueWorkspaceDispatchJob( + input: WorkspaceDispatchEnqueueInput + ): Promise { + const id = input.id ?? `dispatch_${crypto.randomUUID().replace(/-/g, '').slice(0, 20)}` + const createdAt = Date.now() + + const record: WorkspaceDispatchJobRecord = { + id, + workspaceId: input.workspaceId, + lane: input.lane, + queueName: input.queueName, + bullmqJobName: input.bullmqJobName, + bullmqPayload: input.bullmqPayload, + metadata: input.metadata, + priority: input.priority ?? 
100, + maxAttempts: input.maxAttempts, + delayMs: input.delayMs, + status: 'waiting', + createdAt, + } + + this.jobs.set(id, record) + const queue = this.getLaneQueue(record.workspaceId, record.lane) + queue.push(id) + this.sortQueue(queue) + this.ensureWorkspaceQueued(record.workspaceId) + return record + } + + async restoreWorkspaceDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + this.jobs.set(record.id, record) + const queue = this.getLaneQueue(record.workspaceId, record.lane) + if (!queue.includes(record.id)) { + queue.push(record.id) + this.sortQueue(queue) + } + this.ensureWorkspaceQueued(record.workspaceId) + } + + async claimWorkspaceJob( + workspaceId: string, + options: { + lanes: readonly WorkspaceDispatchLane[] + concurrencyLimit: number + leaseId: string + now: number + leaseTtlMs: number + } + ): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + if (this.getLeaseMap(workspaceId).size >= options.concurrencyLimit) { + this.ensureWorkspaceQueued(workspaceId) + return { type: WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED } + } + + let selectedRecord: WorkspaceDispatchJobRecord | null = null + let selectedLane: WorkspaceDispatchLane | null = null + let nextReadyAt: number | null = null + + for (const lane of options.lanes) { + const queue = this.getLaneQueue(workspaceId, lane) + for (let scanIndex = 0; scanIndex < queue.length && scanIndex < 20; ) { + const jobId = queue[scanIndex] + const record = this.jobs.get(jobId) + if (!record) { + queue.splice(scanIndex, 1) + continue + } + + const readyAt = record.createdAt + (record.delayMs ?? 0) + if (readyAt <= options.now) { + selectedRecord = record + selectedLane = lane + queue.splice(scanIndex, 1) + break + } + + nextReadyAt = nextReadyAt ? 
Math.min(nextReadyAt, readyAt) : readyAt + scanIndex++ + } + + if (selectedRecord) { + break + } + } + + if (!selectedRecord || !selectedLane) { + const hasPending = await this.workspaceHasPendingJobs(workspaceId, options.lanes) + if (!hasPending) { + this.workspaceOrder = this.workspaceOrder.filter((value) => value !== workspaceId) + return { type: WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY } + } + + this.ensureWorkspaceQueued(workspaceId) + return { + type: WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED, + nextReadyAt: nextReadyAt ?? options.now, + } + } + + const leaseExpiresAt = options.now + options.leaseTtlMs + this.getLeaseMap(workspaceId).set(options.leaseId, leaseExpiresAt) + + const updatedRecord: WorkspaceDispatchJobRecord = { + ...selectedRecord, + status: 'admitting', + lease: { + workspaceId, + leaseId: options.leaseId, + }, + metadata: { + ...selectedRecord.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + } + this.jobs.set(updatedRecord.id, updatedRecord) + + const hasPending = await this.workspaceHasPendingJobs(workspaceId, options.lanes) + if (hasPending) { + this.ensureWorkspaceQueued(workspaceId) + } else { + this.workspaceOrder = this.workspaceOrder.filter((value) => value !== workspaceId) + } + + return { + type: WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED, + record: updatedRecord, + leaseId: options.leaseId, + leaseExpiresAt, + } + } + + async getWorkspaceQueueDepth( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + let depth = 0 + for (const lane of lanes) { + depth += this.getLaneQueue(workspaceId, lane).length + } + return depth + } + + async getGlobalQueueDepth(): Promise { + const terminalStatuses = new Set(['completed', 'failed']) + let count = 0 + for (const job of this.jobs.values()) { + if (!terminalStatuses.has(job.status)) { + count++ + } + } + return count + } + + async reconcileGlobalQueueDepth(_knownCount: number): Promise { + // no-op: memory store computes depth on the fly + } + + async 
popNextWorkspaceId(): Promise { + const now = Date.now() + const maxScans = this.workspaceOrder.length + for (let i = 0; i < maxScans; i++) { + const id = this.workspaceOrder.shift() + if (!id) return null + const readyAt = this.workspaceReadyAt.get(id) + if (readyAt && readyAt > now) { + this.workspaceOrder.push(id) + continue + } + this.workspaceReadyAt.delete(id) + return id + } + return null + } + + async getQueuedWorkspaceCount(): Promise { + return this.workspaceOrder.length + } + + async hasActiveWorkspace(workspaceId: string): Promise { + return this.workspaceOrder.includes(workspaceId) + } + + private workspaceReadyAt = new Map() + + async ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise { + if (readyAt && readyAt > Date.now()) { + this.workspaceReadyAt.set(workspaceId, readyAt) + } + this.ensureWorkspaceQueued(workspaceId) + } + + async requeueWorkspaceId(workspaceId: string): Promise { + this.ensureWorkspaceQueued(workspaceId) + } + + async workspaceHasPendingJobs( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + return lanes.some((lane) => this.getLaneQueue(workspaceId, lane).length > 0) + } + + async getNextWorkspaceJob( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + for (const lane of lanes) { + const queue = this.getLaneQueue(workspaceId, lane) + while (queue.length > 0) { + const jobId = queue[0] + const job = this.jobs.get(jobId) + if (job) { + return job + } + queue.shift() + } + } + + return null + } + + async removeWorkspaceJobFromLane( + workspaceId: string, + lane: WorkspaceDispatchLane, + jobId: string + ): Promise { + const queue = this.getLaneQueue(workspaceId, lane) + const index = queue.indexOf(jobId) + if (index >= 0) { + queue.splice(index, 1) + } + } + + async cleanupExpiredWorkspaceLeases(workspaceId: string): Promise { + const leaseMap = this.getLeaseMap(workspaceId) + const now = Date.now() + for (const [leaseId, expiresAt] of 
leaseMap.entries()) { + if (expiresAt <= now) { + leaseMap.delete(leaseId) + } + } + } + + async countActiveWorkspaceLeases(workspaceId: string): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + return this.getLeaseMap(workspaceId).size + } + + async hasWorkspaceLease(workspaceId: string, leaseId: string): Promise { + await this.cleanupExpiredWorkspaceLeases(workspaceId) + return this.getLeaseMap(workspaceId).has(leaseId) + } + + async createWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: number): Promise { + const expiresAt = Date.now() + ttlMs + this.getLeaseMap(workspaceId).set(leaseId, expiresAt) + return expiresAt + } + + async refreshWorkspaceLease( + workspaceId: string, + leaseId: string, + ttlMs: number + ): Promise { + return this.createWorkspaceLease(workspaceId, leaseId, ttlMs) + } + + async releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise { + this.getLeaseMap(workspaceId).delete(leaseId) + } + + async removeWorkspaceIfIdle( + workspaceId: string, + lanes: readonly WorkspaceDispatchLane[] + ): Promise { + const hasPending = await this.workspaceHasPendingJobs(workspaceId, lanes) + if (!hasPending) { + this.workspaceOrder = this.workspaceOrder.filter((value) => value !== workspaceId) + } + } + + async markDispatchJobAdmitted( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'admitted', + admittedAt: Date.now(), + lease: { + workspaceId, + leaseId, + }, + metadata: { + ...record.metadata, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + })) + } + + async markDispatchJobAdmitting( + jobId: string, + workspaceId: string, + leaseId: string, + leaseExpiresAt: number + ): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'admitting', + lease: { + workspaceId, + leaseId, + }, + metadata: { + ...record.metadata, + dispatchLeaseExpiresAt: 
leaseExpiresAt, + }, + })) + } + + async markDispatchJobRunning(jobId: string): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'running', + startedAt: record.startedAt ?? Date.now(), + })) + } + + async markDispatchJobCompleted(jobId: string, output: unknown): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'completed', + completedAt: Date.now(), + output, + })) + } + + async markDispatchJobFailed(jobId: string, error: string): Promise { + await this.updateDispatchJobRecord(jobId, (record) => ({ + ...record, + status: 'failed', + completedAt: Date.now(), + error, + })) + } + + async clear(): Promise { + this.jobs.clear() + this.workspaceOrder = [] + this.laneQueues.clear() + this.leases.clear() + this.workspaceReadyAt.clear() + } + + dispose(): void { + if (this.cleanupInterval) { + clearInterval(this.cleanupInterval) + this.cleanupInterval = null + } + void this.clear().catch((error) => { + logger.error('Failed to clear memory workspace dispatch storage', { error }) + }) + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/planner.ts b/apps/sim/lib/core/workspace-dispatch/planner.ts new file mode 100644 index 00000000000..8ba42f83a0a --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/planner.ts @@ -0,0 +1,154 @@ +import { createLogger } from '@sim/logger' +import { getWorkspaceConcurrencyLimit } from '@/lib/billing/workspace-concurrency' +import { type BullMQJobData, getBullMQQueueByName } from '@/lib/core/bullmq' +import { acquireLock, releaseLock } from '@/lib/core/config/redis' +import { + claimWorkspaceJob, + markDispatchJobAdmitted, + popNextWorkspaceId, + releaseWorkspaceLease, + removeWorkspaceIfIdle, + requeueWorkspaceId, +} from '@/lib/core/workspace-dispatch/store' +import { + WORKSPACE_DISPATCH_CLAIM_RESULTS, + WORKSPACE_DISPATCH_LANES, + type WorkspaceDispatchJobRecord, +} from '@/lib/core/workspace-dispatch/types' + +const logger = 
createLogger('WorkspaceDispatchPlanner') + +const LEASE_TTL_MS = 15 * 60 * 1000 +const WORKSPACE_CLAIM_LOCK_TTL_SECONDS = 10 + +export const DISPATCH_SCAN_RESULTS = { + NO_WORKSPACE: 'no_workspace', + NO_PROGRESS: 'no_progress', + ADMITTED: 'admitted', +} as const + +export type DispatchScanResult = (typeof DISPATCH_SCAN_RESULTS)[keyof typeof DISPATCH_SCAN_RESULTS] + +function attachDispatchMetadata( + bullmqPayload: unknown, + record: WorkspaceDispatchJobRecord, + leaseId: string, + leaseExpiresAt: number +): BullMQJobData { + if ( + bullmqPayload && + typeof bullmqPayload === 'object' && + 'payload' in bullmqPayload && + 'metadata' in bullmqPayload + ) { + const data = bullmqPayload as BullMQJobData + return { + payload: data.payload, + metadata: { + ...(data.metadata ?? {}), + dispatchJobId: record.id, + dispatchWorkspaceId: record.workspaceId, + dispatchLeaseId: leaseId, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + } + } + + return { + payload: bullmqPayload, + metadata: { + ...record.metadata, + dispatchJobId: record.id, + dispatchWorkspaceId: record.workspaceId, + dispatchLeaseId: leaseId, + dispatchLeaseExpiresAt: leaseExpiresAt, + }, + } +} + +async function finalizeAdmittedJob( + record: WorkspaceDispatchJobRecord, + leaseId: string, + leaseExpiresAt: number +): Promise { + try { + await getBullMQQueueByName(record.queueName).add( + record.bullmqJobName, + attachDispatchMetadata(record.bullmqPayload, record, leaseId, leaseExpiresAt), + { + jobId: record.id, + attempts: record.maxAttempts, + priority: record.priority, + } + ) + + await markDispatchJobAdmitted(record.id, record.workspaceId, leaseId, leaseExpiresAt) + } catch (error) { + await releaseWorkspaceLease(record.workspaceId, leaseId).catch(() => undefined) + throw error + } +} + +export async function dispatchNextAdmissibleWorkspaceJob(): Promise { + const workspaceId = await popNextWorkspaceId() + if (!workspaceId) { + return DISPATCH_SCAN_RESULTS.NO_WORKSPACE + } + + const lockValue = 
`lock_${crypto.randomUUID()}` + try { + const lockKey = `workspace-dispatch:claim-lock:${workspaceId}` + const acquired = await acquireLock(lockKey, lockValue, WORKSPACE_CLAIM_LOCK_TTL_SECONDS) + if (!acquired) { + await requeueWorkspaceId(workspaceId) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + } + + const limit = await getWorkspaceConcurrencyLimit(workspaceId) + const leaseId = `lease_${crypto.randomUUID()}` + const claimResult = await claimWorkspaceJob(workspaceId, { + lanes: WORKSPACE_DISPATCH_LANES, + concurrencyLimit: limit, + leaseId, + now: Date.now(), + leaseTtlMs: LEASE_TTL_MS, + }) + + switch (claimResult.type) { + case WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED: + logger.debug('Workspace concurrency limit reached', { workspaceId, limit }) + await requeueWorkspaceId(workspaceId) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + case WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED: + logger.debug('Workspace has only delayed jobs', { + workspaceId, + nextReadyAt: claimResult.nextReadyAt, + }) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + case WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY: + await removeWorkspaceIfIdle(workspaceId, WORKSPACE_DISPATCH_LANES) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + case WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED: + logger.info('Admitting workspace job', { + workspaceId, + dispatchJobId: claimResult.record.id, + lane: claimResult.record.lane, + queueName: claimResult.record.queueName, + }) + await finalizeAdmittedJob( + claimResult.record, + claimResult.leaseId, + claimResult.leaseExpiresAt + ) + return DISPATCH_SCAN_RESULTS.ADMITTED + } + } catch (error) { + logger.error('Failed to dispatch workspace job', { workspaceId, error }) + await requeueWorkspaceId(workspaceId) + return DISPATCH_SCAN_RESULTS.NO_PROGRESS + } finally { + await releaseLock(`workspace-dispatch:claim-lock:${workspaceId}`, lockValue).catch( + () => undefined + ) + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/reconciler.test.ts 
b/apps/sim/lib/core/workspace-dispatch/reconciler.test.ts new file mode 100644 index 00000000000..a61d0dc4d1b --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/reconciler.test.ts @@ -0,0 +1,225 @@ +/** + * @vitest-environment node + */ +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { + mockGetBullMQQueueByName, + mockHasActiveWorkspace, + mockEnsureWorkspaceActive, + mockHasWorkspaceLease, + mockListDispatchJobsByStatuses, + mockMarkDispatchJobAdmitted, + mockMarkDispatchJobCompleted, + mockMarkDispatchJobFailed, + mockRefreshWorkspaceLease, + mockReleaseWorkspaceLease, + mockRemoveWorkspaceJobFromLane, + mockRestoreWorkspaceDispatchJob, + mockWakeWorkspaceDispatcher, +} = vi.hoisted(() => ({ + mockGetBullMQQueueByName: vi.fn(), + mockHasActiveWorkspace: vi.fn(), + mockEnsureWorkspaceActive: vi.fn(), + mockHasWorkspaceLease: vi.fn(), + mockListDispatchJobsByStatuses: vi.fn(), + mockMarkDispatchJobAdmitted: vi.fn(), + mockMarkDispatchJobCompleted: vi.fn(), + mockMarkDispatchJobFailed: vi.fn(), + mockRefreshWorkspaceLease: vi.fn(), + mockReleaseWorkspaceLease: vi.fn(), + mockRemoveWorkspaceJobFromLane: vi.fn(), + mockRestoreWorkspaceDispatchJob: vi.fn(), + mockWakeWorkspaceDispatcher: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/core/bullmq', () => ({ + getBullMQQueueByName: mockGetBullMQQueueByName, +})) + +vi.mock('@/lib/core/workspace-dispatch/store', () => ({ + ensureWorkspaceActive: mockEnsureWorkspaceActive, + hasActiveWorkspace: mockHasActiveWorkspace, + hasWorkspaceLease: mockHasWorkspaceLease, + listDispatchJobsByStatuses: mockListDispatchJobsByStatuses, + markDispatchJobAdmitted: mockMarkDispatchJobAdmitted, + markDispatchJobCompleted: mockMarkDispatchJobCompleted, + markDispatchJobFailed: mockMarkDispatchJobFailed, + reconcileGlobalQueueDepth: vi.fn().mockResolvedValue(undefined), + 
refreshWorkspaceLease: mockRefreshWorkspaceLease, + releaseWorkspaceLease: mockReleaseWorkspaceLease, + removeWorkspaceJobFromLane: mockRemoveWorkspaceJobFromLane, + restoreWorkspaceDispatchJob: mockRestoreWorkspaceDispatchJob, +})) + +vi.mock('@/lib/core/workspace-dispatch/dispatcher', () => ({ + wakeWorkspaceDispatcher: mockWakeWorkspaceDispatcher, +})) + +import { reconcileWorkspaceDispatchState } from '@/lib/core/workspace-dispatch/reconciler' + +describe('workspace dispatch reconciler', () => { + beforeEach(() => { + vi.clearAllMocks() + mockHasActiveWorkspace.mockResolvedValue(true) + mockRemoveWorkspaceJobFromLane.mockResolvedValue(undefined) + }) + + it('marks dispatch job completed when BullMQ job is completed', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-1', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: {}, + priority: 10, + status: 'running', + createdAt: 1, + lease: { + workspaceId: 'workspace-1', + leaseId: 'lease-1', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue({ + getState: vi.fn().mockResolvedValue('completed'), + returnvalue: { ok: true }, + }), + }) + + await reconcileWorkspaceDispatchState() + + expect(mockMarkDispatchJobCompleted).toHaveBeenCalledWith('dispatch-1', { ok: true }) + expect(mockReleaseWorkspaceLease).toHaveBeenCalledWith('workspace-1', 'lease-1') + expect(mockWakeWorkspaceDispatcher).toHaveBeenCalled() + }) + + it('restores admitted jobs to waiting when lease and BullMQ job are gone', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-2', + workspaceId: 'workspace-2', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: {}, + priority: 10, + status: 'admitted', + createdAt: 1, + admittedAt: 2, + lease: { + workspaceId: 
'workspace-2', + leaseId: 'lease-2', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue(null), + }) + mockHasWorkspaceLease.mockResolvedValue(false) + + await reconcileWorkspaceDispatchState() + + expect(mockRestoreWorkspaceDispatchJob).toHaveBeenCalledWith( + expect.objectContaining({ + id: 'dispatch-2', + status: 'waiting', + lease: undefined, + }) + ) + expect(mockWakeWorkspaceDispatcher).toHaveBeenCalled() + }) + + it('reacquires the lease for a live admitting BullMQ job', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-3', + workspaceId: 'workspace-3', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { + dispatchLeaseExpiresAt: 12345, + }, + priority: 10, + status: 'admitting', + createdAt: 1, + lease: { + workspaceId: 'workspace-3', + leaseId: 'lease-3', + }, + }, + ]) + mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue({ + getState: vi.fn().mockResolvedValue('active'), + }), + }) + mockHasWorkspaceLease.mockResolvedValue(false) + + await reconcileWorkspaceDispatchState() + + expect(mockRefreshWorkspaceLease).toHaveBeenCalledWith('workspace-3', 'lease-3', 15 * 60 * 1000) + expect(mockMarkDispatchJobAdmitted).toHaveBeenCalledWith( + 'dispatch-3', + 'workspace-3', + 'lease-3', + 12345 + ) + expect(mockRemoveWorkspaceJobFromLane).toHaveBeenCalledWith( + 'workspace-3', + 'runtime', + 'dispatch-3' + ) + }) + + it('releases leaked lease and restores waiting when BullMQ job is gone but lease remains', async () => { + mockListDispatchJobsByStatuses.mockResolvedValue([ + { + id: 'dispatch-4', + workspaceId: 'workspace-4', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: {}, + priority: 10, + status: 'running', + createdAt: 1, + lease: { + workspaceId: 'workspace-4', + leaseId: 'lease-4', + }, + }, + ]) + 
mockGetBullMQQueueByName.mockReturnValue({ + getJob: vi.fn().mockResolvedValue(null), + }) + mockHasWorkspaceLease.mockResolvedValue(true) + + await reconcileWorkspaceDispatchState() + + expect(mockReleaseWorkspaceLease).toHaveBeenCalledWith('workspace-4', 'lease-4') + expect(mockRestoreWorkspaceDispatchJob).toHaveBeenCalledWith( + expect.objectContaining({ + id: 'dispatch-4', + status: 'waiting', + }) + ) + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/reconciler.ts b/apps/sim/lib/core/workspace-dispatch/reconciler.ts new file mode 100644 index 00000000000..739d75533b1 --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/reconciler.ts @@ -0,0 +1,226 @@ +import { createLogger } from '@sim/logger' +import { getBullMQQueueByName } from '@/lib/core/bullmq' +import { + ensureWorkspaceActive, + hasActiveWorkspace, + hasWorkspaceLease, + listDispatchJobsByStatuses, + markDispatchJobAdmitted, + markDispatchJobCompleted, + markDispatchJobFailed, + markDispatchJobRunning, + reconcileGlobalQueueDepth, + refreshWorkspaceLease, + releaseWorkspaceLease, + removeWorkspaceJobFromLane, + restoreWorkspaceDispatchJob, +} from '@/lib/core/workspace-dispatch/store' +import type { WorkspaceDispatchJobRecord } from '@/lib/core/workspace-dispatch/types' +import { wakeWorkspaceDispatcher } from './dispatcher' + +const logger = createLogger('WorkspaceDispatchReconciler') +const LEASE_TTL_MS = 15 * 60 * 1000 + +function resetToWaiting(record: WorkspaceDispatchJobRecord): WorkspaceDispatchJobRecord { + return { + ...record, + status: 'waiting', + admittedAt: undefined, + startedAt: undefined, + completedAt: undefined, + output: undefined, + error: undefined, + lease: undefined, + } +} + +async function reconcileTerminalBullMQState(record: WorkspaceDispatchJobRecord): Promise { + const queue = getBullMQQueueByName(record.queueName) + const job = await queue.getJob(record.id) + if (!job) { + return false + } + + const state = await job.getState() + if (state === 'completed') { + 
await markDispatchJobCompleted(record.id, job.returnvalue) + if (record.lease) { + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + } + return true + } + + if (state === 'failed' && job.attemptsMade >= (job.opts.attempts ?? 1)) { + await markDispatchJobFailed(record.id, job.failedReason || 'Job failed') + if (record.lease) { + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + } + return true + } + + return false +} + +async function reconcileStrandedDispatchJob(record: WorkspaceDispatchJobRecord): Promise { + if (!record.lease && record.status !== 'waiting') { + await restoreWorkspaceDispatchJob(resetToWaiting(record)) + return true + } + + if (!record.lease) { + return false + } + + const hasLease = await hasWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + const queue = getBullMQQueueByName(record.queueName) + const job = await queue.getJob(record.id) + if (hasLease) { + if (!job) { + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + await restoreWorkspaceDispatchJob(resetToWaiting(record)) + return true + } + + return false + } + + if (job) { + if (record.status === 'admitting') { + await refreshWorkspaceLease(record.lease.workspaceId, record.lease.leaseId, LEASE_TTL_MS) + await markDispatchJobAdmitted( + record.id, + record.lease.workspaceId, + record.lease.leaseId, + (record.metadata as { dispatchLeaseExpiresAt?: number }).dispatchLeaseExpiresAt ?? 
+ Date.now() + ) + await removeWorkspaceJobFromLane(record.workspaceId, record.lane, record.id).catch( + () => undefined + ) + return true + } + await refreshWorkspaceLease(record.lease.workspaceId, record.lease.leaseId, LEASE_TTL_MS) + if (record.status === 'admitted') { + await markDispatchJobRunning(record.id) + return true + } + return false + } + + await restoreWorkspaceDispatchJob(resetToWaiting(record)) + return true +} + +async function reconcileTerminalDispatchLease( + record: WorkspaceDispatchJobRecord +): Promise { + if ((record.status !== 'completed' && record.status !== 'failed') || !record.lease) { + return false + } + + const hasLease = await hasWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + if (!hasLease) { + return false + } + + await releaseWorkspaceLease(record.lease.workspaceId, record.lease.leaseId) + return true +} + +async function reconcileWaitingWorkspaceTracking( + waitingJobs: WorkspaceDispatchJobRecord[] +): Promise { + let changed = false + const earliestByWorkspace = new Map() + + for (const record of waitingJobs) { + const readyAt = record.createdAt + (record.delayMs ?? 
0) + const current = earliestByWorkspace.get(record.workspaceId) + if (current === undefined || readyAt < current) { + earliestByWorkspace.set(record.workspaceId, readyAt) + } + } + + for (const [workspaceId, nextReadyAt] of earliestByWorkspace.entries()) { + const active = await hasActiveWorkspace(workspaceId) + if (!active) { + await ensureWorkspaceActive(workspaceId, nextReadyAt) + changed = true + } + } + + return changed +} + +export async function reconcileWorkspaceDispatchState(): Promise { + const allJobs = await listDispatchJobsByStatuses([ + 'waiting', + 'admitting', + 'admitted', + 'running', + 'completed', + 'failed', + ]) + + const activeJobs: WorkspaceDispatchJobRecord[] = [] + const waitingJobs: WorkspaceDispatchJobRecord[] = [] + const terminalJobs: WorkspaceDispatchJobRecord[] = [] + let nonTerminalCount = 0 + + for (const job of allJobs) { + switch (job.status) { + case 'admitting': + case 'admitted': + case 'running': + activeJobs.push(job) + nonTerminalCount++ + break + case 'waiting': + waitingJobs.push(job) + nonTerminalCount++ + break + case 'completed': + case 'failed': + terminalJobs.push(job) + break + } + } + + let changed = false + + for (const record of activeJobs) { + const terminal = await reconcileTerminalBullMQState(record) + if (terminal) { + changed = true + continue + } + + const restored = await reconcileStrandedDispatchJob(record) + if (restored) { + changed = true + } + } + + if (await reconcileWaitingWorkspaceTracking(waitingJobs)) { + changed = true + } + + for (const record of terminalJobs) { + if (await reconcileTerminalDispatchLease(record)) { + changed = true + } + } + + await reconcileGlobalQueueDepth(nonTerminalCount).catch((error) => { + logger.error('Failed to reconcile global queue depth', { error }) + }) + + if (changed) { + logger.info('Workspace dispatch reconciliation updated state', { + activeJobsInspected: activeJobs.length, + waitingJobsInspected: waitingJobs.length, + terminalJobsInspected: 
terminalJobs.length, + }) + await wakeWorkspaceDispatcher() + } +} diff --git a/apps/sim/lib/core/workspace-dispatch/redis-store.ts b/apps/sim/lib/core/workspace-dispatch/redis-store.ts new file mode 100644 index 00000000000..8fbf8dfee4f --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/redis-store.ts @@ -0,0 +1,577 @@ +import { createLogger } from '@sim/logger' +import type Redis from 'ioredis' +import type { WorkspaceDispatchStorageAdapter } from '@/lib/core/workspace-dispatch/adapter' +import { + WORKSPACE_DISPATCH_CLAIM_RESULTS, + type WorkspaceDispatchClaimResult, + type WorkspaceDispatchEnqueueInput, + type WorkspaceDispatchJobRecord, + type WorkspaceDispatchLane, +} from '@/lib/core/workspace-dispatch/types' + +const logger = createLogger('WorkspaceDispatchRedisStore') + +const DISPATCH_PREFIX = 'workspace-dispatch:v1' +const JOB_TTL_SECONDS = 48 * 60 * 60 +const SEQUENCE_KEY = `${DISPATCH_PREFIX}:sequence` +const ACTIVE_WORKSPACES_KEY = `${DISPATCH_PREFIX}:workspaces` +const GLOBAL_DEPTH_KEY = `${DISPATCH_PREFIX}:global-depth` +const CLAIM_JOB_SCRIPT = ` +local workspaceId = ARGV[1] +local now = tonumber(ARGV[2]) +local concurrencyLimit = tonumber(ARGV[3]) +local leaseId = ARGV[4] +local leaseExpiresAt = tonumber(ARGV[5]) +local lanes = cjson.decode(ARGV[6]) +local sequenceKey = ARGV[7] +local activeWorkspacesKey = ARGV[8] +local jobPrefix = ARGV[9] +local workspacePrefix = ARGV[10] +local jobTtlSeconds = tonumber(ARGV[11]) + +local function laneKey(lane) + return workspacePrefix .. workspaceId .. ':lane:' .. lane +end + +local function leaseKey() + return workspacePrefix .. workspaceId .. ':leases' +end + +local function workspaceHasPending() + local minReadyAt = nil + local hasPending = false + + for _, lane in ipairs(lanes) do + local ids = redis.call('ZRANGE', laneKey(lane), 0, 0) + if #ids > 0 then + local raw = redis.call('GET', jobPrefix .. 
ids[1]) + if raw then + hasPending = true + local record = cjson.decode(raw) + local readyAt = (record.createdAt or 0) + (record.delayMs or 0) + if (minReadyAt == nil) or (readyAt < minReadyAt) then + minReadyAt = readyAt + end + else + redis.call('ZREM', laneKey(lane), ids[1]) + end + end + end + + return hasPending, minReadyAt +end + +redis.call('ZREMRANGEBYSCORE', leaseKey(), 0, now) +local activeLeaseCount = redis.call('ZCARD', leaseKey()) +if activeLeaseCount >= concurrencyLimit then + return cjson.encode({ type = 'limit_reached' }) +end + +local selectedId = nil +local selectedLane = nil +local selectedRecord = nil +local delayedNextReadyAt = nil + +local maxScanPerLane = 20 + +for _, lane in ipairs(lanes) do + local ids = redis.call('ZRANGE', laneKey(lane), 0, maxScanPerLane - 1) + for _, candidateId in ipairs(ids) do + local raw = redis.call('GET', jobPrefix .. candidateId) + if raw then + local record = cjson.decode(raw) + local readyAt = (record.createdAt or 0) + (record.delayMs or 0) + if readyAt <= now then + selectedId = candidateId + selectedLane = lane + selectedRecord = record + break + end + + if (delayedNextReadyAt == nil) or (readyAt < delayedNextReadyAt) then + delayedNextReadyAt = readyAt + end + else + redis.call('ZREM', laneKey(lane), candidateId) + end + end + + if selectedRecord then + break + end +end + +if selectedRecord == nil then + local hasPending, minReadyAt = workspaceHasPending() + if not hasPending then + return cjson.encode({ type = 'empty' }) + end + + local sequence = redis.call('INCR', sequenceKey) + local score = sequence + if minReadyAt ~= nil and minReadyAt > now then + score = minReadyAt * 1000000 + sequence + end + redis.call('ZADD', activeWorkspacesKey, score, workspaceId) + + return cjson.encode({ + type = 'delayed', + nextReadyAt = delayedNextReadyAt or minReadyAt or now + }) +end + +redis.call('ZADD', leaseKey(), leaseExpiresAt, leaseId) +selectedRecord.status = 'admitting' +selectedRecord.lease = { + workspaceId = 
/** Builds the Redis key under which a dispatch job's JSON record is stored. */
function jobKey(jobId: string): string {
  return `${DISPATCH_PREFIX}:job:${jobId}`
}

/** Builds the key of the sorted set that queues job ids for one lane of a workspace. */
function workspaceLaneKey(workspaceId: string, lane: WorkspaceDispatchLane): string {
  return `${DISPATCH_PREFIX}:workspace:${workspaceId}:lane:${lane}`
}

/** Builds the key of the sorted set that tracks a workspace's leases, scored by expiry time. */
function workspaceLeaseKey(workspaceId: string): string {
  return `${DISPATCH_PREFIX}:workspace:${workspaceId}:leases`
}

/**
 * Folds priority and enqueue sequence into a single sorted-set score.
 * Lane sets are read in ascending score order, so a lower priority number is
 * served first and the sequence breaks ties in enqueue order.
 */
function createPriorityScore(priority: number, sequence: number): number {
  return priority * 1_000_000_000_000 + sequence
}

/**
 * Redis-backed implementation of the workspace dispatch storage adapter.
 *
 * Job records are stored as JSON strings with a TTL, per-lane queues and
 * leases are sorted sets, and workspaces with pending work live in one
 * global sorted set.
 */
export class RedisWorkspaceDispatchStorage implements WorkspaceDispatchStorageAdapter {
  /** Statuses from which a record must never move back to a non-terminal status. */
  private static readonly TERMINAL_STATUSES = new Set<string>(['completed', 'failed'])

  constructor(private redis: Redis) {}

  /** Returns the next value of the shared monotonically increasing sequence. */
  private async nextSequence(): Promise<number> {
    return this.redis.incr(SEQUENCE_KEY)
  }

  /** Writes (or overwrites) a job record and resets its TTL. */
  async saveDispatchJob(record: WorkspaceDispatchJobRecord): Promise<void> {
    await this.redis.set(jobKey(record.id), JSON.stringify(record), 'EX', JOB_TTL_SECONDS)
  }

  /**
   * Loads a job record.
   * @returns the record, or null when it is missing or unparsable (an
   *   unparsable record is deleted).
   */
  async getDispatchJobRecord(jobId: string): Promise<WorkspaceDispatchJobRecord | null> {
    const raw = await this.redis.get(jobKey(jobId))
    if (!raw) {
      return null
    }

    try {
      return JSON.parse(raw) as WorkspaceDispatchJobRecord
    } catch (error) {
      logger.warn('Corrupted dispatch job record, deleting', { jobId, error })
      await this.redis.del(jobKey(jobId))
      return null
    }
  }

  /** Scans every job record and returns those whose status is in `statuses`. */
  async listDispatchJobsByStatuses(
    statuses: readonly WorkspaceDispatchJobRecord['status'][]
  ): Promise<WorkspaceDispatchJobRecord[]> {
    let cursor = '0'
    const jobs: WorkspaceDispatchJobRecord[] = []

    do {
      const [nextCursor, keys] = await this.redis.scan(
        cursor,
        'MATCH',
        `${DISPATCH_PREFIX}:job:*`,
        'COUNT',
        100
      )
      cursor = nextCursor

      if (keys.length === 0) {
        continue
      }

      const values = await this.redis.mget(...keys)
      for (const value of values) {
        if (!value) {
          continue
        }
        try {
          const record = JSON.parse(value) as WorkspaceDispatchJobRecord
          if (statuses.includes(record.status)) {
            jobs.push(record)
          }
        } catch {
          // Best effort during reconciliation scans.
        }
      }
    } while (cursor !== '0')

    return jobs
  }

  /**
   * Applies `updater` to the stored record and saves the result.
   *
   * A transition from a terminal status back to a non-terminal one is
   * ignored and the current record is returned unchanged.
   * NOTE(review): this is a read-modify-write without a transaction, so
   * concurrent updaters can overwrite each other.
   *
   * @returns the saved record, the unchanged record when the update was
   *   rejected, or null when the job does not exist.
   */
  async updateDispatchJobRecord(
    jobId: string,
    updater: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord
  ): Promise<WorkspaceDispatchJobRecord | null> {
    const current = await this.getDispatchJobRecord(jobId)
    if (!current) {
      return null
    }

    const updated = updater(current)
    if (
      RedisWorkspaceDispatchStorage.TERMINAL_STATUSES.has(current.status) &&
      !RedisWorkspaceDispatchStorage.TERMINAL_STATUSES.has(updated.status)
    ) {
      return current
    }

    await this.saveDispatchJob(updated)
    return updated
  }

  /**
   * Creates a `waiting` job record, queues it in its workspace lane, marks
   * the workspace active and increments the global depth counter.
   */
  async enqueueWorkspaceDispatchJob(
    input: WorkspaceDispatchEnqueueInput
  ): Promise<WorkspaceDispatchJobRecord> {
    const id = input.id ?? `dispatch_${crypto.randomUUID().replace(/-/g, '').slice(0, 20)}`
    const createdAt = Date.now()
    const sequence = await this.nextSequence()

    const record: WorkspaceDispatchJobRecord = {
      id,
      workspaceId: input.workspaceId,
      lane: input.lane,
      queueName: input.queueName,
      bullmqJobName: input.bullmqJobName,
      bullmqPayload: input.bullmqPayload,
      metadata: input.metadata,
      priority: input.priority ?? 100,
      maxAttempts: input.maxAttempts,
      delayMs: input.delayMs,
      status: 'waiting',
      createdAt,
    }

    const score = createPriorityScore(record.priority, sequence)
    const pipeline = this.redis.pipeline()
    pipeline.set(jobKey(id), JSON.stringify(record), 'EX', JOB_TTL_SECONDS)
    pipeline.zadd(workspaceLaneKey(record.workspaceId, record.lane), score, id)
    // NX: keep the workspace's existing position if it is already queued.
    pipeline.zadd(ACTIVE_WORKSPACES_KEY, 'NX', sequence, record.workspaceId)
    pipeline.incr(GLOBAL_DEPTH_KEY)
    await pipeline.exec()

    return record
  }

  /**
   * Puts an existing record back into its lane and re-activates its
   * workspace. Unlike enqueue, the global depth counter is not incremented.
   */
  async restoreWorkspaceDispatchJob(record: WorkspaceDispatchJobRecord): Promise<void> {
    const sequence = await this.nextSequence()
    const score = createPriorityScore(record.priority, sequence)
    const pipeline = this.redis.pipeline()
    pipeline.set(jobKey(record.id), JSON.stringify(record), 'EX', JOB_TTL_SECONDS)
    pipeline.zadd(workspaceLaneKey(record.workspaceId, record.lane), score, record.id)
    pipeline.zadd(ACTIVE_WORKSPACES_KEY, 'NX', sequence, record.workspaceId)
    await pipeline.exec()
  }

  /**
   * Runs the claim script for one workspace and returns its decoded result.
   * @throws Error when the script returns an unknown result type.
   */
  async claimWorkspaceJob(
    workspaceId: string,
    options: {
      lanes: readonly WorkspaceDispatchLane[]
      concurrencyLimit: number
      leaseId: string
      now: number
      leaseTtlMs: number
    }
  ): Promise<WorkspaceDispatchClaimResult> {
    const raw = await this.redis.eval(
      CLAIM_JOB_SCRIPT,
      0,
      workspaceId,
      String(options.now),
      String(options.concurrencyLimit),
      options.leaseId,
      String(options.now + options.leaseTtlMs),
      JSON.stringify(options.lanes),
      SEQUENCE_KEY,
      ACTIVE_WORKSPACES_KEY,
      `${DISPATCH_PREFIX}:job:`,
      `${DISPATCH_PREFIX}:workspace:`,
      String(JOB_TTL_SECONDS)
    )

    const parsed = JSON.parse(String(raw)) as WorkspaceDispatchClaimResult
    switch (parsed.type) {
      case WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED:
      case WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED:
      case WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED:
      case WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY:
        return parsed
      default:
        throw new Error(
          `Unknown dispatch claim result: ${String((parsed as { type?: string }).type)}`
        )
    }
  }

  /** Sums the queued job count over the given lanes of a workspace. */
  async getWorkspaceQueueDepth(
    workspaceId: string,
    lanes: readonly WorkspaceDispatchLane[]
  ): Promise<number> {
    if (lanes.length === 0) return 0
    const pipeline = this.redis.pipeline()
    for (const lane of lanes) {
      pipeline.zcard(workspaceLaneKey(workspaceId, lane))
    }
    const results = await pipeline.exec()
    let depth = 0
    for (const result of results ?? []) {
      // Each pipeline result is an [error, value] pair; failed commands are skipped.
      if (result && !result[0]) {
        depth += (result[1] as number) ?? 0
      }
    }
    return depth
  }

  /**
   * Reads the global depth counter.
   * @returns the counter clamped to zero; 0 when it is missing or not a number.
   */
  async getGlobalQueueDepth(): Promise<number> {
    const raw = await this.redis.get(GLOBAL_DEPTH_KEY)
    if (!raw) {
      return 0
    }

    const depth = Number.parseInt(raw, 10)
    // A non-numeric value would otherwise surface as NaN (Math.max(0, NaN) is NaN).
    return Number.isFinite(depth) ? Math.max(0, depth) : 0
  }

  /** Overwrites the global depth counter with an externally computed count. */
  async reconcileGlobalQueueDepth(knownCount: number): Promise<void> {
    await this.redis.set(GLOBAL_DEPTH_KEY, knownCount)
  }

  /** Pops the lowest-scored workspace id from the active set, or null when it is empty. */
  async popNextWorkspaceId(): Promise<string | null> {
    const result = await this.redis.zpopmin(ACTIVE_WORKSPACES_KEY)
    if (!result || result.length === 0) {
      return null
    }

    return result[0] ?? null
  }

  /** Number of workspaces currently in the active set. */
  async getQueuedWorkspaceCount(): Promise<number> {
    return this.redis.zcard(ACTIVE_WORKSPACES_KEY)
  }

  /** Whether the workspace is currently in the active set. */
  async hasActiveWorkspace(workspaceId: string): Promise<boolean> {
    return (await this.redis.zscore(ACTIVE_WORKSPACES_KEY, workspaceId)) !== null
  }

  /**
   * Adds the workspace to the active set if it is not already present.
   * A `readyAt` in the future pushes the workspace behind immediately ready ones.
   */
  async ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise<void> {
    const sequence = await this.nextSequence()
    // NOTE(review): readyAt is compared with Date.now(), i.e. epoch milliseconds;
    // multiplied by 1e6 it exceeds Number.MAX_SAFE_INTEGER, so the sequence
    // tie-breaker is not exactly representable. Kept because the claim script
    // scores delayed workspaces with the same formula.
    const score = readyAt && readyAt > Date.now() ? readyAt * 1_000_000 + sequence : sequence
    await this.redis.zadd(ACTIVE_WORKSPACES_KEY, 'NX', score, workspaceId)
  }

  /** Adds the workspace to the active set, or moves it to the newest sequence position. */
  async requeueWorkspaceId(workspaceId: string): Promise<void> {
    const sequence = await this.nextSequence()
    await this.redis.zadd(ACTIVE_WORKSPACES_KEY, sequence, workspaceId)
  }

  /** Whether any of the given lanes of the workspace still holds a queued job id. */
  async workspaceHasPendingJobs(
    workspaceId: string,
    lanes: readonly WorkspaceDispatchLane[]
  ): Promise<boolean> {
    for (const lane of lanes) {
      const count = await this.redis.zcard(workspaceLaneKey(workspaceId, lane))
      if (count > 0) {
        return true
      }
    }

    return false
  }

  /**
   * Returns the head job of the first lane (in the given order) that has one.
   * A head id whose record no longer exists is removed from its lane and
   * that lane is skipped.
   */
  async getNextWorkspaceJob(
    workspaceId: string,
    lanes: readonly WorkspaceDispatchLane[]
  ): Promise<WorkspaceDispatchJobRecord | null> {
    for (const lane of lanes) {
      const ids = await this.redis.zrange(workspaceLaneKey(workspaceId, lane), 0, 0)
      if (ids.length === 0) {
        continue
      }

      const record = await this.getDispatchJobRecord(ids[0])
      if (!record) {
        await this.redis.zrem(workspaceLaneKey(workspaceId, lane), ids[0])
        continue
      }

      return record
    }

    return null
  }

  /** Removes a job id from a workspace lane. */
  async removeWorkspaceJobFromLane(
    workspaceId: string,
    lane: WorkspaceDispatchLane,
    jobId: string
  ): Promise<void> {
    await this.redis.zrem(workspaceLaneKey(workspaceId, lane), jobId)
  }

  /** Drops every lease of the workspace whose expiry time is at or before now. */
  async cleanupExpiredWorkspaceLeases(workspaceId: string): Promise<void> {
    await this.redis.zremrangebyscore(workspaceLeaseKey(workspaceId), 0, Date.now())
  }

  /** Counts the workspace's leases after removing expired ones. */
  async countActiveWorkspaceLeases(workspaceId: string): Promise<number> {
    await this.cleanupExpiredWorkspaceLeases(workspaceId)
    return this.redis.zcard(workspaceLeaseKey(workspaceId))
  }

  /** Whether the lease is still held, after removing expired leases. */
  async hasWorkspaceLease(workspaceId: string, leaseId: string): Promise<boolean> {
    await this.cleanupExpiredWorkspaceLeases(workspaceId)
    return (await this.redis.zscore(workspaceLeaseKey(workspaceId), leaseId)) !== null
  }

  /**
   * Creates a lease, or extends it when it already exists.
   * @returns the lease expiry time in epoch milliseconds.
   */
  async createWorkspaceLease(workspaceId: string, leaseId: string, ttlMs: number): Promise<number> {
    const expiresAt = Date.now() + ttlMs
    await this.redis.zadd(workspaceLeaseKey(workspaceId), expiresAt, leaseId)
    return expiresAt
  }

  /** Extends a lease; identical to creating it with a fresh expiry. */
  async refreshWorkspaceLease(
    workspaceId: string,
    leaseId: string,
    ttlMs: number
  ): Promise<number> {
    return this.createWorkspaceLease(workspaceId, leaseId, ttlMs)
  }

  /** Releases a lease. */
  async releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise<void> {
    await this.redis.zrem(workspaceLeaseKey(workspaceId), leaseId)
  }

  /** Removes the workspace from the active set when none of the lanes has queued jobs. */
  async removeWorkspaceIfIdle(
    workspaceId: string,
    lanes: readonly WorkspaceDispatchLane[]
  ): Promise<void> {
    const hasPendingJobs = await this.workspaceHasPendingJobs(workspaceId, lanes)
    if (!hasPendingJobs) {
      await this.redis.zrem(ACTIVE_WORKSPACES_KEY, workspaceId)
    }
  }

  /** Marks a job `admitted` and records its lease and lease expiry. */
  async markDispatchJobAdmitted(
    jobId: string,
    workspaceId: string,
    leaseId: string,
    leaseExpiresAt: number
  ): Promise<void> {
    await this.updateDispatchJobRecord(jobId, (record) => ({
      ...record,
      status: 'admitted',
      admittedAt: Date.now(),
      lease: {
        workspaceId,
        leaseId,
      },
      metadata: {
        ...record.metadata,
        dispatchLeaseExpiresAt: leaseExpiresAt,
      },
    }))
  }

  /** Marks a job `admitting` and records its lease and lease expiry. */
  async markDispatchJobAdmitting(
    jobId: string,
    workspaceId: string,
    leaseId: string,
    leaseExpiresAt: number
  ): Promise<void> {
    await this.updateDispatchJobRecord(jobId, (record) => ({
      ...record,
      status: 'admitting',
      lease: {
        workspaceId,
        leaseId,
      },
      metadata: {
        ...record.metadata,
        dispatchLeaseExpiresAt: leaseExpiresAt,
      },
    }))
  }

  /** Marks a job `running`, keeping the first recorded start time. */
  async markDispatchJobRunning(jobId: string): Promise<void> {
    await this.updateDispatchJobRecord(jobId, (record) => ({
      ...record,
      status: 'running',
      startedAt: record.startedAt ?? Date.now(),
    }))
  }

  /** Marks a job `completed` with its output and releases its slot in the global depth. */
  async markDispatchJobCompleted(jobId: string, output: unknown): Promise<void> {
    await this.finishDispatchJob(jobId, (record) => ({
      ...record,
      status: 'completed',
      completedAt: Date.now(),
      output,
    }))
  }

  /** Marks a job `failed` with its error and releases its slot in the global depth. */
  async markDispatchJobFailed(jobId: string, error: string): Promise<void> {
    await this.finishDispatchJob(jobId, (record) => ({
      ...record,
      status: 'failed',
      completedAt: Date.now(),
      error,
    }))
  }

  /**
   * Applies a terminal transition and decrements the global depth counter.
   *
   * The counter is left alone when the stored record was already terminal,
   * so marking the same job finished twice cannot double-decrement it. A
   * missing record still decrements, as before.
   */
  private async finishDispatchJob(
    jobId: string,
    toTerminal: (record: WorkspaceDispatchJobRecord) => WorkspaceDispatchJobRecord
  ): Promise<void> {
    const transition = { alreadyTerminal: false }
    await this.updateDispatchJobRecord(jobId, (record) => {
      transition.alreadyTerminal = RedisWorkspaceDispatchStorage.TERMINAL_STATUSES.has(
        record.status
      )
      return toTerminal(record)
    })

    if (transition.alreadyTerminal) {
      return
    }

    // Best effort: the counter is corrected by reconcileGlobalQueueDepth.
    await this.redis.decr(GLOBAL_DEPTH_KEY).catch(() => undefined)
  }

  /** Deletes every key under the dispatch prefix. */
  async clear(): Promise<void> {
    let cursor = '0'

    do {
      const [nextCursor, foundKeys] = await this.redis.scan(
        cursor,
        'MATCH',
        `${DISPATCH_PREFIX}:*`,
        'COUNT',
        100
      )
      cursor = nextCursor

      // Delete per batch so the argument list of a single DEL stays bounded.
      if (foundKeys.length > 0) {
        await this.redis.del(...foundKeys)
      }
    } while (cursor !== '0')
  }

  /** Logs disposal; the Redis connection itself is not closed here. */
  dispose(): void {
    logger.info('Redis workspace dispatch storage disposed')
  }
}
Date(1000), + admittedAt: undefined, + startedAt: undefined, + completedAt: undefined, + queueName: 'workflow-execution', + lane: 'runtime', + workspaceId: 'workspace-1', + }, + estimatedDuration: 300000, + }) + }) + + it('presents admitting dispatch jobs distinctly', () => { + const result = presentDispatchOrJobStatus( + { + id: 'dispatch-1a', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'workflow-execution', + bullmqJobName: 'workflow-execution', + bullmqPayload: {}, + metadata: { workflowId: 'workflow-1' }, + priority: 10, + status: 'admitting', + createdAt: 1000, + }, + null + ) + + expect(result.status).toBe('admitting') + expect(result.estimatedDuration).toBe(300000) + }) + + it('presents completed dispatch jobs with output and duration', () => { + const result = presentDispatchOrJobStatus( + { + id: 'dispatch-2', + workspaceId: 'workspace-1', + lane: 'interactive', + queueName: 'workflow-execution', + bullmqJobName: 'direct-workflow-execution', + bullmqPayload: {}, + metadata: { workflowId: 'workflow-1' }, + priority: 1, + status: 'completed', + createdAt: 1000, + admittedAt: 1500, + startedAt: 2000, + completedAt: 7000, + output: { success: true }, + }, + null + ) + + expect(result.status).toBe('completed') + expect(result.output).toEqual({ success: true }) + expect(result.metadata.duration).toBe(5000) + }) + + it('falls back to legacy job status when no dispatch record exists', () => { + const result = presentDispatchOrJobStatus(null, { + id: 'job-1', + type: 'workflow-execution', + payload: {}, + status: 'pending', + createdAt: new Date(1000), + attempts: 0, + maxAttempts: 3, + metadata: {}, + }) + + expect(result.status).toBe('queued') + expect(result.estimatedDuration).toBe(300000) + }) +}) diff --git a/apps/sim/lib/core/workspace-dispatch/status.ts b/apps/sim/lib/core/workspace-dispatch/status.ts new file mode 100644 index 00000000000..fc5d934434c --- /dev/null +++ b/apps/sim/lib/core/workspace-dispatch/status.ts @@ -0,0 +1,110 @@ 
/** Every status value the presentation layer can report. */
export type DispatchPresentedStatus =
  | 'waiting'
  | 'admitting'
  | 'admitted'
  | 'running'
  | 'completed'
  | 'failed'
  | 'queued'
  | JobStatus

/** Status payload built from a dispatch record or a legacy job. */
export interface DispatchStatusPresentation {
  status: DispatchPresentedStatus
  metadata: {
    createdAt?: Date
    admittedAt?: Date
    startedAt?: Date
    completedAt?: Date
    queueName?: string
    lane?: string
    workspaceId?: string
    duration?: number
  }
  output?: unknown
  error?: string
  estimatedDuration?: number
}

/**
 * Builds the status presentation for a job.
 *
 * The dispatch record wins when present. Otherwise the legacy job is used,
 * with `pending` reported as `queued`. With neither, a bare `queued` status
 * is returned.
 *
 * @param dispatchJob - workspace dispatch record, if one exists
 * @param job - legacy async job, if one exists
 * @returns status, timestamps, and - depending on the status - output,
 *   error, or an estimated duration in milliseconds
 */
export function presentDispatchOrJobStatus(
  dispatchJob: WorkspaceDispatchJobRecord | null,
  job: Job | null
): DispatchStatusPresentation {
  if (dispatchJob) {
    // Dispatch records keep epoch-millisecond numbers; unset ones stay undefined.
    const toDate = (epochMs: number | undefined): Date | undefined =>
      epochMs ? new Date(epochMs) : undefined

    const began = toDate(dispatchJob.startedAt)
    const finished = toDate(dispatchJob.completedAt)

    const presentation: DispatchStatusPresentation = {
      status: dispatchJob.status,
      metadata: {
        createdAt: new Date(dispatchJob.createdAt),
        admittedAt: toDate(dispatchJob.admittedAt),
        startedAt: began,
        completedAt: finished,
        queueName: dispatchJob.queueName,
        lane: dispatchJob.lane,
        workspaceId: dispatchJob.workspaceId,
      },
    }

    if (began !== undefined && finished !== undefined) {
      presentation.metadata.duration = finished.getTime() - began.getTime()
    }

    switch (dispatchJob.status) {
      case 'completed':
        presentation.output = dispatchJob.output
        break
      case 'failed':
        presentation.error = dispatchJob.error
        break
      case 'waiting':
      case 'admitting':
      case 'admitted':
      case 'running':
        presentation.estimatedDuration = 300000
        break
    }

    return presentation
  }

  if (!job) {
    return { status: 'queued', metadata: {} }
  }

  const { status: jobStatus, createdAt, startedAt, completedAt } = job
  const presentation: DispatchStatusPresentation = {
    status: jobStatus === 'pending' ? 'queued' : jobStatus,
    metadata: { createdAt, startedAt, completedAt },
  }

  if (startedAt && completedAt) {
    presentation.metadata.duration = completedAt.getTime() - startedAt.getTime()
  }

  switch (jobStatus) {
    case 'completed':
      presentation.output = job.output
      break
    case 'failed':
      presentation.error = job.error
      break
    case 'processing':
    case 'pending':
      presentation.estimatedDuration = 300000
      break
  }

  return presentation
}
/** Enqueues a new dispatch job through the configured storage adapter. */
export async function enqueueWorkspaceDispatchJob(
  input: WorkspaceDispatchEnqueueInput
): Promise<WorkspaceDispatchJobRecord> {
  return getAdapter().enqueueWorkspaceDispatchJob(input)
}

/** Puts an existing job record back into its workspace lane. */
export async function restoreWorkspaceDispatchJob(
  record: WorkspaceDispatchJobRecord
): Promise<void> {
  return getAdapter().restoreWorkspaceDispatchJob(record)
}

/** Attempts to claim the next job of a workspace under the given concurrency limit. */
export async function claimWorkspaceJob(
  workspaceId: string,
  options: {
    lanes: readonly WorkspaceDispatchLane[]
    concurrencyLimit: number
    leaseId: string
    now: number
    leaseTtlMs: number
  }
): Promise<WorkspaceDispatchClaimResult> {
  return getAdapter().claimWorkspaceJob(workspaceId, options)
}

/** Number of queued jobs across the given lanes of a workspace. */
export async function getWorkspaceQueueDepth(
  workspaceId: string,
  lanes: readonly WorkspaceDispatchLane[]
): Promise<number> {
  return getAdapter().getWorkspaceQueueDepth(workspaceId, lanes)
}

/** Current value of the global queue depth counter. */
export async function getGlobalQueueDepth(): Promise<number> {
  return getAdapter().getGlobalQueueDepth()
}

/** Overwrites the global queue depth counter with a known count. */
export async function reconcileGlobalQueueDepth(knownCount: number): Promise<void> {
  return getAdapter().reconcileGlobalQueueDepth(knownCount)
}

/** Pops the next workspace id to service, or null when none is queued. */
export async function popNextWorkspaceId(): Promise<string | null> {
  return getAdapter().popNextWorkspaceId()
}

/** Number of workspaces currently queued for dispatch. */
export async function getQueuedWorkspaceCount(): Promise<number> {
  return getAdapter().getQueuedWorkspaceCount()
}

/** Whether the workspace is currently queued for dispatch. */
export async function hasActiveWorkspace(workspaceId: string): Promise<boolean> {
  return getAdapter().hasActiveWorkspace(workspaceId)
}

/** Queues the workspace for dispatch if it is not queued yet, optionally not before `readyAt`. */
export async function ensureWorkspaceActive(workspaceId: string, readyAt?: number): Promise<void> {
  return getAdapter().ensureWorkspaceActive(workspaceId, readyAt)
}

/** Queues the workspace for dispatch again. */
export async function requeueWorkspaceId(workspaceId: string): Promise<void> {
  return getAdapter().requeueWorkspaceId(workspaceId)
}

/** Whether any of the given lanes of the workspace has queued jobs. */
export async function workspaceHasPendingJobs(
  workspaceId: string,
  lanes: readonly WorkspaceDispatchLane[]
): Promise<boolean> {
  return getAdapter().workspaceHasPendingJobs(workspaceId, lanes)
}

/** Next queued job of the workspace across the given lanes, or null when there is none. */
export async function getNextWorkspaceJob(
  workspaceId: string,
  lanes: readonly WorkspaceDispatchLane[]
): Promise<WorkspaceDispatchJobRecord | null> {
  return getAdapter().getNextWorkspaceJob(workspaceId, lanes)
}

/** Removes a job from a workspace lane. */
export async function removeWorkspaceJobFromLane(
  workspaceId: string,
  lane: WorkspaceDispatchLane,
  jobId: string
): Promise<void> {
  return getAdapter().removeWorkspaceJobFromLane(workspaceId, lane, jobId)
}

/** Drops expired leases of a workspace. */
export async function cleanupExpiredWorkspaceLeases(workspaceId: string): Promise<void> {
  return getAdapter().cleanupExpiredWorkspaceLeases(workspaceId)
}

/** Number of leases a workspace currently holds. */
export async function countActiveWorkspaceLeases(workspaceId: string): Promise<number> {
  return getAdapter().countActiveWorkspaceLeases(workspaceId)
}

/** Whether the given lease is still held by the workspace. */
export async function hasWorkspaceLease(workspaceId: string, leaseId: string): Promise<boolean> {
  return getAdapter().hasWorkspaceLease(workspaceId, leaseId)
}

/** Creates a lease and returns its expiry time. */
export async function createWorkspaceLease(
  workspaceId: string,
  leaseId: string,
  ttlMs: number
): Promise<number> {
  return getAdapter().createWorkspaceLease(workspaceId, leaseId, ttlMs)
}

/** Extends a lease and returns its new expiry time. */
export async function refreshWorkspaceLease(
  workspaceId: string,
  leaseId: string,
  ttlMs: number
): Promise<number> {
  return getAdapter().refreshWorkspaceLease(workspaceId, leaseId, ttlMs)
}

/** Releases a lease held by a workspace. */
export async function releaseWorkspaceLease(workspaceId: string, leaseId: string): Promise<void> {
  return getAdapter().releaseWorkspaceLease(workspaceId, leaseId)
}

/** Removes the workspace from the dispatch queue when none of the lanes has queued jobs. */
export async function removeWorkspaceIfIdle(
  workspaceId: string,
  lanes: readonly WorkspaceDispatchLane[]
): Promise<void> {
  return getAdapter().removeWorkspaceIfIdle(workspaceId, lanes)
}

/** Marks a job as admitted under the given lease. */
export async function markDispatchJobAdmitted(
  jobId: string,
  workspaceId: string,
  leaseId: string,
  leaseExpiresAt: number
): Promise<void> {
  return getAdapter().markDispatchJobAdmitted(jobId, workspaceId, leaseId, leaseExpiresAt)
}
/** Names of the lanes a dispatch job can be queued in within a workspace. */
export const WORKSPACE_DISPATCH_LANES = [
  'interactive',
  'runtime',
  'knowledge',
  'lightweight',
] as const

/** One of the lane names listed in WORKSPACE_DISPATCH_LANES. */
export type WorkspaceDispatchLane = (typeof WORKSPACE_DISPATCH_LANES)[number]

/** Queue a dispatch job targets: an async job type or one of the named BullMQ queues. */
export type WorkspaceDispatchQueueName =
  | JobType
  | typeof KNOWLEDGE_CONNECTOR_SYNC_QUEUE
  | typeof KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE
  | typeof MOTHERSHIP_JOB_EXECUTION_QUEUE
  | typeof WORKSPACE_NOTIFICATION_DELIVERY_QUEUE

/** Status values a dispatch job record can hold. */
export const WORKSPACE_DISPATCH_STATUSES = {
  WAITING: 'waiting',
  ADMITTING: 'admitting',
  ADMITTED: 'admitted',
  RUNNING: 'running',
  COMPLETED: 'completed',
  FAILED: 'failed',
} as const

/** Union of the values of WORKSPACE_DISPATCH_STATUSES. */
export type WorkspaceDispatchStatus =
  (typeof WORKSPACE_DISPATCH_STATUSES)[keyof typeof WORKSPACE_DISPATCH_STATUSES]

/** Identifies the lease attached to a job record. */
export interface WorkspaceDispatchLeaseInfo {
  workspaceId: string
  leaseId: string
}

/** Routing fields of a dispatch job (id, workspace, lane, queue, BullMQ job name, priority). */
export interface WorkspaceDispatchJobContext {
  dispatchJobId: string
  workspaceId: string
  lane: WorkspaceDispatchLane
  queueName: WorkspaceDispatchQueueName
  bullmqJobName: string
  priority: number
}

/** Persisted state of one dispatch job. */
export interface WorkspaceDispatchJobRecord {
  id: string
  workspaceId: string
  lane: WorkspaceDispatchLane
  queueName: WorkspaceDispatchQueueName
  bullmqJobName: string
  bullmqPayload: unknown
  metadata: JobMetadata
  priority: number
  maxAttempts?: number
  // NOTE(review): presumably a delay in milliseconds counted from createdAt - confirm against the stores.
  delayMs?: number
  status: WorkspaceDispatchStatus
  // Numeric timestamps. NOTE(review): presumably epoch milliseconds - confirm.
  createdAt: number
  admittedAt?: number
  startedAt?: number
  completedAt?: number
  output?: unknown
  error?: string
  lease?: WorkspaceDispatchLeaseInfo
}

/** Input for enqueueing a job; `id` and `priority` are optional here. */
export interface WorkspaceDispatchEnqueueInput {
  id?: string
  workspaceId: string
  lane: WorkspaceDispatchLane
  queueName: WorkspaceDispatchQueueName
  bullmqJobName: string
  bullmqPayload: unknown
  metadata: JobMetadata
  priority?: number
  maxAttempts?: number
  delayMs?: number
}

/** Discriminator values of WorkspaceDispatchClaimResult. */
export const WORKSPACE_DISPATCH_CLAIM_RESULTS = {
  ADMITTED: 'admitted',
  LIMIT_REACHED: 'limit_reached',
  DELAYED: 'delayed',
  EMPTY: 'empty',
} as const

/**
 * Outcome of a claim attempt, discriminated by `type`:
 * `admitted` carries the claimed record and its lease, `delayed` carries
 * `nextReadyAt`, and `limit_reached` / `empty` carry nothing else.
 */
export type WorkspaceDispatchClaimResult =
  | {
      type: typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.ADMITTED
      record: WorkspaceDispatchJobRecord
      leaseId: string
      leaseExpiresAt: number
    }
  | {
      type:
        | typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.LIMIT_REACHED
        | typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.EMPTY
    }
  | {
      type: typeof WORKSPACE_DISPATCH_CLAIM_RESULTS.DELAYED
      nextReadyAt: number
    }
mockMarkDispatchJobRunning, + mockReleaseWorkspaceLease, + mockWakeWorkspaceDispatcher, +} = vi.hoisted(() => ({ + mockMarkDispatchJobCompleted: vi.fn(), + mockMarkDispatchJobFailed: vi.fn(), + mockMarkDispatchJobRunning: vi.fn(), + mockReleaseWorkspaceLease: vi.fn(), + mockWakeWorkspaceDispatcher: vi.fn(), +})) + +vi.mock('@sim/logger', () => ({ + createLogger: () => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + debug: vi.fn(), + }), +})) + +vi.mock('@/lib/core/workspace-dispatch', () => ({ + markDispatchJobCompleted: mockMarkDispatchJobCompleted, + markDispatchJobFailed: mockMarkDispatchJobFailed, + markDispatchJobRunning: mockMarkDispatchJobRunning, + releaseWorkspaceLease: mockReleaseWorkspaceLease, + wakeWorkspaceDispatcher: mockWakeWorkspaceDispatcher, +})) + +import { getDispatchRuntimeMetadata, runDispatchedJob } from '@/lib/core/workspace-dispatch/worker' + +describe('workspace dispatch worker lifecycle', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it('returns null for missing metadata', () => { + expect(getDispatchRuntimeMetadata(undefined)).toBeNull() + }) + + it('extracts dispatch runtime metadata when all fields are present', () => { + expect( + getDispatchRuntimeMetadata({ + dispatchJobId: 'dispatch-1', + dispatchWorkspaceId: 'workspace-1', + dispatchLeaseId: 'lease-1', + }) + ).toEqual({ + dispatchJobId: 'dispatch-1', + dispatchWorkspaceId: 'workspace-1', + dispatchLeaseId: 'lease-1', + }) + }) + + it('marks running, completed, releases lease, and wakes dispatcher on success', async () => { + const result = await runDispatchedJob( + { + dispatchJobId: 'dispatch-1', + dispatchWorkspaceId: 'workspace-1', + dispatchLeaseId: 'lease-1', + }, + async () => ({ success: true }) + ) + + expect(result).toEqual({ success: true }) + expect(mockMarkDispatchJobRunning).toHaveBeenCalledWith('dispatch-1') + expect(mockMarkDispatchJobCompleted).toHaveBeenCalledWith('dispatch-1', { success: true }) + 
/** Dispatch bookkeeping identifiers carried in a queued job's metadata. */
interface DispatchRuntimeMetadata {
  dispatchJobId: string
  dispatchWorkspaceId: string
  dispatchLeaseId: string
}

/** Options for runDispatchedJob. */
interface RunDispatchedJobOptions {
  /** Whether a failure of this attempt is final. Defaults to true. */
  isFinalAttempt?: boolean
  /** TTL applied on every lease heartbeat. Defaults to DEFAULT_LEASE_TTL_MS. */
  leaseTtlMs?: number
}

const DEFAULT_LEASE_TTL_MS = 15 * 60 * 1000
const LEASE_HEARTBEAT_INTERVAL_MS = 60_000

/** Narrows an unknown value to a non-empty string. */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === 'string' && value.length > 0
}

/**
 * Extracts the dispatch identifiers from a job's metadata.
 *
 * @param metadata - arbitrary job metadata
 * @returns the three identifiers, or null unless all of them are present
 *   as non-empty strings
 */
export function getDispatchRuntimeMetadata(metadata: unknown): DispatchRuntimeMetadata | null {
  if (!metadata || typeof metadata !== 'object') {
    return null
  }

  const { dispatchJobId, dispatchWorkspaceId, dispatchLeaseId } = metadata as Record<
    string,
    unknown
  >
  // The input is unknown, so check the runtime type rather than only truthiness.
  if (
    !isNonEmptyString(dispatchJobId) ||
    !isNonEmptyString(dispatchWorkspaceId) ||
    !isNonEmptyString(dispatchLeaseId)
  ) {
    return null
  }

  return { dispatchJobId, dispatchWorkspaceId, dispatchLeaseId }
}

/**
 * Runs a job and keeps its dispatch bookkeeping in sync.
 *
 * Without dispatch metadata, `run` is invoked directly. Otherwise the job is
 * marked running, the lease is refreshed on a heartbeat while `run` executes,
 * and the job is marked completed or - on a final attempt - failed. On
 * success or a final attempt the lease is released and the dispatcher woken.
 *
 * @param metadata - job metadata that may carry the dispatch identifiers
 * @param run - the work to execute
 * @param options - attempt and lease settings
 * @returns the result of `run`
 * @throws whatever `run` throws; a bookkeeping failure while marking the job
 *   failed is logged and never replaces that error
 */
export async function runDispatchedJob<T>(
  metadata: unknown,
  run: () => Promise<T>,
  options: RunDispatchedJobOptions = {}
): Promise<T> {
  const dispatchMetadata = getDispatchRuntimeMetadata(metadata)

  if (!dispatchMetadata) {
    return run()
  }

  const leaseTtlMs = options.leaseTtlMs ?? DEFAULT_LEASE_TTL_MS
  const isFinalAttempt = options.isFinalAttempt ?? true

  let heartbeatTimer: NodeJS.Timeout | null = null
  let succeeded = false
  try {
    // Inside the try so a failure here still reaches the lease release below.
    await markDispatchJobRunning(dispatchMetadata.dispatchJobId)

    heartbeatTimer = setInterval(() => {
      void refreshWorkspaceLease(
        dispatchMetadata.dispatchWorkspaceId,
        dispatchMetadata.dispatchLeaseId,
        leaseTtlMs
      ).catch((error) => {
        logger.error('Failed to refresh dispatch lease', { error, dispatchMetadata })
      })
    }, LEASE_HEARTBEAT_INTERVAL_MS)
    // Do not keep the process alive just for the heartbeat.
    heartbeatTimer.unref()

    const result = await run()
    succeeded = true
    await markDispatchJobCompleted(dispatchMetadata.dispatchJobId, result)
    return result
  } catch (error) {
    if (isFinalAttempt && !succeeded) {
      try {
        await markDispatchJobFailed(
          dispatchMetadata.dispatchJobId,
          error instanceof Error ? error.message : String(error)
        )
      } catch (markError) {
        // Keep the job's own error as the one that propagates.
        logger.error('Failed to mark dispatch job failed', {
          error: markError,
          dispatchMetadata,
        })
      }
    }
    throw error
  } finally {
    if (heartbeatTimer) {
      clearInterval(heartbeatTimer)
      heartbeatTimer = null
    }

    // A non-final failed attempt keeps its lease for the retry.
    const shouldReleaseLease = succeeded || isFinalAttempt
    if (shouldReleaseLease) {
      try {
        await releaseWorkspaceLease(
          dispatchMetadata.dispatchWorkspaceId,
          dispatchMetadata.dispatchLeaseId
        )
        await wakeWorkspaceDispatcher()
      } catch (error) {
        logger.error('Failed to release dispatch lease', { error, dispatchMetadata })
      }
    }
  }
}
Date.now() < pollDeadline) { + const meta = await getExecutionMeta(executionId) + + if (meta && isTerminalStatus(meta.status)) { + const finalEvents = await readExecutionEvents(executionId, lastEventId) + for (const entry of finalEvents) { + if (closed) { + return + } + + enqueue(formatSSEEvent(entry.event)) + lastEventId = entry.eventId + } + + enqueue('data: [DONE]\n\n') + controller.close() + return + } + + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) + if (closed) { + return + } + + const newEvents = await readExecutionEvents(executionId, lastEventId) + for (const entry of newEvents) { + if (closed) { + return + } + + enqueue(formatSSEEvent(entry.event)) + lastEventId = entry.eventId + } + } + + if (!closed) { + logger.warn('Buffered execution stream deadline reached', { executionId }) + enqueue('data: [DONE]\n\n') + controller.close() + } + } catch (error) { + logger.error('Buffered execution stream failed', { + executionId, + error: error instanceof Error ? 
error.message : String(error), + }) + + if (!closed) { + try { + controller.close() + } catch {} + } + } + }, + cancel() { + closed = true + logger.info('Client disconnected from buffered execution stream', { executionId }) + }, + }) +} diff --git a/apps/sim/lib/knowledge/connectors/sync-engine.ts b/apps/sim/lib/knowledge/connectors/sync-engine.ts index 0fe70313d74..260ae7fc89c 100644 --- a/apps/sim/lib/knowledge/connectors/sync-engine.ts +++ b/apps/sim/lib/knowledge/connectors/sync-engine.ts @@ -8,7 +8,9 @@ import { import { createLogger } from '@sim/logger' import { and, eq, gt, inArray, isNull, lt, ne, or, sql } from 'drizzle-orm' import { decryptApiKey } from '@/lib/api-key/crypto' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { getInternalApiBaseUrl } from '@/lib/core/utils/urls' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import type { DocumentData } from '@/lib/knowledge/documents/service' import { hardDeleteDocuments, @@ -140,8 +142,7 @@ export function resolveTagMapping( } /** - * Dispatch a connector sync — uses Trigger.dev when available, - * otherwise falls back to direct executeSync. + * Dispatch a connector sync using the configured background execution backend. 
*/ export async function dispatchSync( connectorId: string, @@ -159,6 +160,38 @@ export async function dispatchSync( { tags: [`connector:${connectorId}`] } ) logger.info(`Dispatched connector sync to Trigger.dev`, { connectorId, requestId }) + } else if (isBullMQEnabled()) { + const connectorRows = await db + .select({ + workspaceId: knowledgeBase.workspaceId, + userId: knowledgeBase.userId, + }) + .from(knowledgeConnector) + .innerJoin(knowledgeBase, eq(knowledgeBase.id, knowledgeConnector.knowledgeBaseId)) + .where(eq(knowledgeConnector.id, connectorId)) + .limit(1) + + const workspaceId = connectorRows[0]?.workspaceId + const userId = connectorRows[0]?.userId + if (!workspaceId || !userId) { + throw new Error(`No workspace found for connector ${connectorId}`) + } + + await enqueueWorkspaceDispatch({ + workspaceId, + lane: 'knowledge', + queueName: 'knowledge-connector-sync', + bullmqJobName: 'knowledge-connector-sync', + bullmqPayload: createBullMQJobData({ + connectorId, + fullSync: options?.fullSync, + requestId, + }), + metadata: { + userId, + }, + }) + logger.info(`Dispatched connector sync to BullMQ`, { connectorId, requestId }) } else { executeSync(connectorId, { fullSync: options?.fullSync }).catch((error) => { logger.error(`Sync failed for connector ${connectorId}`, { diff --git a/apps/sim/lib/knowledge/documents/document-processor.ts b/apps/sim/lib/knowledge/documents/document-processor.ts index 72bf9007c9d..5ca6de84c9e 100644 --- a/apps/sim/lib/knowledge/documents/document-processor.ts +++ b/apps/sim/lib/knowledge/documents/document-processor.ts @@ -5,9 +5,10 @@ import { type Chunk, JsonYamlChunker, StructuredDataChunker, TextChunker } from import { env } from '@/lib/core/config/env' import { parseBuffer, parseFile } from '@/lib/file-parsers' import type { FileParseMetadata } from '@/lib/file-parsers/types' +import { resolveParserExtension } from '@/lib/knowledge/documents/parser-extension' import { retryWithExponentialBackoff } from 
'@/lib/knowledge/documents/utils' import { StorageService } from '@/lib/uploads' -import { getExtensionFromMimeType, isInternalFileUrl } from '@/lib/uploads/utils/file-utils' +import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils' import { downloadFileFromUrl } from '@/lib/uploads/utils/file-utils.server' import { mistralParserTool } from '@/tools/mistral/parser' @@ -759,10 +760,7 @@ async function parseDataURI(fileUrl: string, filename: string, mimeType: string) : decodeURIComponent(base64Data) } - const extension = - (filename.includes('.') ? filename.split('.').pop()?.toLowerCase() : undefined) || - getExtensionFromMimeType(mimeType) || - 'txt' + const extension = resolveParserExtension(filename, mimeType, 'txt') const buffer = Buffer.from(base64Data, 'base64') const result = await parseBuffer(buffer, extension) return result.content @@ -775,14 +773,7 @@ async function parseHttpFile( ): Promise<{ content: string; metadata?: FileParseMetadata }> { const buffer = await downloadFileWithTimeout(fileUrl) - let extension = filename.includes('.') ? filename.split('.').pop()?.toLowerCase() : undefined - if (!extension && mimeType) { - extension = getExtensionFromMimeType(mimeType) ?? 
undefined - } - if (!extension) { - throw new Error(`Could not determine file type for: ${filename}`) - } - + const extension = resolveParserExtension(filename, mimeType) const result = await parseBuffer(buffer, extension) return result } diff --git a/apps/sim/lib/knowledge/documents/parser-extension.test.ts b/apps/sim/lib/knowledge/documents/parser-extension.test.ts new file mode 100644 index 00000000000..4d65abdfef5 --- /dev/null +++ b/apps/sim/lib/knowledge/documents/parser-extension.test.ts @@ -0,0 +1,27 @@ +/** + * @vitest-environment node + */ +import { describe, expect, it } from 'vitest' +import { resolveParserExtension } from '@/lib/knowledge/documents/parser-extension' + +describe('resolveParserExtension', () => { + it('uses a supported filename extension when present', () => { + expect(resolveParserExtension('report.pdf', 'application/pdf')).toBe('pdf') + }) + + it('falls back to mime type when filename has no extension', () => { + expect( + resolveParserExtension('[Business] Your Thursday morning trip with Uber', 'text/plain') + ).toBe('txt') + }) + + it('falls back to mime type when filename extension is unsupported', () => { + expect(resolveParserExtension('uber-message.business', 'text/plain')).toBe('txt') + }) + + it('throws when neither filename nor mime type resolves to a supported parser', () => { + expect(() => + resolveParserExtension('uber-message.unknown', 'application/octet-stream') + ).toThrow('Unsupported file type') + }) +}) diff --git a/apps/sim/lib/knowledge/documents/parser-extension.ts b/apps/sim/lib/knowledge/documents/parser-extension.ts new file mode 100644 index 00000000000..ab32a7da1ed --- /dev/null +++ b/apps/sim/lib/knowledge/documents/parser-extension.ts @@ -0,0 +1,56 @@ +import { getExtensionFromMimeType } from '@/lib/uploads/utils/file-utils' + +const SUPPORTED_FILE_TYPES = [ + 'pdf', + 'csv', + 'docx', + 'doc', + 'txt', + 'md', + 'xlsx', + 'xls', + 'pptx', + 'ppt', + 'html', + 'htm', + 'json', + 'yaml', + 'yml', +] as const 
+ +const SUPPORTED_FILE_TYPES_TEXT = SUPPORTED_FILE_TYPES.join(', ') + +function isSupportedParserExtension(extension: string): boolean { + return SUPPORTED_FILE_TYPES.includes(extension as (typeof SUPPORTED_FILE_TYPES)[number]) +} + +export function resolveParserExtension( + filename: string, + mimeType?: string, + fallback?: string +): string { + const filenameExtension = filename.includes('.') + ? filename.split('.').pop()?.toLowerCase() + : undefined + + if (filenameExtension && isSupportedParserExtension(filenameExtension)) { + return filenameExtension + } + + const mimeExtension = mimeType ? getExtensionFromMimeType(mimeType) : undefined + if (mimeExtension && isSupportedParserExtension(mimeExtension)) { + return mimeExtension + } + + if (fallback) { + return fallback + } + + if (filenameExtension) { + throw new Error( + `Unsupported file type: ${filenameExtension}. Supported types are: ${SUPPORTED_FILE_TYPES_TEXT}` + ) + } + + throw new Error(`Could not determine file type for ${filename || 'document'}`) +} diff --git a/apps/sim/lib/knowledge/documents/queue.ts b/apps/sim/lib/knowledge/documents/queue.ts deleted file mode 100644 index 31dd0879c70..00000000000 --- a/apps/sim/lib/knowledge/documents/queue.ts +++ /dev/null @@ -1,227 +0,0 @@ -import { createLogger } from '@sim/logger' -import { getRedisClient } from '@/lib/core/config/redis' -import { getStorageMethod, type StorageMethod } from '@/lib/core/storage' - -const logger = createLogger('DocumentQueue') - -interface QueueJob { - id: string - type: string - data: T - timestamp: number - attempts: number - maxAttempts: number -} - -interface QueueConfig { - maxConcurrent: number - retryDelay: number - maxRetries: number -} - -/** - * Document processing queue that uses either Redis or in-memory storage. - * Storage method is determined once at construction based on configuration. - * No switching on transient errors. 
- */ -export class DocumentProcessingQueue { - private config: QueueConfig - private storageMethod: StorageMethod - private processing = new Map>() - private inMemoryQueue: QueueJob[] = [] - private inMemoryProcessing = 0 - private processingStarted = false - - constructor(config: QueueConfig) { - this.config = config - this.storageMethod = getStorageMethod() - logger.info(`DocumentProcessingQueue using ${this.storageMethod} storage`) - } - - async addJob(type: string, data: T, options: { maxAttempts?: number } = {}): Promise { - const job: QueueJob = { - id: `${type}-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, - type, - data, - timestamp: Date.now(), - attempts: 0, - maxAttempts: options.maxAttempts || this.config.maxRetries, - } - - if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (!redis) { - throw new Error('Redis configured but client unavailable') - } - await redis.lpush('document-queue', JSON.stringify(job)) - logger.info(`Job ${job.id} added to Redis queue`) - } else { - this.inMemoryQueue.push(job) - logger.info(`Job ${job.id} added to in-memory queue`) - } - - return job.id - } - - async processJobs(processor: (job: QueueJob) => Promise): Promise { - if (this.processingStarted) { - logger.info('Queue processing already started, skipping') - return - } - - this.processingStarted = true - logger.info(`Starting queue processing (${this.storageMethod})`) - - if (this.storageMethod === 'redis') { - await this.processRedisJobs(processor) - } else { - await this.processInMemoryJobs(processor) - } - } - - private async processRedisJobs(processor: (job: QueueJob) => Promise) { - const redis = getRedisClient() - if (!redis) { - throw new Error('Redis configured but client unavailable') - } - - const processJobsContinuously = async () => { - while (true) { - if (this.processing.size >= this.config.maxConcurrent) { - await new Promise((resolve) => setTimeout(resolve, 100)) - continue - } - - try { - const result = await 
redis.rpop('document-queue') - if (!result) { - await new Promise((resolve) => setTimeout(resolve, 500)) - continue - } - - const job: QueueJob = JSON.parse(result) - const promise = this.executeJob(job, processor) - this.processing.set(job.id, promise) - - promise.finally(() => { - this.processing.delete(job.id) - }) - } catch (error: any) { - logger.error('Error processing Redis job:', error) - await new Promise((resolve) => setTimeout(resolve, 1000)) - } - } - } - - const processors = Array(this.config.maxConcurrent) - .fill(null) - .map(() => processJobsContinuously()) - - Promise.allSettled(processors).catch((error) => { - logger.error('Error in Redis queue processors:', error) - }) - } - - private async processInMemoryJobs(processor: (job: QueueJob) => Promise) { - const processInMemoryContinuously = async () => { - while (true) { - if (this.inMemoryProcessing >= this.config.maxConcurrent) { - await new Promise((resolve) => setTimeout(resolve, 100)) - continue - } - - const job = this.inMemoryQueue.shift() - if (!job) { - await new Promise((resolve) => setTimeout(resolve, 500)) - continue - } - - this.inMemoryProcessing++ - - this.executeJob(job, processor).finally(() => { - this.inMemoryProcessing-- - }) - } - } - - const processors = Array(this.config.maxConcurrent) - .fill(null) - .map(() => processInMemoryContinuously()) - - Promise.allSettled(processors).catch((error) => { - logger.error('Error in in-memory queue processors:', error) - }) - } - - private async executeJob( - job: QueueJob, - processor: (job: QueueJob) => Promise - ): Promise { - try { - job.attempts++ - logger.info(`Processing job ${job.id} (attempt ${job.attempts}/${job.maxAttempts})`) - - await processor(job) - logger.info(`Job ${job.id} completed successfully`) - } catch (error) { - logger.error(`Job ${job.id} failed (attempt ${job.attempts}):`, error) - - if (job.attempts < job.maxAttempts) { - const delay = this.config.retryDelay * 2 ** (job.attempts - 1) - - setTimeout(async () => { 
- if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (!redis) { - logger.error('Redis unavailable for retry, job lost:', job.id) - return - } - await redis.lpush('document-queue', JSON.stringify(job)) - } else { - this.inMemoryQueue.push(job) - } - }, delay) - - logger.info(`Job ${job.id} will retry in ${delay}ms`) - } else { - logger.error(`Job ${job.id} failed permanently after ${job.attempts} attempts`) - } - } - } - - async getQueueStats(): Promise<{ - pending: number - processing: number - storageMethod: StorageMethod - }> { - let pending = 0 - - if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (redis) { - pending = await redis.llen('document-queue') - } - } else { - pending = this.inMemoryQueue.length - } - - return { - pending, - processing: this.storageMethod === 'redis' ? this.processing.size : this.inMemoryProcessing, - storageMethod: this.storageMethod, - } - } - - async clearQueue(): Promise { - if (this.storageMethod === 'redis') { - const redis = getRedisClient() - if (redis) { - await redis.del('document-queue') - logger.info('Redis queue cleared') - } - } - - this.inMemoryQueue.length = 0 - logger.info('In-memory queue cleared') - } -} diff --git a/apps/sim/lib/knowledge/documents/service.ts b/apps/sim/lib/knowledge/documents/service.ts index 5f7bf7a8447..5c802d1e699 100644 --- a/apps/sim/lib/knowledge/documents/service.ts +++ b/apps/sim/lib/knowledge/documents/service.ts @@ -25,10 +25,11 @@ import { type SQL, sql, } from 'drizzle-orm' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { env } from '@/lib/core/config/env' -import { getStorageMethod, isRedisStorage } from '@/lib/core/storage' +import { isTriggerDevEnabled } from '@/lib/core/config/feature-flags' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import { processDocument } from '@/lib/knowledge/documents/document-processor' -import { DocumentProcessingQueue } from 
'@/lib/knowledge/documents/queue' import type { DocumentSortField, SortOrder } from '@/lib/knowledge/documents/types' import { generateEmbeddings } from '@/lib/knowledge/embeddings' import { @@ -87,22 +88,8 @@ const REDIS_PROCESSING_CONFIG = { delayBetweenDocuments: env.KB_CONFIG_DELAY_BETWEEN_DOCUMENTS || 50, } -let documentQueue: DocumentProcessingQueue | null = null - -export function getDocumentQueue(): DocumentProcessingQueue { - if (!documentQueue) { - const config = isRedisStorage() ? REDIS_PROCESSING_CONFIG : PROCESSING_CONFIG - documentQueue = new DocumentProcessingQueue({ - maxConcurrent: config.maxConcurrentDocuments, - retryDelay: env.KB_CONFIG_MIN_TIMEOUT || 1000, - maxRetries: env.KB_CONFIG_MAX_ATTEMPTS || 3, - }) - } - return documentQueue -} - export function getProcessingConfig() { - return isRedisStorage() ? REDIS_PROCESSING_CONFIG : PROCESSING_CONFIG + return isBullMQEnabled() ? REDIS_PROCESSING_CONFIG : PROCESSING_CONFIG } export interface DocumentData { @@ -134,6 +121,54 @@ export interface DocumentJobData { requestId: string } +export async function dispatchDocumentProcessingJob(payload: DocumentJobData): Promise { + if (isTriggerAvailable()) { + await tasks.trigger('knowledge-process-document', payload) + return + } + + if (isBullMQEnabled()) { + const workspaceRows = await db + .select({ + workspaceId: knowledgeBase.workspaceId, + userId: knowledgeBase.userId, + }) + .from(knowledgeBase) + .where(and(eq(knowledgeBase.id, payload.knowledgeBaseId), isNull(knowledgeBase.deletedAt))) + .limit(1) + + const workspaceId = workspaceRows[0]?.workspaceId + const userId = workspaceRows[0]?.userId + if (!workspaceId || !userId) { + throw new Error(`Knowledge base not found: ${payload.knowledgeBaseId}`) + } + + await enqueueWorkspaceDispatch({ + workspaceId, + lane: 'knowledge', + queueName: 'knowledge-process-document', + bullmqJobName: 'knowledge-process-document', + bullmqPayload: createBullMQJobData(payload), + metadata: { + userId, + }, + }) + 
return + } + + void processDocumentAsync( + payload.knowledgeBaseId, + payload.documentId, + payload.docData, + payload.processingOptions + ).catch((error) => { + logger.error(`[${payload.requestId}] Direct document processing failed`, { + documentId: payload.documentId, + error: error instanceof Error ? error.message : String(error), + }) + }) +} + export interface DocumentTagData { tagName: string fieldType: string @@ -322,7 +357,7 @@ export async function processDocumentTags( } /** - * Process documents with best available method: Trigger.dev > Redis queue > in-memory concurrency control + * Process documents with the configured background execution backend. */ export async function processDocumentsWithQueue( createdDocuments: DocumentData[], @@ -330,76 +365,29 @@ export async function processDocumentsWithQueue( processingOptions: ProcessingOptions, requestId: string ): Promise { - // Priority 1: Trigger.dev - if (isTriggerAvailable()) { - try { - logger.info( - `[${requestId}] Using Trigger.dev background processing for ${createdDocuments.length} documents` - ) - - const triggerPayloads = createdDocuments.map((doc) => ({ - knowledgeBaseId, - documentId: doc.documentId, - docData: { - filename: doc.filename, - fileUrl: doc.fileUrl, - fileSize: doc.fileSize, - mimeType: doc.mimeType, - }, - processingOptions, - requestId, - })) - - const result = await processDocumentsWithTrigger(triggerPayloads, requestId) - - if (result.success) { - logger.info( - `[${requestId}] Successfully triggered background processing: ${result.message}` - ) - return - } - logger.warn(`[${requestId}] Trigger.dev failed: ${result.message}, falling back to Redis`) - } catch (error) { - logger.warn(`[${requestId}] Trigger.dev processing failed, falling back to Redis:`, error) - } - } - - // Priority 2: Queue-based processing (Redis or in-memory based on storage method) - const queue = getDocumentQueue() - const storageMethod = getStorageMethod() + const jobPayloads = 
createdDocuments.map((doc) => ({ + knowledgeBaseId, + documentId: doc.documentId, + docData: { + filename: doc.filename, + fileUrl: doc.fileUrl, + fileSize: doc.fileSize, + mimeType: doc.mimeType, + }, + processingOptions, + requestId, + })) logger.info( - `[${requestId}] Using ${storageMethod} queue for ${createdDocuments.length} documents` - ) - - const jobPromises = createdDocuments.map((doc) => - queue.addJob('process-document', { - knowledgeBaseId, - documentId: doc.documentId, - docData: { - filename: doc.filename, - fileUrl: doc.fileUrl, - fileSize: doc.fileSize, - mimeType: doc.mimeType, - }, - processingOptions, - requestId, - }) + `[${requestId}] Dispatching background processing for ${jobPayloads.length} documents`, + { + backend: isTriggerAvailable() ? 'trigger-dev' : isBullMQEnabled() ? 'bullmq' : 'direct', + } ) - await Promise.all(jobPromises) - - queue - .processJobs(async (job) => { - const data = job.data as DocumentJobData - const { knowledgeBaseId, documentId, docData, processingOptions } = data - await processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions) - }) - .catch((error) => { - logger.error(`[${requestId}] Error in queue processing:`, error) - }) + await Promise.all(jobPayloads.map((payload) => dispatchDocumentProcessingJob(payload))) - logger.info(`[${requestId}] All documents queued for processing`) + logger.info(`[${requestId}] All documents dispatched for processing`) return } @@ -659,7 +647,7 @@ export async function processDocumentAsync( * Check if Trigger.dev is available and configured */ export function isTriggerAvailable(): boolean { - return !!(env.TRIGGER_SECRET_KEY && env.TRIGGER_DEV_ENABLED !== false) + return Boolean(env.TRIGGER_SECRET_KEY) && isTriggerDevEnabled } /** diff --git a/apps/sim/lib/logs/events.ts b/apps/sim/lib/logs/events.ts index bbf17b2320e..54392e2ebbf 100644 --- a/apps/sim/lib/logs/events.ts +++ b/apps/sim/lib/logs/events.ts @@ -12,6 +12,7 @@ import { } from 
'@/lib/notifications/alert-rules' import { getActiveWorkflowContext } from '@/lib/workflows/active-context' import { + enqueueNotificationDeliveryDispatch, executeNotificationDelivery, workspaceNotificationDeliveryTask, } from '@/background/workspace-notification-delivery' @@ -131,6 +132,7 @@ export async function emitWorkflowExecutionCompleted(log: WorkflowExecutionLog): const payload = { deliveryId, subscriptionId: subscription.id, + workspaceId, notificationType: subscription.notificationType, log: notificationLog, alertConfig: alertConfig || undefined, @@ -141,6 +143,10 @@ export async function emitWorkflowExecutionCompleted(log: WorkflowExecutionLog): logger.info( `Enqueued ${subscription.notificationType} notification ${deliveryId} via Trigger.dev` ) + } else if (await enqueueNotificationDeliveryDispatch(payload)) { + logger.info( + `Enqueued ${subscription.notificationType} notification ${deliveryId} via BullMQ` + ) } else { void executeNotificationDelivery(payload).catch((error) => { logger.error(`Direct notification delivery failed for ${deliveryId}`, { error }) diff --git a/apps/sim/lib/mothership/inbox/executor.ts b/apps/sim/lib/mothership/inbox/executor.ts index 06057f3244e..ae6ce93fbc9 100644 --- a/apps/sim/lib/mothership/inbox/executor.ts +++ b/apps/sim/lib/mothership/inbox/executor.ts @@ -1,6 +1,7 @@ import { copilotChats, db, mothershipInboxTask, permissions, user, workspace } from '@sim/db' import { createLogger } from '@sim/logger' import { and, eq, sql } from 'drizzle-orm' +import { createRunSegment } from '@/lib/copilot/async-runs/repository' import { resolveOrCreateChat } from '@/lib/copilot/chat-lifecycle' import { buildIntegrationToolSchemas } from '@/lib/copilot/chat-payload' import { requestChatTitle } from '@/lib/copilot/chat-streaming' @@ -187,10 +188,27 @@ export async function executeInboxTask(taskId: string): Promise { ...(fileAttachments.length > 0 ? 
{ fileAttachments } : {}), } + const executionId = crypto.randomUUID() + const runId = crypto.randomUUID() + const runStreamId = crypto.randomUUID() + + if (chatId) { + await createRunSegment({ + id: runId, + executionId, + chatId, + userId, + workspaceId: ws.id, + streamId: runStreamId, + }).catch(() => {}) + } + const result = await orchestrateCopilotStream(requestPayload, { userId, workspaceId: ws.id, chatId: chatId ?? undefined, + executionId, + runId, goRoute: '/api/mothership/execute', autoExecuteTools: true, interactive: false, diff --git a/apps/sim/lib/notifications/inactivity-polling.ts b/apps/sim/lib/notifications/inactivity-polling.ts index 3a4505346d8..81aa0692dba 100644 --- a/apps/sim/lib/notifications/inactivity-polling.ts +++ b/apps/sim/lib/notifications/inactivity-polling.ts @@ -12,6 +12,7 @@ import { v4 as uuidv4 } from 'uuid' import { isTriggerDevEnabled } from '@/lib/core/config/feature-flags' import { TRIGGER_TYPES } from '@/lib/workflows/triggers/triggers' import { + enqueueNotificationDeliveryDispatch, executeNotificationDelivery, workspaceNotificationDeliveryTask, } from '@/background/workspace-notification-delivery' @@ -181,6 +182,7 @@ async function checkWorkflowInactivity( const payload = { deliveryId, subscriptionId: subscription.id, + workspaceId: workflowData.workspaceId, notificationType: subscription.notificationType, log: mockLog, alertConfig, @@ -188,6 +190,7 @@ async function checkWorkflowInactivity( if (isTriggerDevEnabled) { await workspaceNotificationDeliveryTask.trigger(payload) + } else if (await enqueueNotificationDeliveryDispatch(payload)) { } else { void executeNotificationDelivery(payload).catch((error) => { logger.error(`Direct notification delivery failed for ${deliveryId}`, { error }) diff --git a/apps/sim/lib/uploads/contexts/copilot/copilot-file-manager.ts b/apps/sim/lib/uploads/contexts/copilot/copilot-file-manager.ts index da61df38fce..7169bcb638e 100644 --- 
a/apps/sim/lib/uploads/contexts/copilot/copilot-file-manager.ts +++ b/apps/sim/lib/uploads/contexts/copilot/copilot-file-manager.ts @@ -5,8 +5,8 @@ import { generatePresignedDownloadUrl, generatePresignedUploadUrl, } from '@/lib/uploads/core/storage-service' -import { isImageFileType } from '@/lib/uploads/utils/file-utils' import type { PresignedUrlResponse } from '@/lib/uploads/shared/types' +import { isImageFileType } from '@/lib/uploads/utils/file-utils' const logger = createLogger('CopilotFileManager') diff --git a/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts b/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts index 1d56d47b4b6..e247231c411 100644 --- a/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts +++ b/apps/sim/lib/uploads/contexts/workspace/workspace-file-manager.ts @@ -13,14 +13,10 @@ import { incrementStorageUsage, } from '@/lib/billing/storage' import { normalizeVfsSegment } from '@/lib/copilot/vfs/normalize-segment' -import { - downloadFile, - hasCloudStorage, - uploadFile, -} from '@/lib/uploads/core/storage-service' -import { getFileMetadataByKey, insertFileMetadata } from '@/lib/uploads/server/metadata' import { getPostgresErrorCode } from '@/lib/core/utils/pg-error' import { generateRestoreName } from '@/lib/core/utils/restore-name' +import { downloadFile, hasCloudStorage, uploadFile } from '@/lib/uploads/core/storage-service' +import { getFileMetadataByKey, insertFileMetadata } from '@/lib/uploads/server/metadata' import { isUuid, sanitizeFileName } from '@/executor/constants' import type { UserFile } from '@/executor/types' @@ -256,7 +252,10 @@ export async function uploadWorkspaceFile( } } - logger.error(`Failed to upload workspace file after ${MAX_UPLOAD_UNIQUE_RETRIES} attempts`, lastError) + logger.error( + `Failed to upload workspace file after ${MAX_UPLOAD_UNIQUE_RETRIES} attempts`, + lastError + ) throw new FileConflictError(fileName) } @@ -278,7 +277,13 @@ export async function 
trackChatUpload( const updated = await db .update(workspaceFiles) .set({ chatId, context: 'mothership' }) - .where(and(eq(workspaceFiles.key, s3Key), eq(workspaceFiles.workspaceId, workspaceId), isNull(workspaceFiles.deletedAt))) + .where( + and( + eq(workspaceFiles.key, s3Key), + eq(workspaceFiles.workspaceId, workspaceId), + isNull(workspaceFiles.deletedAt) + ) + ) .returning({ id: workspaceFiles.id }) if (updated.length > 0) { @@ -345,7 +350,10 @@ export async function listWorkspaceFiles( .from(workspaceFiles) .where( scope === 'all' - ? and(eq(workspaceFiles.workspaceId, workspaceId), eq(workspaceFiles.context, 'workspace')) + ? and( + eq(workspaceFiles.workspaceId, workspaceId), + eq(workspaceFiles.context, 'workspace') + ) : scope === 'archived' ? and( eq(workspaceFiles.workspaceId, workspaceId), @@ -414,7 +422,9 @@ export function normalizeWorkspaceFileReference(fileReference: string): string { /** * Canonical sandbox mount path for an existing workspace file. */ -export function getSandboxWorkspaceFilePath(file: Pick): string { +export function getSandboxWorkspaceFilePath( + file: Pick +): string { return `/home/user/files/${file.id}/${file.name}` } diff --git a/apps/sim/lib/uploads/utils/user-file-base64.server.ts b/apps/sim/lib/uploads/utils/user-file-base64.server.ts index f3abdf5acde..3aa2f219eb1 100644 --- a/apps/sim/lib/uploads/utils/user-file-base64.server.ts +++ b/apps/sim/lib/uploads/utils/user-file-base64.server.ts @@ -244,13 +244,13 @@ async function hydrateValue( * Hydrates UserFile objects within a value to include base64 content. * Returns the original structure with UserFile.base64 set where available. 
*/ -export async function hydrateUserFilesWithBase64( - value: unknown, +export async function hydrateUserFilesWithBase64( + value: T, options: Base64HydrationOptions -): Promise { +): Promise { const logger = getHydrationLogger(options) const state = createHydrationState(options, logger) - return hydrateValue(value, options, state, logger) + return (await hydrateValue(value, options, state, logger)) as T } function isPlainObject(value: unknown): value is Record { diff --git a/apps/sim/lib/webhooks/processor.test.ts b/apps/sim/lib/webhooks/processor.test.ts index 20ae4408cd8..86876fda02b 100644 --- a/apps/sim/lib/webhooks/processor.test.ts +++ b/apps/sim/lib/webhooks/processor.test.ts @@ -9,12 +9,14 @@ const { mockUuidV4, mockPreprocessExecution, mockEnqueue, + mockEnqueueWorkspaceDispatch, mockGetJobQueue, mockShouldExecuteInline, } = vi.hoisted(() => ({ mockUuidV4: vi.fn(), mockPreprocessExecution: vi.fn(), mockEnqueue: vi.fn(), + mockEnqueueWorkspaceDispatch: vi.fn(), mockGetJobQueue: vi.fn(), mockShouldExecuteInline: vi.fn(), })) @@ -62,6 +64,15 @@ vi.mock('@/lib/core/async-jobs', () => ({ shouldExecuteInline: mockShouldExecuteInline, })) +vi.mock('@/lib/core/bullmq', () => ({ + isBullMQEnabled: vi.fn().mockReturnValue(true), + createBullMQJobData: vi.fn((payload: unknown, metadata?: unknown) => ({ payload, metadata })), +})) + +vi.mock('@/lib/core/workspace-dispatch', () => ({ + enqueueWorkspaceDispatch: mockEnqueueWorkspaceDispatch, +})) + vi.mock('@/lib/core/config/feature-flags', () => ({ isProd: false, })) @@ -142,6 +153,7 @@ describe('webhook processor execution identity', () => { actorUserId: 'actor-user-1', }) mockEnqueue.mockResolvedValue('job-1') + mockEnqueueWorkspaceDispatch.mockResolvedValue('job-1') mockGetJobQueue.mockResolvedValue({ enqueue: mockEnqueue }) mockShouldExecuteInline.mockReturnValue(false) mockUuidV4.mockReturnValue('generated-execution-id') @@ -202,15 +214,15 @@ describe('webhook processor execution identity', () => { ) 
expect(mockUuidV4).toHaveBeenCalledTimes(1) - expect(mockEnqueue).toHaveBeenCalledWith( - 'webhook-execution', - expect.objectContaining({ - executionId: 'generated-execution-id', - requestId: 'request-1', - correlation: preprocessingResult.correlation, - }), + expect(mockEnqueueWorkspaceDispatch).toHaveBeenCalledWith( expect.objectContaining({ + id: 'generated-execution-id', + workspaceId: 'workspace-1', + lane: 'runtime', + queueName: 'webhook-execution', metadata: expect.objectContaining({ + workflowId: 'workflow-1', + userId: 'actor-user-1', correlation: preprocessingResult.correlation, }), }) diff --git a/apps/sim/lib/webhooks/processor.ts b/apps/sim/lib/webhooks/processor.ts index 2561b2d794f..291a50917a6 100644 --- a/apps/sim/lib/webhooks/processor.ts +++ b/apps/sim/lib/webhooks/processor.ts @@ -7,8 +7,10 @@ import { v4 as uuidv4 } from 'uuid' import { isOrganizationOnTeamOrEnterprisePlan } from '@/lib/billing/core/subscription' import { getInlineJobQueue, getJobQueue, shouldExecuteInline } from '@/lib/core/async-jobs' import type { AsyncExecutionCorrelation } from '@/lib/core/async-jobs/types' +import { createBullMQJobData, isBullMQEnabled } from '@/lib/core/bullmq' import { isProd } from '@/lib/core/config/feature-flags' import { safeCompare } from '@/lib/core/security/encryption' +import { enqueueWorkspaceDispatch } from '@/lib/core/workspace-dispatch' import { getEffectiveDecryptedEnv } from '@/lib/environment/utils' import { preprocessExecution } from '@/lib/execution/preprocessing' import { @@ -1264,53 +1266,91 @@ export async function queueWebhookExecution( const isPolling = isPollingWebhookProvider(payload.provider) if (isPolling && !shouldExecuteInline()) { - const jobQueue = await getJobQueue() - const jobId = await jobQueue.enqueue('webhook-execution', payload, { - metadata: { - workflowId: foundWorkflow.id, - userId: actorUserId, - correlation, - }, - }) + const jobId = isBullMQEnabled() + ? 
await enqueueWorkspaceDispatch({ + id: executionId, + workspaceId: foundWorkflow.workspaceId, + lane: 'runtime', + queueName: 'webhook-execution', + bullmqJobName: 'webhook-execution', + bullmqPayload: createBullMQJobData(payload, { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }), + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) + : await (await getJobQueue()).enqueue('webhook-execution', payload, { + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) logger.info( `[${options.requestId}] Queued polling webhook execution task ${jobId} for ${foundWebhook.provider} webhook via job queue` ) } else { const jobQueue = await getInlineJobQueue() - const jobId = await jobQueue.enqueue('webhook-execution', payload, { - metadata: { - workflowId: foundWorkflow.id, - userId: actorUserId, - correlation, - }, - }) + const jobId = isBullMQEnabled() + ? await enqueueWorkspaceDispatch({ + id: executionId, + workspaceId: foundWorkflow.workspaceId, + lane: 'runtime', + queueName: 'webhook-execution', + bullmqJobName: 'webhook-execution', + bullmqPayload: createBullMQJobData(payload, { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }), + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) + : await jobQueue.enqueue('webhook-execution', payload, { + metadata: { + workflowId: foundWorkflow.id, + userId: actorUserId, + correlation, + }, + }) logger.info( - `[${options.requestId}] Executing ${foundWebhook.provider} webhook ${jobId} inline` + `[${options.requestId}] Queued ${foundWebhook.provider} webhook execution ${jobId} via inline backend` ) - void (async () => { - try { - await jobQueue.startJob(jobId) - const output = await executeWebhookJob(payload) - await jobQueue.completeJob(jobId, output) - } catch (error) { - const errorMessage = error instanceof Error ? 
error.message : String(error) - logger.error(`[${options.requestId}] Webhook execution failed`, { - jobId, - error: errorMessage, - }) + + if (shouldExecuteInline()) { + void (async () => { try { - await jobQueue.markJobFailed(jobId, errorMessage) - } catch (markFailedError) { - logger.error(`[${options.requestId}] Failed to mark job as failed`, { + await jobQueue.startJob(jobId) + const output = await executeWebhookJob(payload) + await jobQueue.completeJob(jobId, output) + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + logger.error(`[${options.requestId}] Webhook execution failed`, { jobId, - error: - markFailedError instanceof Error - ? markFailedError.message - : String(markFailedError), + error: errorMessage, }) + try { + await jobQueue.markJobFailed(jobId, errorMessage) + } catch (markFailedError) { + logger.error(`[${options.requestId}] Failed to mark job as failed`, { + jobId, + error: + markFailedError instanceof Error + ? markFailedError.message + : String(markFailedError), + }) + } } - } - })() + })() + } } if (foundWebhook.provider === 'microsoft-teams') { diff --git a/apps/sim/lib/workflows/executor/execution-events.ts b/apps/sim/lib/workflows/executor/execution-events.ts index e87f4d5971f..31eaacae6cd 100644 --- a/apps/sim/lib/workflows/executor/execution-events.ts +++ b/apps/sim/lib/workflows/executor/execution-events.ts @@ -242,18 +242,17 @@ export interface SSECallbackOptions { } /** - * Creates SSE callbacks for workflow execution streaming + * Creates execution callbacks using a provided event sink. 
*/ -export function createSSECallbacks(options: SSECallbackOptions) { - const { executionId, workflowId, controller, isStreamClosed, setStreamClosed } = options +export function createExecutionCallbacks(options: { + executionId: string + workflowId: string + sendEvent: (event: ExecutionEvent) => void | Promise +}) { + const { executionId, workflowId, sendEvent } = options - const sendEvent = (event: ExecutionEvent) => { - if (isStreamClosed()) return - try { - controller.enqueue(encodeSSEEvent(event)) - } catch { - setStreamClosed() - } + const sendBufferedEvent = async (event: ExecutionEvent) => { + await sendEvent(event) } const onBlockStart = async ( @@ -264,7 +263,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { iterationContext?: IterationContext, childWorkflowContext?: ChildWorkflowContext ) => { - sendEvent({ + await sendBufferedEvent({ type: 'block:started', timestamp: new Date().toISOString(), executionId, @@ -331,7 +330,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { : {} if (hasError) { - sendEvent({ + await sendBufferedEvent({ type: 'block:error', timestamp: new Date().toISOString(), executionId, @@ -352,7 +351,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { }, }) } else { - sendEvent({ + await sendBufferedEvent({ type: 'block:completed', timestamp: new Date().toISOString(), executionId, @@ -386,7 +385,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { const { done, value } = await reader.read() if (done) break const chunk = decoder.decode(value, { stream: true }) - sendEvent({ + await sendBufferedEvent({ type: 'stream:chunk', timestamp: new Date().toISOString(), executionId, @@ -394,7 +393,7 @@ export function createSSECallbacks(options: SSECallbackOptions) { data: { blockId, chunk }, }) } - sendEvent({ + await sendBufferedEvent({ type: 'stream:done', timestamp: new Date().toISOString(), executionId, @@ -414,7 +413,7 @@ export function createSSECallbacks(options: 
SSECallbackOptions) { iterationContext?: IterationContext, executionOrder?: number ) => { - sendEvent({ + void sendBufferedEvent({ type: 'block:childWorkflowStarted', timestamp: new Date().toISOString(), executionId, @@ -431,5 +430,33 @@ export function createSSECallbacks(options: SSECallbackOptions) { }) } - return { sendEvent, onBlockStart, onBlockComplete, onStream, onChildWorkflowInstanceReady } + return { + sendEvent: sendBufferedEvent, + onBlockStart, + onBlockComplete, + onStream, + onChildWorkflowInstanceReady, + } +} + +/** + * Creates SSE callbacks for workflow execution streaming + */ +export function createSSECallbacks(options: SSECallbackOptions) { + const { executionId, workflowId, controller, isStreamClosed, setStreamClosed } = options + + const sendEvent = (event: ExecutionEvent) => { + if (isStreamClosed()) return + try { + controller.enqueue(encodeSSEEvent(event)) + } catch { + setStreamClosed() + } + } + + return createExecutionCallbacks({ + executionId, + workflowId, + sendEvent, + }) } diff --git a/apps/sim/lib/workflows/executor/queued-workflow-execution.ts b/apps/sim/lib/workflows/executor/queued-workflow-execution.ts new file mode 100644 index 00000000000..c60ba860a11 --- /dev/null +++ b/apps/sim/lib/workflows/executor/queued-workflow-execution.ts @@ -0,0 +1,339 @@ +import { createLogger } from '@sim/logger' +import { createTimeoutAbortController, getTimeoutErrorMessage } from '@/lib/core/execution-limits' +import { createExecutionEventWriter, setExecutionMeta } from '@/lib/execution/event-buffer' +import { LoggingSession } from '@/lib/logs/execution/logging-session' +import { buildTraceSpans } from '@/lib/logs/execution/trace-spans/trace-spans' +import { + cleanupExecutionBase64Cache, + hydrateUserFilesWithBase64, +} from '@/lib/uploads/utils/user-file-base64.server' +import { + executeWorkflowCore, + wasExecutionFinalizedByCore, +} from '@/lib/workflows/executor/execution-core' +import { + createExecutionCallbacks, + type ExecutionEvent, 
+} from '@/lib/workflows/executor/execution-events' +import { PauseResumeManager } from '@/lib/workflows/executor/human-in-the-loop-manager' +import { ExecutionSnapshot } from '@/executor/execution/snapshot' +import type { ExecutionMetadata, SerializableExecutionState } from '@/executor/execution/types' +import type { BlockLog, NormalizedBlockOutput } from '@/executor/types' +import { hasExecutionResult } from '@/executor/utils/errors' + +const logger = createLogger('QueuedWorkflowExecution') + +export const DIRECT_WORKFLOW_JOB_NAME = 'direct-workflow-execution' + +export interface QueuedWorkflowExecutionPayload { + workflow: Record + metadata: ExecutionMetadata + input: unknown + variables: Record + selectedOutputs?: string[] + includeFileBase64?: boolean + base64MaxBytes?: number + stopAfterBlockId?: string + timeoutMs?: number + runFromBlock?: { + startBlockId: string + sourceSnapshot: SerializableExecutionState + } + streamEvents?: boolean +} + +export interface QueuedWorkflowExecutionResult { + success: boolean + executionId: string + output: NormalizedBlockOutput + error?: string + logs?: BlockLog[] + status: 'success' | 'cancelled' | 'paused' | 'failed' + statusCode?: number + metadata?: { + duration?: number + startTime?: string + endTime?: string + } +} + +function buildResult( + status: QueuedWorkflowExecutionResult['status'], + result: { + success: boolean + output: NormalizedBlockOutput + error?: string + logs?: BlockLog[] + metadata?: { + duration?: number + startTime?: string + endTime?: string + } + }, + executionId: string, + statusCode?: number +): QueuedWorkflowExecutionResult { + return { + success: result.success, + executionId, + output: result.output, + error: result.error, + logs: result.logs, + status, + statusCode, + metadata: result.metadata, + } +} + +export async function executeQueuedWorkflowJob( + payload: QueuedWorkflowExecutionPayload +): Promise { + const { metadata } = payload + const { executionId, requestId, workflowId, 
triggerType } = metadata + const loggingSession = new LoggingSession(workflowId, executionId, triggerType, requestId) + const timeoutController = createTimeoutAbortController(payload.timeoutMs) + const eventWriter = payload.streamEvents ? createExecutionEventWriter(executionId) : null + + if (payload.streamEvents) { + await setExecutionMeta(executionId, { + status: 'active', + userId: metadata.userId, + workflowId, + }) + } + + try { + const snapshot = new ExecutionSnapshot( + metadata, + payload.workflow, + payload.input, + payload.variables, + payload.selectedOutputs ?? [] + ) + + let callbacks = {} + + if (eventWriter) { + const executionCallbacks = createExecutionCallbacks({ + executionId, + workflowId, + sendEvent: async (event: ExecutionEvent) => { + await eventWriter.write(event) + }, + }) + + callbacks = { + onBlockStart: executionCallbacks.onBlockStart, + onBlockComplete: executionCallbacks.onBlockComplete, + onStream: executionCallbacks.onStream, + onChildWorkflowInstanceReady: executionCallbacks.onChildWorkflowInstanceReady, + } + + await executionCallbacks.sendEvent({ + type: 'execution:started', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + startTime: metadata.startTime, + }, + }) + } + + const result = await executeWorkflowCore({ + snapshot, + callbacks, + loggingSession, + includeFileBase64: payload.includeFileBase64, + base64MaxBytes: payload.base64MaxBytes, + stopAfterBlockId: payload.stopAfterBlockId, + runFromBlock: payload.runFromBlock, + abortSignal: timeoutController.signal, + }) + + if ( + result.status === 'cancelled' && + timeoutController.isTimedOut() && + timeoutController.timeoutMs + ) { + const timeoutErrorMessage = getTimeoutErrorMessage(null, timeoutController.timeoutMs) + await loggingSession.markAsFailed(timeoutErrorMessage) + + if (eventWriter) { + await eventWriter.write({ + type: 'execution:error', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + error: 
timeoutErrorMessage, + duration: result.metadata?.duration || 0, + }, + }) + + await setExecutionMeta(executionId, { status: 'error' }) + } + + return buildResult( + 'cancelled', + { + success: false, + output: result.output, + error: timeoutErrorMessage, + logs: result.logs, + metadata: result.metadata + ? { + duration: result.metadata.duration, + startTime: result.metadata.startTime, + endTime: result.metadata.endTime, + } + : undefined, + }, + executionId, + 408 + ) + } + + if (result.status === 'paused') { + if (!result.snapshotSeed) { + await loggingSession.markAsFailed('Missing snapshot seed for paused execution') + } else { + await PauseResumeManager.persistPauseResult({ + workflowId, + executionId, + pausePoints: result.pausePoints || [], + snapshotSeed: result.snapshotSeed, + executorUserId: result.metadata?.userId, + }) + } + } else { + await PauseResumeManager.processQueuedResumes(executionId) + } + + const outputWithBase64 = payload.includeFileBase64 + ? await hydrateUserFilesWithBase64(result.output, { + requestId, + executionId, + maxBytes: payload.base64MaxBytes, + }) + : result.output + + if (eventWriter) { + if (result.status === 'cancelled') { + await eventWriter.write({ + type: 'execution:cancelled', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + duration: result.metadata?.duration || 0, + }, + }) + await setExecutionMeta(executionId, { status: 'cancelled' }) + } else { + await eventWriter.write({ + type: 'execution:completed', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + success: result.success, + output: outputWithBase64, + duration: result.metadata?.duration || 0, + startTime: result.metadata?.startTime || metadata.startTime, + endTime: result.metadata?.endTime || new Date().toISOString(), + }, + }) + await setExecutionMeta(executionId, { status: 'complete' }) + } + } + + return buildResult( + result.status === 'paused' + ? 'paused' + : result.status === 'cancelled' + ? 
'cancelled' + : 'success', + { + success: result.success, + output: outputWithBase64, + error: result.error, + logs: result.logs, + metadata: result.metadata + ? { + duration: result.metadata.duration, + startTime: result.metadata.startTime, + endTime: result.metadata.endTime, + } + : undefined, + }, + executionId + ) + } catch (error) { + logger.error('Queued workflow execution failed', { + workflowId, + executionId, + error: error instanceof Error ? error.message : String(error), + }) + + if (!wasExecutionFinalizedByCore(error, executionId)) { + const executionResult = hasExecutionResult(error) ? error.executionResult : undefined + const { traceSpans } = executionResult ? buildTraceSpans(executionResult) : { traceSpans: [] } + await loggingSession.safeCompleteWithError({ + error: { + message: error instanceof Error ? error.message : String(error), + stackTrace: error instanceof Error ? error.stack : undefined, + }, + traceSpans, + }) + } + + if (eventWriter) { + await eventWriter.write({ + type: 'execution:error', + timestamp: new Date().toISOString(), + executionId, + workflowId, + data: { + error: error instanceof Error ? error.message : String(error), + duration: 0, + }, + }) + await setExecutionMeta(executionId, { status: 'error' }) + } + + const executionResult = hasExecutionResult(error) ? error.executionResult : undefined + + return buildResult( + 'failed', + { + success: false, + output: executionResult?.output ?? {}, + error: executionResult?.error || (error instanceof Error ? error.message : String(error)), + logs: executionResult?.logs, + metadata: executionResult?.metadata + ? 
{ + duration: executionResult.metadata.duration, + startTime: executionResult.metadata.startTime, + endTime: executionResult.metadata.endTime, + } + : undefined, + }, + executionId, + 500 + ) + } finally { + timeoutController.cleanup() + + if (eventWriter) { + await eventWriter.close() + } + + await cleanupExecutionBase64Cache(executionId).catch((error) => { + logger.error('Failed to cleanup queued workflow base64 cache', { + executionId, + error: error instanceof Error ? error.message : String(error), + }) + }) + } +} diff --git a/apps/sim/lib/workflows/utils.ts b/apps/sim/lib/workflows/utils.ts index 0fe34094093..bf5cd48e30b 100644 --- a/apps/sim/lib/workflows/utils.ts +++ b/apps/sim/lib/workflows/utils.ts @@ -249,7 +249,9 @@ export async function updateWorkflowRunCounts(workflowId: string, runs = 1) { } } -export const workflowHasResponseBlock = (executionResult: ExecutionResult): boolean => { +export const workflowHasResponseBlock = ( + executionResult: Pick +): boolean => { if (!executionResult?.logs || !Array.isArray(executionResult.logs) || !executionResult.success) { return false } @@ -261,7 +263,9 @@ export const workflowHasResponseBlock = (executionResult: ExecutionResult): bool return responseBlock !== undefined } -export const createHttpResponseFromBlock = (executionResult: ExecutionResult): NextResponse => { +export const createHttpResponseFromBlock = ( + executionResult: Pick +): NextResponse => { const { data = {}, status = 200, headers = {} } = executionResult.output const responseHeaders = new Headers({ diff --git a/apps/sim/package.json b/apps/sim/package.json index 981f39c7d93..03c91227186 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -11,10 +11,16 @@ "dev": "next dev --port 3000", "dev:webpack": "next dev --webpack", "dev:sockets": "bun run socket/index.ts", - "dev:full": "bunx concurrently -n \"App,Realtime\" -c \"cyan,magenta\" \"bun run dev\" \"bun run dev:sockets\"", + "dev:worker": "bun run worker/index.ts", + 
"dev:full": "bunx concurrently -n \"App,Realtime,Worker\" -c \"cyan,magenta,yellow\" \"bun run dev\" \"bun run dev:sockets\" \"bun run dev:worker\"", + "load:workflow": "bun run load:workflow:baseline", + "load:workflow:baseline": "BASE_URL=${BASE_URL:-http://localhost:3000} WARMUP_DURATION=${WARMUP_DURATION:-10} WARMUP_RATE=${WARMUP_RATE:-2} PEAK_RATE=${PEAK_RATE:-8} HOLD_DURATION=${HOLD_DURATION:-20} bunx artillery run scripts/load/workflow-concurrency.yml", + "load:workflow:waves": "BASE_URL=${BASE_URL:-http://localhost:3000} WAVE_ONE_DURATION=${WAVE_ONE_DURATION:-10} WAVE_ONE_RATE=${WAVE_ONE_RATE:-6} QUIET_DURATION=${QUIET_DURATION:-5} WAVE_TWO_DURATION=${WAVE_TWO_DURATION:-15} WAVE_TWO_RATE=${WAVE_TWO_RATE:-8} WAVE_THREE_DURATION=${WAVE_THREE_DURATION:-20} WAVE_THREE_RATE=${WAVE_THREE_RATE:-10} bunx artillery run scripts/load/workflow-waves.yml", + "load:workflow:isolation": "BASE_URL=${BASE_URL:-http://localhost:3000} ISOLATION_DURATION=${ISOLATION_DURATION:-30} TOTAL_RATE=${TOTAL_RATE:-9} WORKSPACE_A_WEIGHT=${WORKSPACE_A_WEIGHT:-8} WORKSPACE_B_WEIGHT=${WORKSPACE_B_WEIGHT:-1} bunx artillery run scripts/load/workflow-isolation.yml", "build": "bun run build:pptx-worker && next build", "build:pptx-worker": "bun build ./lib/execution/pptx-worker.cjs --target=node --format=cjs --outfile ./dist/pptx-worker.cjs", "start": "next start", + "worker": "NODE_ENV=production bun run worker/index.ts", "prepare": "cd ../.. 
&& bun husky", "test": "vitest run", "test:watch": "vitest", @@ -92,6 +98,7 @@ "better-auth-harmony": "1.3.1", "binary-extensions": "^2.0.0", "browser-image-compression": "^2.0.2", + "bullmq": "5.71.0", "chalk": "5.6.2", "chart.js": "4.5.1", "cheerio": "1.1.2", diff --git a/apps/sim/scripts/load/README.md b/apps/sim/scripts/load/README.md new file mode 100644 index 00000000000..926f84fad57 --- /dev/null +++ b/apps/sim/scripts/load/README.md @@ -0,0 +1,113 @@ +# Workflow Load Tests + +These local-only Artillery scenarios exercise `POST /api/workflows/[id]/execute` in async mode. + +## Requirements + +- The app should be running locally, for example with `bun run dev:full` +- Each scenario needs valid workflow IDs and API keys +- All scenarios default to `http://localhost:3000` + +The default rates are tuned for these local limits: + +- `ADMISSION_GATE_MAX_INFLIGHT=500` +- `DISPATCH_MAX_QUEUE_PER_WORKSPACE=1000` +- `DISPATCH_MAX_QUEUE_GLOBAL=50000` +- `WORKSPACE_CONCURRENCY_FREE=5` +- `WORKSPACE_CONCURRENCY_PRO=50` +- `WORKSPACE_CONCURRENCY_TEAM=200` +- `WORKSPACE_CONCURRENCY_ENTERPRISE=200` + +That means the defaults are intentionally aimed at forcing queueing for a Free workspace without overwhelming a single local dev server process. + +## Baseline Concurrency + +Use this to ramp traffic into one workflow and observe normal queueing behavior. + +Default profile: + +- Starts at `2` requests per second +- Ramps to `8` requests per second +- Holds there for `20` seconds +- Good for validating queueing against a Free workspace concurrency of `5` + +```bash +WORKFLOW_ID= \ +SIM_API_KEY= \ +bun run load:workflow:baseline +``` + +Optional variables: + +- `BASE_URL` +- `WARMUP_DURATION` +- `WARMUP_RATE` +- `PEAK_RATE` +- `HOLD_DURATION` + +For higher-plan workspaces, a good local starting point is: + +- Pro: `PEAK_RATE=20` to `40` +- Team or Enterprise: `PEAK_RATE=50` to `100` + +## Queueing Waves + +Use this to send repeated bursts to one workflow in the same workspace. 
+ +Default profile: + +- Wave 1: `6` requests per second for `10` seconds +- Wave 2: `8` requests per second for `15` seconds +- Wave 3: `10` requests per second for `20` seconds +- Quiet gaps: `5` seconds + +```bash +WORKFLOW_ID= \ +SIM_API_KEY= \ +bun run load:workflow:waves +``` + +Optional variables: + +- `BASE_URL` +- `WAVE_ONE_DURATION` +- `WAVE_ONE_RATE` +- `QUIET_DURATION` +- `WAVE_TWO_DURATION` +- `WAVE_TWO_RATE` +- `WAVE_THREE_DURATION` +- `WAVE_THREE_RATE` + +## Two-Workspace Isolation + +Use this to send mixed traffic to two workflows from different workspaces and compare whether one workspace's queue pressure appears to affect the other. + +Default profile: + +- Total rate: `9` requests per second for `30` seconds +- Weight split: `8:1` +- In practice this sends heavy pressure to workspace A while still sending a light stream to workspace B + +```bash +WORKFLOW_ID_A= \ +SIM_API_KEY_A= \ +WORKFLOW_ID_B= \ +SIM_API_KEY_B= \ +bun run load:workflow:isolation +``` + +Optional variables: + +- `BASE_URL` +- `ISOLATION_DURATION` +- `TOTAL_RATE` +- `WORKSPACE_A_WEIGHT` +- `WORKSPACE_B_WEIGHT` + +## Notes + +- `load:workflow` is an alias for `load:workflow:baseline` +- All scenarios send `x-execution-mode: async` +- Artillery output will show request counts and response codes, which is usually enough for quick local verification +- At these defaults, you should observe queueing behavior before you approach `ADMISSION_GATE_MAX_INFLIGHT=500` or `DISPATCH_MAX_QUEUE_PER_WORKSPACE=1000` +- If you still see lots of `429` or `ETIMEDOUT` responses locally, lower the rates again before increasing durations diff --git a/apps/sim/scripts/load/workflow-concurrency.yml b/apps/sim/scripts/load/workflow-concurrency.yml new file mode 100644 index 00000000000..a981d438e91 --- /dev/null +++ b/apps/sim/scripts/load/workflow-concurrency.yml @@ -0,0 +1,24 @@ +config: + target: "{{ $processEnvironment.BASE_URL }}" + phases: + - duration: "{{ $processEnvironment.WARMUP_DURATION }}" + 
arrivalRate: "{{ $processEnvironment.WARMUP_RATE }}" + rampTo: "{{ $processEnvironment.PEAK_RATE }}" + name: baseline-ramp + - duration: "{{ $processEnvironment.HOLD_DURATION }}" + arrivalRate: "{{ $processEnvironment.PEAK_RATE }}" + name: baseline-hold + defaults: + headers: + content-type: application/json + x-api-key: "{{ $processEnvironment.SIM_API_KEY }}" + x-execution-mode: async +scenarios: + - name: baseline-workflow-concurrency + flow: + - post: + url: "/api/workflows/{{ $processEnvironment.WORKFLOW_ID }}/execute" + json: + input: + source: artillery-baseline + runId: "{{ $uuid }}" diff --git a/apps/sim/scripts/load/workflow-isolation.yml b/apps/sim/scripts/load/workflow-isolation.yml new file mode 100644 index 00000000000..1dbadb83a8d --- /dev/null +++ b/apps/sim/scripts/load/workflow-isolation.yml @@ -0,0 +1,35 @@ +config: + target: "{{ $processEnvironment.BASE_URL }}" + phases: + - duration: "{{ $processEnvironment.ISOLATION_DURATION }}" + arrivalRate: "{{ $processEnvironment.TOTAL_RATE }}" + name: mixed-workspace-load + defaults: + headers: + content-type: application/json + x-execution-mode: async +scenarios: + - name: workspace-a-traffic + weight: "{{ $processEnvironment.WORKSPACE_A_WEIGHT }}" + flow: + - post: + url: "/api/workflows/{{ $processEnvironment.WORKFLOW_ID_A }}/execute" + headers: + x-api-key: "{{ $processEnvironment.SIM_API_KEY_A }}" + json: + input: + source: artillery-isolation + workspace: a + runId: "{{ $uuid }}" + - name: workspace-b-traffic + weight: "{{ $processEnvironment.WORKSPACE_B_WEIGHT }}" + flow: + - post: + url: "/api/workflows/{{ $processEnvironment.WORKFLOW_ID_B }}/execute" + headers: + x-api-key: "{{ $processEnvironment.SIM_API_KEY_B }}" + json: + input: + source: artillery-isolation + workspace: b + runId: "{{ $uuid }}" diff --git a/apps/sim/scripts/load/workflow-waves.yml b/apps/sim/scripts/load/workflow-waves.yml new file mode 100644 index 00000000000..dd20f6dc0f6 --- /dev/null +++ 
b/apps/sim/scripts/load/workflow-waves.yml @@ -0,0 +1,33 @@ +config: + target: "{{ $processEnvironment.BASE_URL }}" + phases: + - duration: "{{ $processEnvironment.WAVE_ONE_DURATION }}" + arrivalRate: "{{ $processEnvironment.WAVE_ONE_RATE }}" + name: wave-one + - duration: "{{ $processEnvironment.QUIET_DURATION }}" + arrivalRate: 1 + name: quiet-gap + - duration: "{{ $processEnvironment.WAVE_TWO_DURATION }}" + arrivalRate: "{{ $processEnvironment.WAVE_TWO_RATE }}" + name: wave-two + - duration: "{{ $processEnvironment.QUIET_DURATION }}" + arrivalRate: 1 + name: quiet-gap-two + - duration: "{{ $processEnvironment.WAVE_THREE_DURATION }}" + arrivalRate: "{{ $processEnvironment.WAVE_THREE_RATE }}" + name: wave-three + defaults: + headers: + content-type: application/json + x-api-key: "{{ $processEnvironment.SIM_API_KEY }}" + x-execution-mode: async +scenarios: + - name: workflow-queue-waves + flow: + - post: + url: "/api/workflows/{{ $processEnvironment.WORKFLOW_ID }}/execute" + json: + input: + source: artillery-waves + runId: "{{ $uuid }}" + waveProfile: single-workspace diff --git a/apps/sim/worker/health.ts b/apps/sim/worker/health.ts new file mode 100644 index 00000000000..f2a5fea3c11 --- /dev/null +++ b/apps/sim/worker/health.ts @@ -0,0 +1,77 @@ +import { createServer } from 'http' +import { createLogger } from '@sim/logger' +import { getRedisClient } from '@/lib/core/config/redis' + +const logger = createLogger('BullMQWorkerHealth') + +export interface WorkerHealthServer { + close: () => Promise +} + +interface WorkerHealthCheck { + redisConnected: boolean + dispatcherLastWakeAt: number +} + +let healthState: WorkerHealthCheck = { + redisConnected: false, + dispatcherLastWakeAt: 0, +} + +export function updateWorkerHealthState(update: Partial): void { + healthState = { ...healthState, ...update } +} + +export function startWorkerHealthServer(port: number): WorkerHealthServer { + const server = createServer((req, res) => { + if (req.method === 'GET' && req.url 
=== '/health') { + const redis = getRedisClient() + const redisConnected = redis !== null + const dispatcherActive = + healthState.dispatcherLastWakeAt > 0 && + Date.now() - healthState.dispatcherLastWakeAt < 30_000 + + const healthy = redisConnected && dispatcherActive + + res.writeHead(healthy ? 200 : 503, { 'Content-Type': 'application/json' }) + res.end( + JSON.stringify({ + ok: healthy, + redis: redisConnected, + dispatcher: dispatcherActive, + lastWakeAgoMs: healthState.dispatcherLastWakeAt + ? Date.now() - healthState.dispatcherLastWakeAt + : null, + }) + ) + return + } + + if (req.method === 'GET' && req.url === '/health/live') { + res.writeHead(200, { 'Content-Type': 'application/json' }) + res.end(JSON.stringify({ ok: true })) + return + } + + res.writeHead(404, { 'Content-Type': 'application/json' }) + res.end(JSON.stringify({ error: 'Not found' })) + }) + + server.listen(port, '0.0.0.0', () => { + logger.info(`Worker health server listening on port ${port}`) + }) + + return { + close: () => + new Promise((resolve, reject) => { + server.close((error) => { + if (error) { + reject(error) + return + } + + resolve() + }) + }), + } +} diff --git a/apps/sim/worker/index.ts b/apps/sim/worker/index.ts new file mode 100644 index 00000000000..aaf71dd5aab --- /dev/null +++ b/apps/sim/worker/index.ts @@ -0,0 +1,190 @@ +import { createLogger } from '@sim/logger' +import { Worker } from 'bullmq' +import { + getBullMQConnectionOptions, + isBullMQEnabled, + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + MOTHERSHIP_JOB_EXECUTION_QUEUE, + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, +} from '@/lib/core/bullmq' +import { wakeWorkspaceDispatcher } from '@/lib/core/workspace-dispatch' +import { sweepPendingNotificationDeliveries } from '@/background/workspace-notification-delivery' +import { startWorkerHealthServer, updateWorkerHealthState } from '@/worker/health' +import { processKnowledgeConnectorSync } from 
'@/worker/processors/knowledge-connector-sync' +import { processKnowledgeDocument } from '@/worker/processors/knowledge-document-processing' +import { processMothershipJobExecution } from '@/worker/processors/mothership-job-execution' +import { processSchedule } from '@/worker/processors/schedule' +import { processWebhook } from '@/worker/processors/webhook' +import { processWorkflow } from '@/worker/processors/workflow' +import { processWorkspaceNotificationDelivery } from '@/worker/processors/workspace-notification-delivery' + +const logger = createLogger('BullMQWorker') + +const DEFAULT_WORKER_PORT = 3001 +const DEFAULT_WORKFLOW_CONCURRENCY = 50 +const DEFAULT_WEBHOOK_CONCURRENCY = 30 +const DEFAULT_SCHEDULE_CONCURRENCY = 20 +const DEFAULT_MOTHERSHIP_JOB_CONCURRENCY = 10 +const DEFAULT_CONNECTOR_SYNC_CONCURRENCY = 5 +const DEFAULT_DOCUMENT_PROCESSING_CONCURRENCY = 20 +const DEFAULT_NOTIFICATION_DELIVERY_CONCURRENCY = 10 +const DISPATCHER_WAKE_INTERVAL_MS = 5_000 +const NOTIFICATION_SWEEPER_INTERVAL_MS = 10_000 + +function parseWorkerNumber(value: string | undefined, fallback: number): number { + const parsed = Number.parseInt(value ?? '', 10) + return Number.isFinite(parsed) && parsed > 0 ? 
parsed : fallback +} + +async function main() { + const workerPort = parseWorkerNumber(process.env.WORKER_PORT, DEFAULT_WORKER_PORT) + const healthServer = startWorkerHealthServer(workerPort) + + if (!isBullMQEnabled()) { + logger.warn('BullMQ worker started without REDIS_URL; worker will remain idle') + + const shutdownWithoutRedis = async () => { + await healthServer.close() + process.exit(0) + } + + process.on('SIGINT', shutdownWithoutRedis) + process.on('SIGTERM', shutdownWithoutRedis) + return + } + + const connection = getBullMQConnectionOptions() + + const workflowWorker = new Worker('workflow-execution', processWorkflow, { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_WORKFLOW, + DEFAULT_WORKFLOW_CONCURRENCY + ), + }) + + const webhookWorker = new Worker('webhook-execution', processWebhook, { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_WEBHOOK, + DEFAULT_WEBHOOK_CONCURRENCY + ), + }) + + const scheduleWorker = new Worker('schedule-execution', processSchedule, { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_SCHEDULE, + DEFAULT_SCHEDULE_CONCURRENCY + ), + }) + + const mothershipJobWorker = new Worker( + MOTHERSHIP_JOB_EXECUTION_QUEUE, + processMothershipJobExecution, + { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_MOTHERSHIP_JOB, + DEFAULT_MOTHERSHIP_JOB_CONCURRENCY + ), + } + ) + + const connectorSyncWorker = new Worker( + KNOWLEDGE_CONNECTOR_SYNC_QUEUE, + processKnowledgeConnectorSync, + { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_CONNECTOR_SYNC, + DEFAULT_CONNECTOR_SYNC_CONCURRENCY + ), + } + ) + + const documentProcessingWorker = new Worker( + KNOWLEDGE_DOCUMENT_PROCESSING_QUEUE, + processKnowledgeDocument, + { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_DOCUMENT_PROCESSING, + DEFAULT_DOCUMENT_PROCESSING_CONCURRENCY + ), + } + ) + + const 
notificationDeliveryWorker = new Worker( + WORKSPACE_NOTIFICATION_DELIVERY_QUEUE, + processWorkspaceNotificationDelivery, + { + connection, + concurrency: parseWorkerNumber( + process.env.WORKER_CONCURRENCY_NOTIFICATION_DELIVERY, + DEFAULT_NOTIFICATION_DELIVERY_CONCURRENCY + ), + } + ) + + const workers = [ + workflowWorker, + webhookWorker, + scheduleWorker, + mothershipJobWorker, + connectorSyncWorker, + documentProcessingWorker, + notificationDeliveryWorker, + ] + + for (const worker of workers) { + worker.on('failed', (job, error) => { + logger.error('BullMQ job failed', { + queue: worker.name, + jobId: job?.id, + name: job?.name, + error: error.message, + }) + }) + } + + const dispatcherWakeInterval = setInterval(() => { + void wakeWorkspaceDispatcher() + .then(() => { + updateWorkerHealthState({ dispatcherLastWakeAt: Date.now() }) + }) + .catch((error) => { + logger.error('Periodic workspace dispatcher wake failed', { error }) + }) + }, DISPATCHER_WAKE_INTERVAL_MS) + dispatcherWakeInterval.unref() + + const notificationSweeperInterval = setInterval(() => { + void sweepPendingNotificationDeliveries().catch((error) => { + logger.error('Pending notification sweeper failed', { error }) + }) + }, NOTIFICATION_SWEEPER_INTERVAL_MS) + notificationSweeperInterval.unref() + + const shutdown = async () => { + logger.info('Shutting down BullMQ worker') + + clearInterval(dispatcherWakeInterval) + clearInterval(notificationSweeperInterval) + await Promise.allSettled(workers.map((worker) => worker.close())) + await healthServer.close() + + process.exit(0) + } + + process.on('SIGINT', shutdown) + process.on('SIGTERM', shutdown) +} + +main().catch((error) => { + logger.error('Failed to start BullMQ worker', { + error: error instanceof Error ? 
error.message : String(error), + }) + process.exit(1) +}) diff --git a/apps/sim/worker/processors/knowledge-connector-sync.ts b/apps/sim/worker/processors/knowledge-connector-sync.ts new file mode 100644 index 00000000000..9a504ebde0f --- /dev/null +++ b/apps/sim/worker/processors/knowledge-connector-sync.ts @@ -0,0 +1,22 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeSync } from '@/lib/knowledge/connectors/sync-engine' +import type { ConnectorSyncPayload } from '@/background/knowledge-connector-sync' + +const logger = createLogger('BullMQKnowledgeConnectorSync') + +export async function processKnowledgeConnectorSync(job: Job>) { + const { connectorId, fullSync } = job.data.payload + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 1) + + logger.info('Processing connector sync job', { + jobId: job.id, + connectorId, + }) + + return runDispatchedJob(job.data.metadata, () => executeSync(connectorId, { fullSync }), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/knowledge-document-processing.ts b/apps/sim/worker/processors/knowledge-document-processing.ts new file mode 100644 index 00000000000..74fff94fb7b --- /dev/null +++ b/apps/sim/worker/processors/knowledge-document-processing.ts @@ -0,0 +1,26 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { type DocumentJobData, processDocumentAsync } from '@/lib/knowledge/documents/service' + +const logger = createLogger('BullMQKnowledgeDocumentProcessing') + +export async function processKnowledgeDocument(job: Job>) { + const { knowledgeBaseId, documentId, docData, processingOptions } = job.data.payload + const isFinalAttempt = job.attemptsMade + 1 >= 
(job.opts.attempts ?? 1) + + logger.info('Processing knowledge document job', { + jobId: job.id, + knowledgeBaseId, + documentId, + }) + + await runDispatchedJob( + job.data.metadata, + () => processDocumentAsync(knowledgeBaseId, documentId, docData, processingOptions), + { + isFinalAttempt, + } + ) +} diff --git a/apps/sim/worker/processors/mothership-job-execution.ts b/apps/sim/worker/processors/mothership-job-execution.ts new file mode 100644 index 00000000000..d5980394388 --- /dev/null +++ b/apps/sim/worker/processors/mothership-job-execution.ts @@ -0,0 +1,20 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeJobInline, type JobExecutionPayload } from '@/background/schedule-execution' + +const logger = createLogger('BullMQMothershipJobExecution') + +export async function processMothershipJobExecution(job: Job>) { + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing mothership scheduled job', { + jobId: job.id, + scheduleId: job.data.payload.scheduleId, + }) + + await runDispatchedJob(job.data.metadata, () => executeJobInline(job.data.payload), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/schedule.ts b/apps/sim/worker/processors/schedule.ts new file mode 100644 index 00000000000..78f4cde7d79 --- /dev/null +++ b/apps/sim/worker/processors/schedule.ts @@ -0,0 +1,21 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeScheduleJob, type ScheduleExecutionPayload } from '@/background/schedule-execution' + +const logger = createLogger('BullMQScheduleProcessor') + +export async function processSchedule(job: Job>) { + const { payload } = job.data + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 1) + + logger.info('Processing schedule job', { + jobId: job.id, + name: job.name, + }) + + return runDispatchedJob(job.data.metadata, () => executeScheduleJob(payload), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/webhook.ts b/apps/sim/worker/processors/webhook.ts new file mode 100644 index 00000000000..da61aede1c8 --- /dev/null +++ b/apps/sim/worker/processors/webhook.ts @@ -0,0 +1,21 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { executeWebhookJob, type WebhookExecutionPayload } from '@/background/webhook-execution' + +const logger = createLogger('BullMQWebhookProcessor') + +export async function processWebhook(job: Job>) { + const { payload } = job.data + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing webhook job', { + jobId: job.id, + name: job.name, + }) + + return runDispatchedJob(job.data.metadata, () => executeWebhookJob(payload), { + isFinalAttempt, + }) +} diff --git a/apps/sim/worker/processors/workflow.ts b/apps/sim/worker/processors/workflow.ts new file mode 100644 index 00000000000..8648e76b556 --- /dev/null +++ b/apps/sim/worker/processors/workflow.ts @@ -0,0 +1,51 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { + DIRECT_WORKFLOW_JOB_NAME, + executeQueuedWorkflowJob, + type QueuedWorkflowExecutionPayload, +} from '@/lib/workflows/executor/queued-workflow-execution' +import { executeWorkflowJob, type WorkflowExecutionPayload } from '@/background/workflow-execution' + +const logger = createLogger('BullMQWorkflowProcessor') + +type WorkflowQueueJobData = + | BullMQJobData + | BullMQJobData + +function isDirectWorkflowJob( + job: Job +): job is Job> { + return job.name === DIRECT_WORKFLOW_JOB_NAME +} + +function isBackgroundWorkflowJob( + job: Job +): job is Job> { + return job.name !== DIRECT_WORKFLOW_JOB_NAME +} + +export async function processWorkflow(job: Job) { + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 
1) + + logger.info('Processing workflow job', { + jobId: job.id, + name: job.name, + }) + + if (isDirectWorkflowJob(job)) { + return runDispatchedJob(job.data.metadata, () => executeQueuedWorkflowJob(job.data.payload), { + isFinalAttempt, + }) + } + + if (isBackgroundWorkflowJob(job)) { + return runDispatchedJob(job.data.metadata, () => executeWorkflowJob(job.data.payload), { + isFinalAttempt, + }) + } + + throw new Error(`Unsupported workflow job type: ${job.name}`) +} diff --git a/apps/sim/worker/processors/workspace-notification-delivery.ts b/apps/sim/worker/processors/workspace-notification-delivery.ts new file mode 100644 index 00000000000..7b6dbbc6da1 --- /dev/null +++ b/apps/sim/worker/processors/workspace-notification-delivery.ts @@ -0,0 +1,32 @@ +import { createLogger } from '@sim/logger' +import type { Job } from 'bullmq' +import type { BullMQJobData } from '@/lib/core/bullmq' +import { runDispatchedJob } from '@/lib/core/workspace-dispatch' +import { + executeNotificationDelivery, + type NotificationDeliveryParams, +} from '@/background/workspace-notification-delivery' + +const logger = createLogger('BullMQWorkspaceNotificationDelivery') + +export async function processWorkspaceNotificationDelivery( + job: Job> +) { + const isFinalAttempt = job.attemptsMade + 1 >= (job.opts.attempts ?? 1) + + logger.info('Processing workspace notification delivery job', { + jobId: job.id, + deliveryId: job.data.payload.deliveryId, + }) + + const result = await runDispatchedJob( + job.data.metadata, + () => executeNotificationDelivery(job.data.payload), + { + isFinalAttempt, + } + ) + + // Retry scheduling is persisted in the notification delivery row and + // rehydrated by the periodic sweeper, which makes retries crash-safe. 
+} diff --git a/bun.lock b/bun.lock index c75c40f7233..de08d9dbc64 100644 --- a/bun.lock +++ b/bun.lock @@ -1,6 +1,5 @@ { "lockfileVersion": 1, - "configVersion": 0, "workspaces": { "": { "name": "simstudio", @@ -117,6 +116,7 @@ "better-auth-harmony": "1.3.1", "binary-extensions": "^2.0.0", "browser-image-compression": "^2.0.2", + "bullmq": "5.71.0", "chalk": "5.6.2", "chart.js": "4.5.1", "cheerio": "1.1.2", @@ -829,6 +829,18 @@ "@mongodb-js/saslprep": ["@mongodb-js/saslprep@1.4.6", "", { "dependencies": { "sparse-bitfield": "^3.0.3" } }, "sha512-y+x3H1xBZd38n10NZF/rEBlvDOOMQ6LKUTHqr8R9VkJ+mmQOYtJFxIlkkK8fZrtOiL6VixbOBWMbZGBdal3Z1g=="], + "@msgpackr-extract/msgpackr-extract-darwin-arm64": ["@msgpackr-extract/msgpackr-extract-darwin-arm64@3.0.3", "", { "os": "darwin", "cpu": "arm64" }, "sha512-QZHtlVgbAdy2zAqNA9Gu1UpIuI8Xvsd1v8ic6B2pZmeFnFcMWiPLfWXh7TVw4eGEZ/C9TH281KwhVoeQUKbyjw=="], + + "@msgpackr-extract/msgpackr-extract-darwin-x64": ["@msgpackr-extract/msgpackr-extract-darwin-x64@3.0.3", "", { "os": "darwin", "cpu": "x64" }, "sha512-mdzd3AVzYKuUmiWOQ8GNhl64/IoFGol569zNRdkLReh6LRLHOXxU4U8eq0JwaD8iFHdVGqSy4IjFL4reoWCDFw=="], + + "@msgpackr-extract/msgpackr-extract-linux-arm": ["@msgpackr-extract/msgpackr-extract-linux-arm@3.0.3", "", { "os": "linux", "cpu": "arm" }, "sha512-fg0uy/dG/nZEXfYilKoRe7yALaNmHoYeIoJuJ7KJ+YyU2bvY8vPv27f7UKhGRpY6euFYqEVhxCFZgAUNQBM3nw=="], + + "@msgpackr-extract/msgpackr-extract-linux-arm64": ["@msgpackr-extract/msgpackr-extract-linux-arm64@3.0.3", "", { "os": "linux", "cpu": "arm64" }, "sha512-YxQL+ax0XqBJDZiKimS2XQaf+2wDGVa1enVRGzEvLLVFeqa5kx2bWbtcSXgsxjQB7nRqqIGFIcLteF/sHeVtQg=="], + + "@msgpackr-extract/msgpackr-extract-linux-x64": ["@msgpackr-extract/msgpackr-extract-linux-x64@3.0.3", "", { "os": "linux", "cpu": "x64" }, "sha512-cvwNfbP07pKUfq1uH+S6KJ7dT9K8WOE4ZiAcsrSes+UY55E/0jLYc+vq+DO7jlmqRb5zAggExKm0H7O/CBaesg=="], + + "@msgpackr-extract/msgpackr-extract-win32-x64": ["@msgpackr-extract/msgpackr-extract-win32-x64@3.0.3", "", { "os": 
"win32", "cpu": "x64" }, "sha512-x0fWaQtYp4E6sktbsdAqnehxDgEc/VwM7uLsRCYWaiGu0ykYdZPiS8zCWdnjHwyiumousxfBm4SO31eXqwEZhQ=="], + "@napi-rs/canvas": ["@napi-rs/canvas@0.1.97", "", { "optionalDependencies": { "@napi-rs/canvas-android-arm64": "0.1.97", "@napi-rs/canvas-darwin-arm64": "0.1.97", "@napi-rs/canvas-darwin-x64": "0.1.97", "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.97", "@napi-rs/canvas-linux-arm64-gnu": "0.1.97", "@napi-rs/canvas-linux-arm64-musl": "0.1.97", "@napi-rs/canvas-linux-riscv64-gnu": "0.1.97", "@napi-rs/canvas-linux-x64-gnu": "0.1.97", "@napi-rs/canvas-linux-x64-musl": "0.1.97", "@napi-rs/canvas-win32-arm64-msvc": "0.1.97", "@napi-rs/canvas-win32-x64-msvc": "0.1.97" } }, "sha512-8cFniXvrIEnVwuNSRCW9wirRZbHvrD3JVujdS2P5n5xiJZNZMOZcfOvJ1pb66c7jXMKHHglJEDVJGbm8XWFcXQ=="], "@napi-rs/canvas-android-arm64": ["@napi-rs/canvas-android-arm64@0.1.97", "", { "os": "android", "cpu": "arm64" }, "sha512-V1c/WVw+NzH8vk7ZK/O8/nyBSCQimU8sfMsB/9qeSvdkGKNU7+mxy/bIF0gTgeBFmHpj30S4E9WHMSrxXGQuVQ=="], @@ -1829,6 +1841,8 @@ "buildcheck": ["buildcheck@0.0.7", "", {}, "sha512-lHblz4ahamxpTmnsk+MNTRWsjYKv965MwOrSJyeD588rR3Jcu7swE+0wN5F+PbL5cjgu/9ObkhfzEPuofEMwLA=="], + "bullmq": ["bullmq@5.71.0", "", { "dependencies": { "cron-parser": "4.9.0", "ioredis": "5.9.3", "msgpackr": "1.11.5", "node-abort-controller": "3.1.1", "semver": "7.7.4", "tslib": "2.8.1", "uuid": "11.1.0" } }, "sha512-aeNWh4drsafSKnAJeiNH/nZP/5O8ZdtdMbnOPZmpjXj7NZUP5YC901U3bIH41iZValm7d1i3c34ojv7q31m30w=="], + "bytes": ["bytes@3.1.2", "", {}, "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg=="], "c12": ["c12@3.1.0", "", { "dependencies": { "chokidar": "^4.0.3", "confbox": "^0.2.2", "defu": "^6.1.4", "dotenv": "^16.6.1", "exsolve": "^1.0.7", "giget": "^2.0.0", "jiti": "^2.4.2", "ohash": "^2.0.11", "pathe": "^2.0.3", "perfect-debounce": "^1.0.0", "pkg-types": "^2.2.0", "rc9": "^2.1.2" }, "peerDependencies": { "magicast": "^0.3.5" }, "optionalPeers": ["magicast"] 
}, "sha512-uWoS8OU1MEIsOv8p/5a82c3H31LsWVR5qiyXVfBNOzfffjUWtPnhAb4BYI2uG2HfGmZmFjCtui5XNWaps+iFuw=="], @@ -1969,6 +1983,8 @@ "critters": ["critters@0.0.25", "", { "dependencies": { "chalk": "^4.1.0", "css-select": "^5.1.0", "dom-serializer": "^2.0.0", "domhandler": "^5.0.2", "htmlparser2": "^8.0.2", "postcss": "^8.4.23", "postcss-media-query-parser": "^0.2.3" } }, "sha512-ROF/tjJyyRdM8/6W0VqoN5Ql05xAGnkf5b7f3sTEl1bI5jTQQf8O918RD/V9tEb9pRY/TKcvJekDbJtniHyPtQ=="], + "cron-parser": ["cron-parser@4.9.0", "", { "dependencies": { "luxon": "^3.2.1" } }, "sha512-p0SaNjrHOnQeR8/VnfGbmg9te2kfyYSQ7Sc/j/6DtPL3JQvKxmjO9TSjNFpujqV3vEYYBvNNvXSxzyksBWAx1Q=="], + "croner": ["croner@9.1.0", "", {}, "sha512-p9nwwR4qyT5W996vBZhdvBCnMhicY5ytZkR4D1Xj0wuTDEiMnjwR57Q3RXYY/s0EpX6Ay3vgIcfaR+ewGHsi+g=="], "cronstrue": ["cronstrue@3.3.0", "", { "bin": { "cronstrue": "bin/cli.js" } }, "sha512-iwJytzJph1hosXC09zY8F5ACDJKerr0h3/2mOxg9+5uuFObYlgK0m35uUPk4GCvhHc2abK7NfnR9oMqY0qZFAg=="], @@ -2695,6 +2711,8 @@ "lucide-react": ["lucide-react@0.511.0", "", { "peerDependencies": { "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, "sha512-VK5a2ydJ7xm8GvBeKLS9mu1pVK6ucef9780JVUjw6bAjJL/QXnd4Y0p7SPeOUMC27YhzNCZvm5d/QX0Tp3rc0w=="], + "luxon": ["luxon@3.7.2", "", {}, "sha512-vtEhXh/gNjI9Yg1u4jX/0YVPMvxzHuGgCm6tC5kZyb08yjGWGnqAjGJvcXbqQR2P3MyMEFnRbpcdFS6PBcLqew=="], + "magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="], "magicast": ["magicast@0.3.5", "", { "dependencies": { "@babel/parser": "^7.25.4", "@babel/types": "^7.25.4", "source-map-js": "^1.2.0" } }, "sha512-L0WhttDl+2BOsybvEOLK7fW3UA0OQ0IQ2d6Zl2x/a6vVRs3bAY0ECOSHHeL5jD+SbOpOCUEi0y1DgHEn9Qn1AQ=="], @@ -2885,6 +2903,10 @@ "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], + "msgpackr": ["msgpackr@1.11.5", "", { 
"optionalDependencies": { "msgpackr-extract": "^3.0.2" } }, "sha512-UjkUHN0yqp9RWKy0Lplhh+wlpdt9oQBYgULZOiFhV3VclSF1JnSQWZ5r9gORQlNYaUKQoR8itv7g7z1xDDuACA=="], + + "msgpackr-extract": ["msgpackr-extract@3.0.3", "", { "dependencies": { "node-gyp-build-optional-packages": "5.2.2" }, "optionalDependencies": { "@msgpackr-extract/msgpackr-extract-darwin-arm64": "3.0.3", "@msgpackr-extract/msgpackr-extract-darwin-x64": "3.0.3", "@msgpackr-extract/msgpackr-extract-linux-arm": "3.0.3", "@msgpackr-extract/msgpackr-extract-linux-arm64": "3.0.3", "@msgpackr-extract/msgpackr-extract-linux-x64": "3.0.3", "@msgpackr-extract/msgpackr-extract-win32-x64": "3.0.3" }, "bin": { "download-msgpackr-prebuilds": "bin/download-prebuilds.js" } }, "sha512-P0efT1C9jIdVRefqjzOQ9Xml57zpOXnIuS+csaB4MdZbTdmGDLo8XhzBG1N7aO11gKDDkJvBLULeFTo46wwreA=="], + "mustache": ["mustache@4.2.0", "", { "bin": { "mustache": "bin/mustache" } }, "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ=="], "mute-stream": ["mute-stream@0.0.8", "", {}, "sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA=="], @@ -2925,6 +2947,8 @@ "node-abi": ["node-abi@3.89.0", "", { "dependencies": { "semver": "^7.3.5" } }, "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA=="], + "node-abort-controller": ["node-abort-controller@3.1.1", "", {}, "sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ=="], + "node-domexception": ["node-domexception@1.0.0", "", {}, "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ=="], "node-ensure": ["node-ensure@0.0.0", "", {}, "sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw=="], @@ -2935,6 +2959,8 @@ "node-gyp-build": ["node-gyp-build@4.8.4", "", { "bin": { "node-gyp-build": "bin.js", "node-gyp-build-optional": "optional.js", 
"node-gyp-build-test": "build-test.js" } }, "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ=="], + "node-gyp-build-optional-packages": ["node-gyp-build-optional-packages@5.2.2", "", { "dependencies": { "detect-libc": "^2.0.1" }, "bin": { "node-gyp-build-optional-packages": "bin.js", "node-gyp-build-optional-packages-optional": "optional.js", "node-gyp-build-optional-packages-test": "build-test.js" } }, "sha512-s+w+rBWnpTMwSFbaE0UXsRlg7hU4FjekKU4eyAih5T8nJuNZT1nNsskXpxmeqSK9UzkBl6UgRlnKc8hz8IEqOw=="], + "node-int64": ["node-int64@0.4.0", "", {}, "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw=="], "node-readable-to-web-readable-stream": ["node-readable-to-web-readable-stream@0.4.2", "", {}, "sha512-/cMZNI34v//jUTrI+UIo4ieHAB5EZRY/+7OmXZgBxaWBMcW2tGdceIw06RFxWxrKZ5Jp3sI2i5TsRo+CBhtVLQ=="], @@ -4161,6 +4187,8 @@ "body-parser/iconv-lite": ["iconv-lite@0.7.1", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-2Tth85cXwGFHfvRgZWszZSvdo+0Xsqmw8k8ZwxScfcBneNUraK+dxRxRm24nszx80Y0TVio8kKLt5sLE7ZCLlw=="], + "bullmq/ioredis": ["ioredis@5.9.3", "", { "dependencies": { "@ioredis/commands": "1.5.0", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-VI5tMCdeoxZWU5vjHWsiE/Su76JGhBvWF1MJnV9ZtGltHk9BmD48oDq8Tj8haZ85aceXZMxLNDQZRVo5QKNgXA=="], + "c12/chokidar": ["chokidar@4.0.3", "", { "dependencies": { "readdirp": "^4.0.1" } }, "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA=="], "c12/confbox": ["confbox@0.2.4", "", {}, "sha512-ysOGlgTFbN2/Y6Cg3Iye8YKulHw+R2fNXHrgSmXISQdMnomY6eNDprVdW9R5xBguEqI954+S6709UyiO7B+6OQ=="], @@ -4649,6 +4677,8 @@ "bl/readable-stream/string_decoder": ["string_decoder@1.3.0", "", { "dependencies": { "safe-buffer": "~5.2.0" } 
}, "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA=="], + "bullmq/ioredis/@ioredis/commands": ["@ioredis/commands@1.5.0", "", {}, "sha512-eUgLqrMf8nJkZxT24JvVRrQya1vZkQh8BBeYNwGDqa5I0VUi8ACx7uFvAaLxintokpTenkK6DASvo/bvNbBGow=="], + "c12/chokidar/readdirp": ["readdirp@4.1.2", "", {}, "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg=="], "cheerio/htmlparser2/entities": ["entities@7.0.1", "", {}, "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA=="], diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 527c8d86b37..4c03862e35b 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -23,6 +23,9 @@ services: - OLLAMA_URL=${OLLAMA_URL:-http://localhost:11434} - SOCKET_SERVER_URL=${SOCKET_SERVER_URL:-http://realtime:3002} - NEXT_PUBLIC_SOCKET_URL=${NEXT_PUBLIC_SOCKET_URL:-http://localhost:3002} + - ADMISSION_GATE_MAX_INFLIGHT=${ADMISSION_GATE_MAX_INFLIGHT:-500} + - DISPATCH_MAX_QUEUE_PER_WORKSPACE=${DISPATCH_MAX_QUEUE_PER_WORKSPACE:-1000} + - DISPATCH_MAX_QUEUE_GLOBAL=${DISPATCH_MAX_QUEUE_GLOBAL:-50000} depends_on: db: condition: service_healthy @@ -37,6 +40,43 @@ services: retries: 3 start_period: 10s + sim-worker: + image: ghcr.io/simstudioai/simstudio:latest + command: ['bun', 'run', 'worker'] + restart: unless-stopped + deploy: + resources: + limits: + memory: 4G + environment: + - NODE_ENV=production + - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio} + - REDIS_URL=${REDIS_URL:-} + - ENCRYPTION_KEY=${ENCRYPTION_KEY} + - API_ENCRYPTION_KEY=${API_ENCRYPTION_KEY:-} + - INTERNAL_API_SECRET=${INTERNAL_API_SECRET} + - WORKER_PORT=${WORKER_PORT:-3001} + - WORKER_CONCURRENCY_WORKFLOW=${WORKER_CONCURRENCY_WORKFLOW:-50} + - WORKER_CONCURRENCY_WEBHOOK=${WORKER_CONCURRENCY_WEBHOOK:-30} + - 
WORKER_CONCURRENCY_SCHEDULE=${WORKER_CONCURRENCY_SCHEDULE:-20} + - WORKER_CONCURRENCY_MOTHERSHIP_JOB=${WORKER_CONCURRENCY_MOTHERSHIP_JOB:-10} + - WORKER_CONCURRENCY_CONNECTOR_SYNC=${WORKER_CONCURRENCY_CONNECTOR_SYNC:-5} + - WORKER_CONCURRENCY_DOCUMENT_PROCESSING=${WORKER_CONCURRENCY_DOCUMENT_PROCESSING:-20} + - WORKER_CONCURRENCY_NOTIFICATION_DELIVERY=${WORKER_CONCURRENCY_NOTIFICATION_DELIVERY:-10} + - DISPATCH_MAX_QUEUE_PER_WORKSPACE=${DISPATCH_MAX_QUEUE_PER_WORKSPACE:-1000} + - DISPATCH_MAX_QUEUE_GLOBAL=${DISPATCH_MAX_QUEUE_GLOBAL:-50000} + depends_on: + db: + condition: service_healthy + migrations: + condition: service_completed_successfully + healthcheck: + test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:${WORKER_PORT:-3001}/health'] + interval: 90s + timeout: 5s + retries: 3 + start_period: 10s + realtime: image: ghcr.io/simstudioai/realtime:latest restart: unless-stopped diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml index 9b59e1b7582..8d75b73692e 100644 --- a/helm/sim/values.yaml +++ b/helm/sim/values.yaml @@ -124,6 +124,11 @@ app: OLLAMA_URL: "" # Ollama local LLM server URL ELEVENLABS_API_KEY: "" # ElevenLabs API key for text-to-speech in deployed chat + # Admission & Dispatch Queue Configuration + ADMISSION_GATE_MAX_INFLIGHT: "500" # Max concurrent in-flight execution requests per pod + DISPATCH_MAX_QUEUE_PER_WORKSPACE: "1000" # Max queued dispatch jobs per workspace + DISPATCH_MAX_QUEUE_GLOBAL: "50000" # Max queued dispatch jobs globally + # Rate Limiting Configuration (per minute) RATE_LIMIT_WINDOW_MS: "60000" # Rate limit window duration (1 minute) RATE_LIMIT_FREE_SYNC: "50" # Sync API executions per minute From d944b47548c67de4827d13b2577f28be090f068b Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 27 Mar 2026 15:57:40 -0700 Subject: [PATCH 03/65] fix(linear): add default null for after cursor (#3814) --- apps/sim/blocks/blocks/linear.ts | 40 +++++++++---------- apps/sim/tools/linear/list_attachments.ts | 2 +- 
apps/sim/tools/linear/list_comments.ts | 2 +- .../tools/linear/list_customer_requests.ts | 2 +- .../tools/linear/list_customer_statuses.ts | 2 +- apps/sim/tools/linear/list_customer_tiers.ts | 2 +- apps/sim/tools/linear/list_customers.ts | 2 +- apps/sim/tools/linear/list_cycles.ts | 2 +- apps/sim/tools/linear/list_favorites.ts | 2 +- apps/sim/tools/linear/list_issue_relations.ts | 2 +- apps/sim/tools/linear/list_labels.ts | 2 +- apps/sim/tools/linear/list_notifications.ts | 2 +- apps/sim/tools/linear/list_project_labels.ts | 4 +- .../tools/linear/list_project_milestones.ts | 2 +- .../sim/tools/linear/list_project_statuses.ts | 2 +- apps/sim/tools/linear/list_project_updates.ts | 2 +- apps/sim/tools/linear/list_projects.ts | 2 +- apps/sim/tools/linear/list_teams.ts | 2 +- apps/sim/tools/linear/list_users.ts | 2 +- apps/sim/tools/linear/list_workflow_states.ts | 2 +- apps/sim/tools/linear/read_issues.ts | 4 +- apps/sim/tools/linear/search_issues.ts | 2 +- 22 files changed, 43 insertions(+), 43 deletions(-) diff --git a/apps/sim/blocks/blocks/linear.ts b/apps/sim/blocks/blocks/linear.ts index 504cb6b5ffa..18dad4d8574 100644 --- a/apps/sim/blocks/blocks/linear.ts +++ b/apps/sim/blocks/blocks/linear.ts @@ -1532,7 +1532,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n projectId: effectiveProjectId || undefined, includeArchived: params.includeArchived, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_get_issue': @@ -1599,7 +1599,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n teamId: effectiveTeamId, includeArchived: params.includeArchived, first: params.first ? 
Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_add_label_to_issue': @@ -1650,7 +1650,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n ...baseParams, issueId: params.issueId.trim(), first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_list_projects': @@ -1659,7 +1659,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n teamId: effectiveTeamId, includeArchived: params.includeArchived, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_get_project': @@ -1714,7 +1714,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n return { ...baseParams, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_get_viewer': @@ -1725,7 +1725,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n ...baseParams, teamId: effectiveTeamId, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_create_label': @@ -1764,7 +1764,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n ...baseParams, teamId: effectiveTeamId, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_create_workflow_state': @@ -1795,7 +1795,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n ...baseParams, teamId: effectiveTeamId, first: params.first ? 
Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_get_cycle': @@ -1860,7 +1860,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n ...baseParams, issueId: params.issueId.trim(), first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_update_attachment': @@ -1901,7 +1901,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n ...baseParams, issueId: params.issueId.trim(), first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_delete_issue_relation': @@ -1927,7 +1927,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n return { ...baseParams, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_create_project_update': @@ -1949,14 +1949,14 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n ...baseParams, projectId: effectiveProjectId, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_list_notifications': return { ...baseParams, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_update_notification': @@ -1988,7 +1988,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n return { ...baseParams, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, includeArchived: false, } @@ -2023,7 +2023,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n return { ...baseParams, first: params.first ? 
Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, includeArchived: false, } @@ -2117,7 +2117,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n return { ...baseParams, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } // Customer Tier Operations @@ -2159,7 +2159,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n return { ...baseParams, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } // Project Management Operations @@ -2212,7 +2212,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n ...baseParams, projectId: effectiveProjectId || undefined, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } case 'linear_add_label_to_project': @@ -2277,7 +2277,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n ...baseParams, projectId: params.projectIdForMilestone.trim(), first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } // Project Status Operations @@ -2328,7 +2328,7 @@ Return ONLY the date string in YYYY-MM-DD format - no explanations, no quotes, n return { ...baseParams, first: params.first ? Number(params.first) : undefined, - after: params.after, + after: params.after?.trim() || undefined, } default: diff --git a/apps/sim/tools/linear/list_attachments.ts b/apps/sim/tools/linear/list_attachments.ts index b7270c655c0..0eac48b829b 100644 --- a/apps/sim/tools/linear/list_attachments.ts +++ b/apps/sim/tools/linear/list_attachments.ts @@ -76,7 +76,7 @@ export const linearListAttachmentsTool: ToolConfig< variables: { issueId: params.issueId, first: params.first ? 
Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, }), }, diff --git a/apps/sim/tools/linear/list_comments.ts b/apps/sim/tools/linear/list_comments.ts index c14251a5028..05eb77103ae 100644 --- a/apps/sim/tools/linear/list_comments.ts +++ b/apps/sim/tools/linear/list_comments.ts @@ -76,7 +76,7 @@ export const linearListCommentsTool: ToolConfig< variables: { issueId: params.issueId, first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, }), }, diff --git a/apps/sim/tools/linear/list_customer_requests.ts b/apps/sim/tools/linear/list_customer_requests.ts index eb5a4857005..abf406e7680 100644 --- a/apps/sim/tools/linear/list_customer_requests.ts +++ b/apps/sim/tools/linear/list_customer_requests.ts @@ -89,7 +89,7 @@ export const linearListCustomerRequestsTool: ToolConfig< `, variables: { first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, includeArchived: params.includeArchived || false, }, }), diff --git a/apps/sim/tools/linear/list_customer_statuses.ts b/apps/sim/tools/linear/list_customer_statuses.ts index bb36d51a2c4..b7f3ea7d76b 100644 --- a/apps/sim/tools/linear/list_customer_statuses.ts +++ b/apps/sim/tools/linear/list_customer_statuses.ts @@ -70,7 +70,7 @@ export const linearListCustomerStatusesTool: ToolConfig< `, variables: { first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, }), }, diff --git a/apps/sim/tools/linear/list_customer_tiers.ts b/apps/sim/tools/linear/list_customer_tiers.ts index 5b16c968ddf..cecfe9e653c 100644 --- a/apps/sim/tools/linear/list_customer_tiers.ts +++ b/apps/sim/tools/linear/list_customer_tiers.ts @@ -69,7 +69,7 @@ export const linearListCustomerTiersTool: ToolConfig< `, variables: { first: params.first ? 
Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, }), }, diff --git a/apps/sim/tools/linear/list_customers.ts b/apps/sim/tools/linear/list_customers.ts index 4aa4fe75a5f..09db5ab6499 100644 --- a/apps/sim/tools/linear/list_customers.ts +++ b/apps/sim/tools/linear/list_customers.ts @@ -76,7 +76,7 @@ export const linearListCustomersTool: ToolConfig< `, variables: { first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, includeArchived: params.includeArchived || false, }, }), diff --git a/apps/sim/tools/linear/list_cycles.ts b/apps/sim/tools/linear/list_cycles.ts index b351bc77b3b..5a3ee3d078c 100644 --- a/apps/sim/tools/linear/list_cycles.ts +++ b/apps/sim/tools/linear/list_cycles.ts @@ -80,7 +80,7 @@ export const linearListCyclesTool: ToolConfig 0 ? filter : undefined, first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, } }, diff --git a/apps/sim/tools/linear/list_favorites.ts b/apps/sim/tools/linear/list_favorites.ts index b26a9deb93a..79e6cd0b01d 100644 --- a/apps/sim/tools/linear/list_favorites.ts +++ b/apps/sim/tools/linear/list_favorites.ts @@ -71,7 +71,7 @@ export const linearListFavoritesTool: ToolConfig< `, variables: { first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, }), }, diff --git a/apps/sim/tools/linear/list_issue_relations.ts b/apps/sim/tools/linear/list_issue_relations.ts index 9ff785f26a2..b008ba45fc4 100644 --- a/apps/sim/tools/linear/list_issue_relations.ts +++ b/apps/sim/tools/linear/list_issue_relations.ts @@ -79,7 +79,7 @@ export const linearListIssueRelationsTool: ToolConfig< variables: { issueId: params.issueId, first: params.first ? 
Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, }), }, diff --git a/apps/sim/tools/linear/list_labels.ts b/apps/sim/tools/linear/list_labels.ts index ecb891e5c01..ec1f66e375f 100644 --- a/apps/sim/tools/linear/list_labels.ts +++ b/apps/sim/tools/linear/list_labels.ts @@ -80,7 +80,7 @@ export const linearListLabelsTool: ToolConfig 0 ? filter : undefined, first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, } }, diff --git a/apps/sim/tools/linear/list_notifications.ts b/apps/sim/tools/linear/list_notifications.ts index fb8e542be21..7509334c4fa 100644 --- a/apps/sim/tools/linear/list_notifications.ts +++ b/apps/sim/tools/linear/list_notifications.ts @@ -70,7 +70,7 @@ export const linearListNotificationsTool: ToolConfig< `, variables: { first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, }), }, diff --git a/apps/sim/tools/linear/list_project_labels.ts b/apps/sim/tools/linear/list_project_labels.ts index 8efcd5fe799..5156572b817 100644 --- a/apps/sim/tools/linear/list_project_labels.ts +++ b/apps/sim/tools/linear/list_project_labels.ts @@ -82,7 +82,7 @@ export const linearListProjectLabelsTool: ToolConfig< variables: { id: params.projectId.trim(), first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, } } @@ -110,7 +110,7 @@ export const linearListProjectLabelsTool: ToolConfig< `, variables: { first: params.first ? 
Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, } }, diff --git a/apps/sim/tools/linear/list_project_milestones.ts b/apps/sim/tools/linear/list_project_milestones.ts index afcde75c23f..59e9bd2cb40 100644 --- a/apps/sim/tools/linear/list_project_milestones.ts +++ b/apps/sim/tools/linear/list_project_milestones.ts @@ -82,7 +82,7 @@ export const linearListProjectMilestonesTool: ToolConfig< variables: { id: params.projectId, first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, }), }, diff --git a/apps/sim/tools/linear/list_project_statuses.ts b/apps/sim/tools/linear/list_project_statuses.ts index c0266c1a598..b0b1efb1240 100644 --- a/apps/sim/tools/linear/list_project_statuses.ts +++ b/apps/sim/tools/linear/list_project_statuses.ts @@ -71,7 +71,7 @@ export const linearListProjectStatusesTool: ToolConfig< `, variables: { first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, }), }, diff --git a/apps/sim/tools/linear/list_project_updates.ts b/apps/sim/tools/linear/list_project_updates.ts index 0148c914683..2b76d92ab94 100644 --- a/apps/sim/tools/linear/list_project_updates.ts +++ b/apps/sim/tools/linear/list_project_updates.ts @@ -77,7 +77,7 @@ export const linearListProjectUpdatesTool: ToolConfig< variables: { projectId: params.projectId, first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, }), }, diff --git a/apps/sim/tools/linear/list_projects.ts b/apps/sim/tools/linear/list_projects.ts index a9700f324f8..2af440f53a3 100644 --- a/apps/sim/tools/linear/list_projects.ts +++ b/apps/sim/tools/linear/list_projects.ts @@ -94,7 +94,7 @@ export const linearListProjectsTool: ToolConfig< `, variables: { first: params.first ? 
Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, includeArchived: params.includeArchived || false, }, } diff --git a/apps/sim/tools/linear/list_teams.ts b/apps/sim/tools/linear/list_teams.ts index 6917a167f83..268586ae7db 100644 --- a/apps/sim/tools/linear/list_teams.ts +++ b/apps/sim/tools/linear/list_teams.ts @@ -59,7 +59,7 @@ export const linearListTeamsTool: ToolConfig 0 ? filter : undefined, first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, }, } }, diff --git a/apps/sim/tools/linear/read_issues.ts b/apps/sim/tools/linear/read_issues.ts index bcbb4972dbb..3dd443d12b7 100644 --- a/apps/sim/tools/linear/read_issues.ts +++ b/apps/sim/tools/linear/read_issues.ts @@ -135,8 +135,8 @@ export const linearReadIssuesTool: ToolConfig 0 ? filter : undefined, first: params.first ? Number(params.first) : 50, - after: params.after, + after: params.after?.trim() || undefined, includeArchived: params.includeArchived || false, }, } From 0d8f4cfac09226b95799eb0ba9428fbb4a481266 Mon Sep 17 00:00:00 2001 From: Waleed Date: Fri, 27 Mar 2026 16:58:07 -0700 Subject: [PATCH 04/65] fix(knowledge): reject non-alphanumeric file extensions from document names (#3816) * fix(knowledge): reject non-alphanumeric file extensions from document names * fix(knowledge): improve error message when extension is non-alphanumeric --- apps/sim/lib/knowledge/documents/parser-extension.ts | 5 ++--- apps/sim/lib/uploads/utils/validation.ts | 10 ++++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/apps/sim/lib/knowledge/documents/parser-extension.ts b/apps/sim/lib/knowledge/documents/parser-extension.ts index ab32a7da1ed..db8765f14a5 100644 --- a/apps/sim/lib/knowledge/documents/parser-extension.ts +++ b/apps/sim/lib/knowledge/documents/parser-extension.ts @@ -29,9 +29,8 @@ export function resolveParserExtension( mimeType?: string, fallback?: string ): string { - const 
filenameExtension = filename.includes('.') - ? filename.split('.').pop()?.toLowerCase() - : undefined + const raw = filename.includes('.') ? filename.split('.').pop()?.toLowerCase() : undefined + const filenameExtension = raw && /^[a-z0-9]+$/.test(raw) ? raw : undefined if (filenameExtension && isSupportedParserExtension(filenameExtension)) { return filenameExtension diff --git a/apps/sim/lib/uploads/utils/validation.ts b/apps/sim/lib/uploads/utils/validation.ts index f9bc0221801..94456e726ec 100644 --- a/apps/sim/lib/uploads/utils/validation.ts +++ b/apps/sim/lib/uploads/utils/validation.ts @@ -137,12 +137,13 @@ export interface FileValidationError { * Validate if a file type is supported for document processing */ export function validateFileType(fileName: string, mimeType: string): FileValidationError | null { - const extension = path.extname(fileName).toLowerCase().substring(1) as SupportedDocumentExtension + const raw = path.extname(fileName).toLowerCase().substring(1) + const extension = (/^[a-z0-9]+$/.test(raw) ? raw : '') as SupportedDocumentExtension if (!SUPPORTED_DOCUMENT_EXTENSIONS.includes(extension)) { return { code: 'UNSUPPORTED_FILE_TYPE', - message: `Unsupported file type: ${extension}. Supported types are: ${SUPPORTED_DOCUMENT_EXTENSIONS.join(', ')}`, + message: `Unsupported file type${extension ? `: ${extension}` : ` for "${fileName}"`}. Supported types are: ${SUPPORTED_DOCUMENT_EXTENSIONS.join(', ')}`, supportedTypes: [...SUPPORTED_DOCUMENT_EXTENSIONS], } } @@ -221,7 +222,8 @@ export function validateMediaFileType( fileName: string, mimeType: string ): FileValidationError | null { - const extension = path.extname(fileName).toLowerCase().substring(1) + const raw = path.extname(fileName).toLowerCase().substring(1) + const extension = /^[a-z0-9]+$/.test(raw) ? 
raw : '' const isAudio = SUPPORTED_AUDIO_EXTENSIONS.includes(extension as SupportedAudioExtension) const isVideo = SUPPORTED_VIDEO_EXTENSIONS.includes(extension as SupportedVideoExtension) @@ -229,7 +231,7 @@ export function validateMediaFileType( if (!isAudio && !isVideo) { return { code: 'UNSUPPORTED_FILE_TYPE', - message: `Unsupported media file type: ${extension}. Supported audio types: ${SUPPORTED_AUDIO_EXTENSIONS.join(', ')}. Supported video types: ${SUPPORTED_VIDEO_EXTENSIONS.join(', ')}`, + message: `Unsupported media file type${extension ? `: ${extension}` : ` for "${fileName}"`}. Supported audio types: ${SUPPORTED_AUDIO_EXTENSIONS.join(', ')}. Supported video types: ${SUPPORTED_VIDEO_EXTENSIONS.join(', ')}`, supportedTypes: [...SUPPORTED_AUDIO_EXTENSIONS, ...SUPPORTED_VIDEO_EXTENSIONS], } } From a74789ee3eee2ab2e37f02c54c73a7ce2e6aedee Mon Sep 17 00:00:00 2001 From: Waleed Date: Fri, 27 Mar 2026 18:10:47 -0700 Subject: [PATCH 05/65] fix(security): SSRF, access control, and info disclosure (#3815) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(security): scope copilot feedback GET endpoint to authenticated user Add WHERE clause to filter feedback records by the authenticated user's ID, preventing any authenticated user from reading all users' copilot interactions, queries, and workflow YAML (IDOR / CWE-639). Co-Authored-By: Claude Opus 4.6 * fix(smtp): add SSRF validation and genericize network error messages Prevent SSRF via user-controlled smtpHost by validating with validateDatabaseHost before creating the nodemailer transporter. Collapse distinct network error messages (ECONNREFUSED, ECONNRESET, ETIMEDOUT) into a single generic message to prevent port-state leakage. 
Co-Authored-By: Claude Opus 4.6 * fix(security): add SSRF validation to SFTP/SSH and access control to workspace invitations Add `validateDatabaseHost` checks to SFTP and SSH connection utilities to block connections to private/reserved IPs and localhost, matching the existing pattern used by all database tools. Add authorization check to the workspace invitation GET endpoint so only the invitee or a workspace admin can view invitation details. Co-Authored-By: Claude Opus 4.6 * fix(smtp): restore SMTP response code handling for post-connection errors SMTP 4xx/5xx response codes are application-level errors (invalid recipient, mailbox full, server error) unrelated to the SSRF hardening goal. Restore response code differentiation and logging to preserve actionable user-facing error messages. Co-Authored-By: Claude Opus 4.6 * fix(security): use session email directly instead of extra DB query Addresses PR review feedback — align with the workspace invitation route pattern by using session.user.email instead of re-fetching from the database. Co-Authored-By: Claude Opus 4.6 * lint * fix(auth): revert lint autofix that broke hasExternalApiCredentials return type Biome auto-fixed `return auth !== null && auth.startsWith(...)` to `return auth?.startsWith(...)` which returns `boolean | undefined`, not `boolean`, causing a TypeScript build failure. * fix(smtp): pin resolved IP to prevent DNS rebinding (TOCTOU) Use the pre-resolved IP from validateDatabaseHost instead of the original hostname when creating the nodemailer transporter. Set servername to the original hostname to preserve TLS SNI validation. Co-Authored-By: Claude Opus 4.6 * refactor(security): extract createPinnedLookup helper for DNS rebinding prevention Extract reusable createPinnedLookup from secureFetchWithPinnedIP so non-HTTP transports (SSH, SFTP, IMAP) can pin resolved IPs at the socket level. 
SMTP route uses host+servername pinning instead since nodemailer doesn't reliably pass lookup to both secure/plaintext paths. Co-Authored-By: Claude Opus 4.6 * fix(security): pin IMAP connections to validated resolved IP Pass the resolved IP from validateDatabaseHost to ImapFlow as host, with the original hostname as servername for TLS SNI verification. Closes the DNS TOCTOU rebinding window. Co-Authored-By: Claude Opus 4.6 * lint * fix(auth): revert lint autofix on hasExternalApiCredentials return type Also pin SFTP/SSH connections to validated resolved IP to prevent DNS rebinding. * fix(security): short-circuit admin check when caller is invitee Skip the hasWorkspaceAdminAccess DB query when the caller is already the invitee, avoiding an unnecessary round-trip. Aligns with the org invitation route pattern. Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 --- .../app/api/copilot/feedback/route.test.ts | 35 ++++++------ apps/sim/app/api/copilot/feedback/route.ts | 6 +- .../api/copilot/training/examples/route.ts | 9 +++ apps/sim/app/api/copilot/training/route.ts | 9 +++ .../[id]/invitations/[invitationId]/route.ts | 15 +++++ .../sim/app/api/tools/imap/mailboxes/route.ts | 25 ++++----- apps/sim/app/api/tools/sftp/utils.ts | 24 +++++--- apps/sim/app/api/tools/smtp/send/route.ts | 37 +++++++----- apps/sim/app/api/tools/ssh/utils.ts | 24 +++++--- apps/sim/app/api/workflows/middleware.ts | 6 -- .../invitations/[invitationId]/route.test.ts | 56 ++++++++++++++++++- .../invitations/[invitationId]/route.ts | 9 +++ apps/sim/blocks/blocks/imap.ts | 1 - .../core/security/input-validation.server.ts | 29 ++++++---- apps/sim/lib/webhooks/imap-polling-service.ts | 32 ++++++++--- apps/sim/lib/webhooks/utils.server.ts | 1 - apps/sim/triggers/imap/poller.ts | 13 +---- bun.lock | 1 + 18 files changed, 228 insertions(+), 104 deletions(-) diff --git a/apps/sim/app/api/copilot/feedback/route.test.ts b/apps/sim/app/api/copilot/feedback/route.test.ts index 
de2a4d87576..f74aecf77a7 100644 --- a/apps/sim/app/api/copilot/feedback/route.test.ts +++ b/apps/sim/app/api/copilot/feedback/route.test.ts @@ -12,6 +12,7 @@ const { mockReturning, mockSelect, mockFrom, + mockWhere, mockAuthenticate, mockCreateUnauthorizedResponse, mockCreateBadRequestResponse, @@ -23,6 +24,7 @@ const { mockReturning: vi.fn(), mockSelect: vi.fn(), mockFrom: vi.fn(), + mockWhere: vi.fn(), mockAuthenticate: vi.fn(), mockCreateUnauthorizedResponse: vi.fn(), mockCreateBadRequestResponse: vi.fn(), @@ -81,7 +83,8 @@ describe('Copilot Feedback API Route', () => { mockValues.mockReturnValue({ returning: mockReturning }) mockReturning.mockResolvedValue([]) mockSelect.mockReturnValue({ from: mockFrom }) - mockFrom.mockResolvedValue([]) + mockFrom.mockReturnValue({ where: mockWhere }) + mockWhere.mockResolvedValue([]) mockCreateRequestTracker.mockReturnValue({ requestId: 'test-request-id', @@ -386,7 +389,7 @@ edges: isAuthenticated: true, }) - mockFrom.mockResolvedValueOnce([]) + mockWhere.mockResolvedValueOnce([]) const request = new Request('http://localhost:3000/api/copilot/feedback') const response = await GET(request as any) @@ -397,7 +400,7 @@ edges: expect(responseData.feedback).toEqual([]) }) - it('should return all feedback records', async () => { + it('should only return feedback records for the authenticated user', async () => { mockAuthenticate.mockResolvedValueOnce({ userId: 'user-123', isAuthenticated: true, @@ -415,19 +418,8 @@ edges: workflowYaml: null, createdAt: new Date('2024-01-01'), }, - { - feedbackId: 'feedback-2', - userId: 'user-456', - chatId: 'chat-2', - userQuery: 'Query 2', - agentResponse: 'Response 2', - isPositive: false, - feedback: 'Not helpful', - workflowYaml: 'yaml: content', - createdAt: new Date('2024-01-02'), - }, ] - mockFrom.mockResolvedValueOnce(mockFeedback) + mockWhere.mockResolvedValueOnce(mockFeedback) const request = new Request('http://localhost:3000/api/copilot/feedback') const response = await GET(request as 
any) @@ -435,9 +427,14 @@ edges: expect(response.status).toBe(200) const responseData = await response.json() expect(responseData.success).toBe(true) - expect(responseData.feedback).toHaveLength(2) + expect(responseData.feedback).toHaveLength(1) expect(responseData.feedback[0].feedbackId).toBe('feedback-1') - expect(responseData.feedback[1].feedbackId).toBe('feedback-2') + expect(responseData.feedback[0].userId).toBe('user-123') + + // Verify the where clause was called with the authenticated user's ID + const { eq } = await import('drizzle-orm') + expect(mockWhere).toHaveBeenCalled() + expect(eq).toHaveBeenCalledWith('userId', 'user-123') }) it('should handle database errors gracefully', async () => { @@ -446,7 +443,7 @@ edges: isAuthenticated: true, }) - mockFrom.mockRejectedValueOnce(new Error('Database connection failed')) + mockWhere.mockRejectedValueOnce(new Error('Database connection failed')) const request = new Request('http://localhost:3000/api/copilot/feedback') const response = await GET(request as any) @@ -462,7 +459,7 @@ edges: isAuthenticated: true, }) - mockFrom.mockResolvedValueOnce([]) + mockWhere.mockResolvedValueOnce([]) const request = new Request('http://localhost:3000/api/copilot/feedback') const response = await GET(request as any) diff --git a/apps/sim/app/api/copilot/feedback/route.ts b/apps/sim/app/api/copilot/feedback/route.ts index 3ff0956122c..4786d1d7d86 100644 --- a/apps/sim/app/api/copilot/feedback/route.ts +++ b/apps/sim/app/api/copilot/feedback/route.ts @@ -1,6 +1,7 @@ import { db } from '@sim/db' import { copilotFeedback } from '@sim/db/schema' import { createLogger } from '@sim/logger' +import { eq } from 'drizzle-orm' import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' import { @@ -109,7 +110,7 @@ export async function POST(req: NextRequest) { /** * GET /api/copilot/feedback - * Get all feedback records (for analytics) + * Get feedback records for the authenticated user */ export async function 
GET(req: NextRequest) { const tracker = createRequestTracker() @@ -123,7 +124,7 @@ export async function GET(req: NextRequest) { return createUnauthorizedResponse() } - // Get all feedback records + // Get feedback records for the authenticated user only const feedbackRecords = await db .select({ feedbackId: copilotFeedback.feedbackId, @@ -137,6 +138,7 @@ export async function GET(req: NextRequest) { createdAt: copilotFeedback.createdAt, }) .from(copilotFeedback) + .where(eq(copilotFeedback.userId, authenticatedUserId)) logger.info(`[${tracker.requestId}] Retrieved ${feedbackRecords.length} feedback records`) diff --git a/apps/sim/app/api/copilot/training/examples/route.ts b/apps/sim/app/api/copilot/training/examples/route.ts index 1d23793cd79..934ce256875 100644 --- a/apps/sim/app/api/copilot/training/examples/route.ts +++ b/apps/sim/app/api/copilot/training/examples/route.ts @@ -1,6 +1,10 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' +import { + authenticateCopilotRequestSessionOnly, + createUnauthorizedResponse, +} from '@/lib/copilot/request-helpers' import { env } from '@/lib/core/config/env' const logger = createLogger('CopilotTrainingExamplesAPI') @@ -16,6 +20,11 @@ const TrainingExampleSchema = z.object({ }) export async function POST(request: NextRequest) { + const { userId, isAuthenticated } = await authenticateCopilotRequestSessionOnly() + if (!isAuthenticated || !userId) { + return createUnauthorizedResponse() + } + const baseUrl = env.AGENT_INDEXER_URL if (!baseUrl) { logger.error('Missing AGENT_INDEXER_URL environment variable') diff --git a/apps/sim/app/api/copilot/training/route.ts b/apps/sim/app/api/copilot/training/route.ts index 4ff955eee0f..e6e58f59bb0 100644 --- a/apps/sim/app/api/copilot/training/route.ts +++ b/apps/sim/app/api/copilot/training/route.ts @@ -1,6 +1,10 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 
'next/server' import { z } from 'zod' +import { + authenticateCopilotRequestSessionOnly, + createUnauthorizedResponse, +} from '@/lib/copilot/request-helpers' import { env } from '@/lib/core/config/env' const logger = createLogger('CopilotTrainingAPI') @@ -22,6 +26,11 @@ const TrainingDataSchema = z.object({ }) export async function POST(request: NextRequest) { + const { userId, isAuthenticated } = await authenticateCopilotRequestSessionOnly() + if (!isAuthenticated || !userId) { + return createUnauthorizedResponse() + } + try { const baseUrl = env.AGENT_INDEXER_URL if (!baseUrl) { diff --git a/apps/sim/app/api/organizations/[id]/invitations/[invitationId]/route.ts b/apps/sim/app/api/organizations/[id]/invitations/[invitationId]/route.ts index 044f239d825..e929765380e 100644 --- a/apps/sim/app/api/organizations/[id]/invitations/[invitationId]/route.ts +++ b/apps/sim/app/api/organizations/[id]/invitations/[invitationId]/route.ts @@ -61,6 +61,21 @@ export async function GET( return NextResponse.json({ error: 'Invitation not found' }, { status: 404 }) } + // Verify caller is either an org member or the invitee + const isInvitee = session.user.email?.toLowerCase() === orgInvitation.email.toLowerCase() + + if (!isInvitee) { + const memberEntry = await db + .select() + .from(member) + .where(and(eq(member.organizationId, organizationId), eq(member.userId, session.user.id))) + .limit(1) + + if (memberEntry.length === 0) { + return NextResponse.json({ error: 'Forbidden' }, { status: 403 }) + } + } + const org = await db .select() .from(organization) diff --git a/apps/sim/app/api/tools/imap/mailboxes/route.ts b/apps/sim/app/api/tools/imap/mailboxes/route.ts index 49543a662cc..e2f3056aa29 100644 --- a/apps/sim/app/api/tools/imap/mailboxes/route.ts +++ b/apps/sim/app/api/tools/imap/mailboxes/route.ts @@ -2,6 +2,7 @@ import { createLogger } from '@sim/logger' import { ImapFlow } from 'imapflow' import { type NextRequest, NextResponse } from 'next/server' import { getSession } 
from '@/lib/auth' +import { validateDatabaseHost } from '@/lib/core/security/input-validation.server' const logger = createLogger('ImapMailboxesAPI') @@ -9,7 +10,6 @@ interface ImapMailboxRequest { host: string port: number secure: boolean - rejectUnauthorized: boolean username: string password: string } @@ -22,7 +22,7 @@ export async function POST(request: NextRequest) { try { const body = (await request.json()) as ImapMailboxRequest - const { host, port, secure, rejectUnauthorized, username, password } = body + const { host, port, secure, username, password } = body if (!host || !username || !password) { return NextResponse.json( @@ -31,8 +31,14 @@ export async function POST(request: NextRequest) { ) } + const hostValidation = await validateDatabaseHost(host, 'host') + if (!hostValidation.isValid) { + return NextResponse.json({ success: false, message: hostValidation.error }, { status: 400 }) + } + const client = new ImapFlow({ - host, + host: hostValidation.resolvedIP!, + servername: host, port: port || 993, secure: secure ?? true, auth: { @@ -40,7 +46,7 @@ export async function POST(request: NextRequest) { pass: password, }, tls: { - rejectUnauthorized: rejectUnauthorized ?? true, + rejectUnauthorized: true, }, logger: false, }) @@ -79,21 +85,12 @@ export async function POST(request: NextRequest) { const errorMessage = error instanceof Error ? error.message : 'Unknown error' logger.error('Error fetching IMAP mailboxes:', errorMessage) - let userMessage = 'Failed to connect to IMAP server' + let userMessage = 'Failed to connect to IMAP server. Please check your connection settings.' if ( errorMessage.includes('AUTHENTICATIONFAILED') || errorMessage.includes('Invalid credentials') ) { userMessage = 'Invalid username or password. For Gmail, use an App Password.' - } else if (errorMessage.includes('ENOTFOUND') || errorMessage.includes('getaddrinfo')) { - userMessage = 'Could not find IMAP server. Please check the hostname.' 
- } else if (errorMessage.includes('ECONNREFUSED')) { - userMessage = 'Connection refused. Please check the port and SSL settings.' - } else if (errorMessage.includes('certificate') || errorMessage.includes('SSL')) { - userMessage = - 'TLS/SSL error. Try disabling "Verify TLS Certificate" for self-signed certificates.' - } else if (errorMessage.includes('timeout')) { - userMessage = 'Connection timed out. Please check your network and server settings.' } return NextResponse.json({ success: false, message: userMessage }, { status: 500 }) diff --git a/apps/sim/app/api/tools/sftp/utils.ts b/apps/sim/app/api/tools/sftp/utils.ts index d3518ae81df..72468a21b10 100644 --- a/apps/sim/app/api/tools/sftp/utils.ts +++ b/apps/sim/app/api/tools/sftp/utils.ts @@ -1,4 +1,5 @@ import { type Attributes, Client, type ConnectConfig, type SFTPWrapper } from 'ssh2' +import { validateDatabaseHost } from '@/lib/core/security/input-validation.server' const S_IFMT = 0o170000 const S_IFDIR = 0o040000 @@ -91,16 +92,23 @@ function formatSftpError(err: Error, config: { host: string; port: number }): Er * Creates an SSH connection for SFTP using the provided configuration. * Uses ssh2 library defaults which align with OpenSSH standards. */ -export function createSftpConnection(config: SftpConnectionConfig): Promise { +export async function createSftpConnection(config: SftpConnectionConfig): Promise { + const host = config.host + + if (!host || host.trim() === '') { + throw new Error('Host is required. Please provide a valid hostname or IP address.') + } + + const hostValidation = await validateDatabaseHost(host, 'host') + if (!hostValidation.isValid) { + throw new Error(hostValidation.error) + } + + const resolvedHost = hostValidation.resolvedIP ?? host.trim() + return new Promise((resolve, reject) => { const client = new Client() const port = config.port || 22 - const host = config.host - - if (!host || host.trim() === '') { - reject(new Error('Host is required. 
Please provide a valid hostname or IP address.')) - return - } const hasPassword = config.password && config.password.trim() !== '' const hasPrivateKey = config.privateKey && config.privateKey.trim() !== '' @@ -111,7 +119,7 @@ export function createSftpConnection(config: SftpConnectionConfig): Promise { return typeof err === 'object' && err !== null && 'responseCode' in err } diff --git a/apps/sim/app/api/tools/ssh/utils.ts b/apps/sim/app/api/tools/ssh/utils.ts index 126849ba901..9561924c718 100644 --- a/apps/sim/app/api/tools/ssh/utils.ts +++ b/apps/sim/app/api/tools/ssh/utils.ts @@ -1,5 +1,6 @@ import { createLogger } from '@sim/logger' import { type Attributes, Client, type ConnectConfig } from 'ssh2' +import { validateDatabaseHost } from '@/lib/core/security/input-validation.server' const logger = createLogger('SSHUtils') @@ -108,16 +109,23 @@ function formatSSHError(err: Error, config: { host: string; port: number }): Err * - keepaliveInterval: 0 (disabled, same as OpenSSH ServerAliveInterval) * - keepaliveCountMax: 3 (same as OpenSSH ServerAliveCountMax) */ -export function createSSHConnection(config: SSHConnectionConfig): Promise { +export async function createSSHConnection(config: SSHConnectionConfig): Promise { + const host = config.host + + if (!host || host.trim() === '') { + throw new Error('Host is required. Please provide a valid hostname or IP address.') + } + + const hostValidation = await validateDatabaseHost(host, 'host') + if (!hostValidation.isValid) { + throw new Error(hostValidation.error) + } + + const resolvedHost = hostValidation.resolvedIP ?? host.trim() + return new Promise((resolve, reject) => { const client = new Client() const port = config.port || 22 - const host = config.host - - if (!host || host.trim() === '') { - reject(new Error('Host is required. 
Please provide a valid hostname or IP address.')) - return - } const hasPassword = config.password && config.password.trim() !== '' const hasPrivateKey = config.privateKey && config.privateKey.trim() !== '' @@ -128,7 +136,7 @@ export function createSSHConnection(config: SSHConnectionConfig): Promise ({ getBaseUrl: vi.fn().mockReturnValue('https://test.sim.ai'), })) +vi.mock('@/components/emails', () => ({ + WorkspaceInvitationEmail: vi.fn().mockReturnValue(null), +})) + +vi.mock('@/lib/messaging/email/mailer', () => ({ + sendEmail: vi.fn().mockResolvedValue({ success: true }), +})) + +vi.mock('@/lib/messaging/email/utils', () => ({ + getFromEmailAddress: vi.fn().mockReturnValue('noreply@test.com'), +})) + +vi.mock('@react-email/render', () => ({ + render: vi.fn().mockResolvedValue(''), +})) + vi.mock('@sim/db', () => ({ db: { select: () => mockDbSelect(), @@ -171,9 +187,31 @@ describe('Workspace Invitation [invitationId] API Route', () => { }) describe('GET /api/workspaces/invitations/[invitationId]', () => { - it('should return invitation details when called without token', async () => { + it('should return invitation details when caller is the invitee', async () => { + const session = createSession({ userId: mockUser.id, email: 'invited@example.com' }) + mockGetSession.mockResolvedValue(session) + mockHasWorkspaceAdminAccess.mockResolvedValue(false) + dbSelectResults = [[mockInvitation], [mockWorkspace]] + + const request = new NextRequest('http://localhost/api/workspaces/invitations/invitation-789') + const params = Promise.resolve({ invitationId: 'invitation-789' }) + + const response = await GET(request, { params }) + const data = await response.json() + + expect(response.status).toBe(200) + expect(data).toMatchObject({ + id: 'invitation-789', + email: 'invited@example.com', + status: 'pending', + workspaceName: 'Test Workspace', + }) + }) + + it('should return invitation details when caller is a workspace admin', async () => { const session = createSession({ 
userId: mockUser.id, email: mockUser.email }) mockGetSession.mockResolvedValue(session) + mockHasWorkspaceAdminAccess.mockResolvedValue(true) dbSelectResults = [[mockInvitation], [mockWorkspace]] const request = new NextRequest('http://localhost/api/workspaces/invitations/invitation-789') @@ -191,6 +229,22 @@ describe('Workspace Invitation [invitationId] API Route', () => { }) }) + it('should return 403 when caller is neither invitee nor workspace admin', async () => { + const session = createSession({ userId: mockUser.id, email: 'unrelated@example.com' }) + mockGetSession.mockResolvedValue(session) + mockHasWorkspaceAdminAccess.mockResolvedValue(false) + dbSelectResults = [[mockInvitation], [mockWorkspace]] + + const request = new NextRequest('http://localhost/api/workspaces/invitations/invitation-789') + const params = Promise.resolve({ invitationId: 'invitation-789' }) + + const response = await GET(request, { params }) + const data = await response.json() + + expect(response.status).toBe(403) + expect(data).toEqual({ error: 'Insufficient permissions' }) + }) + it('should redirect to login when unauthenticated with token', async () => { mockGetSession.mockResolvedValue(null) diff --git a/apps/sim/app/api/workspaces/invitations/[invitationId]/route.ts b/apps/sim/app/api/workspaces/invitations/[invitationId]/route.ts index 723b2954de0..df71f666986 100644 --- a/apps/sim/app/api/workspaces/invitations/[invitationId]/route.ts +++ b/apps/sim/app/api/workspaces/invitations/[invitationId]/route.ts @@ -198,6 +198,15 @@ export async function GET( ) } + const isInvitee = session.user.email?.toLowerCase() === invitation.email.toLowerCase() + + if (!isInvitee) { + const hasAdminAccess = await hasWorkspaceAdminAccess(session.user.id, invitation.workspaceId) + if (!hasAdminAccess) { + return NextResponse.json({ error: 'Insufficient permissions' }, { status: 403 }) + } + } + return NextResponse.json({ ...invitation, workspaceName: workspaceDetails.name, diff --git 
a/apps/sim/blocks/blocks/imap.ts b/apps/sim/blocks/blocks/imap.ts index 8217e372de6..20e4faabdb0 100644 --- a/apps/sim/blocks/blocks/imap.ts +++ b/apps/sim/blocks/blocks/imap.ts @@ -28,7 +28,6 @@ export const ImapBlock: BlockConfig = { host: { type: 'string', description: 'IMAP server hostname' }, port: { type: 'string', description: 'IMAP server port' }, secure: { type: 'boolean', description: 'Use SSL/TLS encryption' }, - rejectUnauthorized: { type: 'boolean', description: 'Verify TLS certificate' }, username: { type: 'string', description: 'Email username' }, password: { type: 'string', description: 'Email password' }, mailbox: { type: 'string', description: 'Mailbox to monitor' }, diff --git a/apps/sim/lib/core/security/input-validation.server.ts b/apps/sim/lib/core/security/input-validation.server.ts index 78c93d6d13d..7ed391a3abb 100644 --- a/apps/sim/lib/core/security/input-validation.server.ts +++ b/apps/sim/lib/core/security/input-validation.server.ts @@ -243,6 +243,24 @@ function resolveRedirectUrl(baseUrl: string, location: string): string { } } +/** + * Creates a DNS lookup function that always returns a pre-resolved IP address. + * Use this to prevent DNS rebinding (TOCTOU) attacks when connecting to + * user-controlled hostnames via non-HTTP protocols (SMTP, SSH, IMAP, etc.). + */ +export function createPinnedLookup(resolvedIP: string): LookupFunction { + const isIPv6 = resolvedIP.includes(':') + const family = isIPv6 ? 6 : 4 + + return (_hostname, options, callback) => { + if (options.all) { + callback(null, [{ address: resolvedIP, family }]) + } else { + callback(null, resolvedIP, family) + } + } +} + /** * Performs a fetch with IP pinning to prevent DNS rebinding attacks. * Uses the pre-resolved IP address while preserving the original hostname for TLS SNI. @@ -263,16 +281,7 @@ export async function secureFetchWithPinnedIP( const defaultPort = isHttps ? 443 : 80 const port = parsed.port ? 
Number.parseInt(parsed.port, 10) : defaultPort - const isIPv6 = resolvedIP.includes(':') - const family = isIPv6 ? 6 : 4 - - const lookup: LookupFunction = (_hostname, options, callback) => { - if (options.all) { - callback(null, [{ address: resolvedIP, family }]) - } else { - callback(null, resolvedIP, family) - } - } + const lookup = createPinnedLookup(resolvedIP) const agentOptions: http.AgentOptions = { lookup } diff --git a/apps/sim/lib/webhooks/imap-polling-service.ts b/apps/sim/lib/webhooks/imap-polling-service.ts index 28020426e3a..6f2f3ca09c0 100644 --- a/apps/sim/lib/webhooks/imap-polling-service.ts +++ b/apps/sim/lib/webhooks/imap-polling-service.ts @@ -7,6 +7,7 @@ import type { FetchMessageObject, MailboxLockObject } from 'imapflow' import { ImapFlow } from 'imapflow' import { nanoid } from 'nanoid' import { pollingIdempotency } from '@/lib/core/idempotency/service' +import { validateDatabaseHost } from '@/lib/core/security/input-validation.server' import { getInternalApiBaseUrl } from '@/lib/core/utils/urls' import { MAX_CONSECUTIVE_FAILURES } from '@/triggers/constants' @@ -18,7 +19,6 @@ interface ImapWebhookConfig { host: string port: number secure: boolean - rejectUnauthorized: boolean username: string password: string mailbox: string | string[] // Can be single mailbox or array of mailboxes @@ -172,7 +172,17 @@ export async function pollImapWebhooks() { return } - const fetchResult = await fetchNewEmails(config, requestId) + const hostValidation = await validateDatabaseHost(config.host, 'host') + if (!hostValidation.isValid) { + logger.error( + `[${requestId}] IMAP host validation failed for webhook ${webhookId}: ${hostValidation.error}` + ) + await markWebhookFailed(webhookId) + failureCount++ + return + } + + const fetchResult = await fetchNewEmails(config, requestId, hostValidation.resolvedIP!) 
const { emails, latestUidByMailbox } = fetchResult const pollTimestamp = new Date().toISOString() @@ -190,7 +200,8 @@ export async function pollImapWebhooks() { emails, webhookData, config, - requestId + requestId, + hostValidation.resolvedIP! ) await updateWebhookLastProcessedUids(webhookId, latestUidByMailbox, pollTimestamp) @@ -257,9 +268,10 @@ export async function pollImapWebhooks() { } } -async function fetchNewEmails(config: ImapWebhookConfig, requestId: string) { +async function fetchNewEmails(config: ImapWebhookConfig, requestId: string, resolvedIP: string) { const client = new ImapFlow({ - host: config.host, + host: resolvedIP, + servername: config.host, port: config.port || 993, secure: config.secure ?? true, auth: { @@ -267,7 +279,7 @@ async function fetchNewEmails(config: ImapWebhookConfig, requestId: string) { pass: config.password, }, tls: { - rejectUnauthorized: config.rejectUnauthorized ?? true, + rejectUnauthorized: true, }, logger: false, }) @@ -553,13 +565,15 @@ async function processEmails( }>, webhookData: WebhookRecord, config: ImapWebhookConfig, - requestId: string + requestId: string, + resolvedIP: string ) { let processedCount = 0 let failedCount = 0 const client = new ImapFlow({ - host: config.host, + host: resolvedIP, + servername: config.host, port: config.port || 993, secure: config.secure ?? true, auth: { @@ -567,7 +581,7 @@ async function processEmails( pass: config.password, }, tls: { - rejectUnauthorized: config.rejectUnauthorized ?? 
true, + rejectUnauthorized: true, }, logger: false, }) diff --git a/apps/sim/lib/webhooks/utils.server.ts b/apps/sim/lib/webhooks/utils.server.ts index 9f81f923c02..3eec55697f4 100644 --- a/apps/sim/lib/webhooks/utils.server.ts +++ b/apps/sim/lib/webhooks/utils.server.ts @@ -2822,7 +2822,6 @@ export async function configureImapPolling(webhookData: any, requestId: string): ...providerConfig, port: providerConfig.port || '993', secure: providerConfig.secure !== false, - rejectUnauthorized: providerConfig.rejectUnauthorized !== false, mailbox: providerConfig.mailbox || 'INBOX', searchCriteria: providerConfig.searchCriteria || 'UNSEEN', markAsRead: providerConfig.markAsRead || false, diff --git a/apps/sim/triggers/imap/poller.ts b/apps/sim/triggers/imap/poller.ts index b7a8063e1ab..e4279236f0d 100644 --- a/apps/sim/triggers/imap/poller.ts +++ b/apps/sim/triggers/imap/poller.ts @@ -44,15 +44,6 @@ export const imapPollingTrigger: TriggerConfig = { required: false, mode: 'trigger', }, - { - id: 'rejectUnauthorized', - title: 'Verify TLS Certificate', - type: 'switch', - defaultValue: true, - description: 'Verify server TLS certificate. Disable for self-signed certificates.', - required: false, - mode: 'trigger', - }, // Authentication { id: 'username', @@ -89,7 +80,6 @@ export const imapPollingTrigger: TriggerConfig = { const host = store.getValue(blockId, 'host') as string | null const port = store.getValue(blockId, 'port') as string | null const secure = store.getValue(blockId, 'secure') as boolean | null - const rejectUnauthorized = store.getValue(blockId, 'rejectUnauthorized') as boolean | null const username = store.getValue(blockId, 'username') as string | null const password = store.getValue(blockId, 'password') as string | null @@ -105,7 +95,6 @@ export const imapPollingTrigger: TriggerConfig = { host, port: port ? Number.parseInt(port, 10) : 993, secure: secure ?? true, - rejectUnauthorized: rejectUnauthorized ?? 
true, username, password, }), @@ -129,7 +118,7 @@ export const imapPollingTrigger: TriggerConfig = { throw error } }, - dependsOn: ['host', 'port', 'secure', 'rejectUnauthorized', 'username', 'password'], + dependsOn: ['host', 'port', 'secure', 'username', 'password'], mode: 'trigger', }, // Email filtering diff --git a/bun.lock b/bun.lock index de08d9dbc64..42c3776ac3c 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "simstudio", From d7840e0d3b5d0481d25b7d69d87c1c515dd82bbf Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 27 Mar 2026 18:28:36 -0700 Subject: [PATCH 06/65] fix(worker): dockerfile + helm updates (#3818) * fix(worker): dockerfile + helm updates * address comments --- apps/sim/package.json | 3 +- docker-compose.local.yml | 32 +++++++ docker-compose.prod.yml | 4 +- docker/app.Dockerfile | 3 + helm/sim/templates/_helpers.tpl | 20 +++++ helm/sim/templates/deployment-worker.yaml | 101 ++++++++++++++++++++++ helm/sim/values.yaml | 50 +++++++++++ 7 files changed, 210 insertions(+), 3 deletions(-) create mode 100644 helm/sim/templates/deployment-worker.yaml diff --git a/apps/sim/package.json b/apps/sim/package.json index 03c91227186..d22daf42ca4 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -17,8 +17,9 @@ "load:workflow:baseline": "BASE_URL=${BASE_URL:-http://localhost:3000} WARMUP_DURATION=${WARMUP_DURATION:-10} WARMUP_RATE=${WARMUP_RATE:-2} PEAK_RATE=${PEAK_RATE:-8} HOLD_DURATION=${HOLD_DURATION:-20} bunx artillery run scripts/load/workflow-concurrency.yml", "load:workflow:waves": "BASE_URL=${BASE_URL:-http://localhost:3000} WAVE_ONE_DURATION=${WAVE_ONE_DURATION:-10} WAVE_ONE_RATE=${WAVE_ONE_RATE:-6} QUIET_DURATION=${QUIET_DURATION:-5} WAVE_TWO_DURATION=${WAVE_TWO_DURATION:-15} WAVE_TWO_RATE=${WAVE_TWO_RATE:-8} WAVE_THREE_DURATION=${WAVE_THREE_DURATION:-20} WAVE_THREE_RATE=${WAVE_THREE_RATE:-10} bunx artillery run scripts/load/workflow-waves.yml", 
"load:workflow:isolation": "BASE_URL=${BASE_URL:-http://localhost:3000} ISOLATION_DURATION=${ISOLATION_DURATION:-30} TOTAL_RATE=${TOTAL_RATE:-9} WORKSPACE_A_WEIGHT=${WORKSPACE_A_WEIGHT:-8} WORKSPACE_B_WEIGHT=${WORKSPACE_B_WEIGHT:-1} bunx artillery run scripts/load/workflow-isolation.yml", - "build": "bun run build:pptx-worker && next build", + "build": "bun run build:pptx-worker && bun run build:worker && next build", "build:pptx-worker": "bun build ./lib/execution/pptx-worker.cjs --target=node --format=cjs --outfile ./dist/pptx-worker.cjs", + "build:worker": "bun build ./worker/index.ts --target=node --format=cjs --packages=external --outfile ./dist/worker.cjs", "start": "next start", "worker": "NODE_ENV=production bun run worker/index.ts", "prepare": "cd ../.. && bun husky", diff --git a/docker-compose.local.yml b/docker-compose.local.yml index f47643ad00f..ceb8dc3883b 100644 --- a/docker-compose.local.yml +++ b/docker-compose.local.yml @@ -67,6 +67,38 @@ services: retries: 3 start_period: 10s + sim-worker: + build: + context: . + dockerfile: docker/app.Dockerfile + command: ['bun', 'apps/sim/dist/worker.cjs'] + restart: unless-stopped + profiles: + - worker + deploy: + resources: + limits: + memory: 4G + environment: + - NODE_ENV=development + - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio} + - REDIS_URL=${REDIS_URL:-} + - ENCRYPTION_KEY=${ENCRYPTION_KEY:-dev-encryption-key-at-least-32-chars} + - API_ENCRYPTION_KEY=${API_ENCRYPTION_KEY:-} + - INTERNAL_API_SECRET=${INTERNAL_API_SECRET:-dev-internal-api-secret-min-32-chars} + - WORKER_PORT=3001 + depends_on: + db: + condition: service_healthy + migrations: + condition: service_completed_successfully + healthcheck: + test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3001/health/live'] + interval: 90s + timeout: 5s + retries: 3 + start_period: 10s + migrations: build: context: . 
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 4c03862e35b..da547506556 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -42,7 +42,7 @@ services: sim-worker: image: ghcr.io/simstudioai/simstudio:latest - command: ['bun', 'run', 'worker'] + command: ['bun', 'apps/sim/dist/worker.cjs'] restart: unless-stopped deploy: resources: @@ -71,7 +71,7 @@ services: migrations: condition: service_completed_successfully healthcheck: - test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:${WORKER_PORT:-3001}/health'] + test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:${WORKER_PORT:-3001}/health/live'] interval: 90s timeout: 5s retries: 3 diff --git a/docker/app.Dockerfile b/docker/app.Dockerfile index 7e1552a1c45..4050e98a007 100644 --- a/docker/app.Dockerfile +++ b/docker/app.Dockerfile @@ -114,6 +114,9 @@ COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/execution/isolated-v # Copy the bundled PPTX worker artifact COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/pptx-worker.cjs ./apps/sim/dist/pptx-worker.cjs +# Copy the bundled BullMQ worker artifact +COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/worker.cjs ./apps/sim/dist/worker.cjs + # Guardrails setup with pip caching COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/requirements.txt ./apps/sim/lib/guardrails/requirements.txt COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/validate_pii.py ./apps/sim/lib/guardrails/validate_pii.py diff --git a/helm/sim/templates/_helpers.tpl b/helm/sim/templates/_helpers.tpl index e1bee304913..3ba078c5e67 100644 --- a/helm/sim/templates/_helpers.tpl +++ b/helm/sim/templates/_helpers.tpl @@ -117,6 +117,22 @@ Ollama selector labels app.kubernetes.io/component: ollama {{- end }} +{{/* +Worker specific labels +*/}} +{{- define "sim.worker.labels" -}} +{{ include "sim.labels" . 
}} +app.kubernetes.io/component: worker +{{- end }} + +{{/* +Worker selector labels +*/}} +{{- define "sim.worker.selectorLabels" -}} +{{ include "sim.selectorLabels" . }} +app.kubernetes.io/component: worker +{{- end }} + {{/* Migrations specific labels */}} @@ -206,6 +222,10 @@ Skip validation when using existing secrets or External Secrets Operator {{- fail "realtime.env.BETTER_AUTH_SECRET must not use the default placeholder value. Generate a secure secret with: openssl rand -hex 32" }} {{- end }} {{- end }} +{{- /* Worker validation - REDIS_URL is required when worker is enabled */ -}} +{{- if and .Values.worker.enabled (not .Values.app.env.REDIS_URL) }} +{{- fail "app.env.REDIS_URL is required when worker.enabled=true" }} +{{- end }} {{- /* PostgreSQL password validation - skip if using existing secret or ESO */ -}} {{- if not (or $useExistingPostgresSecret $useExternalSecrets) }} {{- if and .Values.postgresql.enabled (not .Values.postgresql.auth.password) }} diff --git a/helm/sim/templates/deployment-worker.yaml b/helm/sim/templates/deployment-worker.yaml new file mode 100644 index 00000000000..701fdff1849 --- /dev/null +++ b/helm/sim/templates/deployment-worker.yaml @@ -0,0 +1,101 @@ +{{- if .Values.worker.enabled }} +{{- include "sim.validateSecrets" . }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "sim.fullname" . }}-worker + namespace: {{ .Release.Namespace }} + labels: + {{- include "sim.worker.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.worker.replicaCount }} + selector: + matchLabels: + {{- include "sim.worker.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "sim.worker.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + serviceAccountName: {{ include "sim.serviceAccountName" . }} + {{- include "sim.podSecurityContext" .Values.worker | nindent 6 }} + {{- include "sim.nodeSelector" .Values.worker | nindent 6 }} + {{- include "sim.tolerations" .Values | nindent 6 }} + {{- include "sim.affinity" .Values | nindent 6 }} + containers: + - name: worker + image: {{ include "sim.image" (dict "context" . "image" .Values.worker.image) }} + imagePullPolicy: {{ .Values.worker.image.pullPolicy }} + command: ["bun", "apps/sim/dist/worker.cjs"] + ports: + - name: health + containerPort: {{ .Values.worker.healthPort }} + protocol: TCP + env: + - name: DATABASE_URL + value: {{ include "sim.databaseUrl" . | quote }} + {{- if .Values.app.env.REDIS_URL }} + - name: REDIS_URL + value: {{ .Values.app.env.REDIS_URL | quote }} + {{- end }} + - name: WORKER_PORT + value: {{ .Values.worker.healthPort | quote }} + {{- if .Values.telemetry.enabled }} + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://{{ include "sim.fullname" . }}-otel-collector:4318" + - name: OTEL_SERVICE_NAME + value: sim-worker + - name: OTEL_SERVICE_VERSION + value: {{ .Chart.AppVersion | quote }} + - name: OTEL_RESOURCE_ATTRIBUTES + value: "service.name=sim-worker,service.version={{ .Chart.AppVersion }},deployment.environment={{ .Values.worker.env.NODE_ENV }}" + {{- end }} + {{- range $key, $value := .Values.worker.env }} + {{- if ne $key "WORKER_PORT" }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + {{- end }} + {{- with .Values.extraEnvVars }} + {{- toYaml . | nindent 12 }} + {{- end }} + envFrom: + - secretRef: + name: {{ include "sim.appSecretName" . }} + {{- if .Values.postgresql.enabled }} + - secretRef: + name: {{ include "sim.postgresqlSecretName" . }} + {{- else if .Values.externalDatabase.enabled }} + - secretRef: + name: {{ include "sim.externalDbSecretName" . 
}} + {{- end }} + livenessProbe: + httpGet: + path: /health/live + port: health + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: health + initialDelaySeconds: 10 + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + {{- include "sim.resources" .Values.worker | nindent 10 }} + {{- include "sim.securityContext" .Values.worker | nindent 10 }} +{{- end }} diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml index 8d75b73692e..4fd2828d8c0 100644 --- a/helm/sim/values.yaml +++ b/helm/sim/values.yaml @@ -358,6 +358,56 @@ realtime: extraVolumes: [] extraVolumeMounts: [] +# BullMQ worker configuration (processes background jobs when Redis is available) +# Uses the same image as the main app with a different command +worker: + # Enable/disable the worker deployment (requires REDIS_URL to be set in app.env) + enabled: false + + # Image configuration (defaults to same image as app) + image: + repository: simstudioai/simstudio + tag: latest + pullPolicy: Always + + # Number of replicas + replicaCount: 1 + + # Health check port (worker exposes a lightweight HTTP health server) + healthPort: 3001 + + # Resource limits and requests + resources: + limits: + memory: "4Gi" + cpu: "1000m" + requests: + memory: "2Gi" + cpu: "500m" + + # Node selector for pod scheduling + nodeSelector: {} + + # Pod security context + podSecurityContext: + fsGroup: 1001 + + # Container security context + securityContext: + runAsNonRoot: true + runAsUser: 1001 + + # Environment variables (worker-specific tuning) + env: + NODE_ENV: "production" + WORKER_CONCURRENCY_WORKFLOW: "50" + WORKER_CONCURRENCY_WEBHOOK: "30" + WORKER_CONCURRENCY_SCHEDULE: "20" + WORKER_CONCURRENCY_MOTHERSHIP_JOB: "10" + WORKER_CONCURRENCY_CONNECTOR_SYNC: "5" + WORKER_CONCURRENCY_DOCUMENT_PROCESSING: "20" + WORKER_CONCURRENCY_NOTIFICATION_DELIVERY: "10" + # Database migrations job configuration migrations: # Enable/disable 
migrations job From 879f0c9a4c5aadc357e2a2cb75e52cb9cef54a1c Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 27 Mar 2026 18:50:57 -0700 Subject: [PATCH 07/65] update dockerfile (#3819) --- docker/app.Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/app.Dockerfile b/docker/app.Dockerfile index 4050e98a007..7dc0681f9bc 100644 --- a/docker/app.Dockerfile +++ b/docker/app.Dockerfile @@ -114,8 +114,9 @@ COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/execution/isolated-v # Copy the bundled PPTX worker artifact COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/pptx-worker.cjs ./apps/sim/dist/pptx-worker.cjs -# Copy the bundled BullMQ worker artifact +# Copy the bundled BullMQ worker artifact and workspace packages it depends on COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/worker.cjs ./apps/sim/dist/worker.cjs +COPY --from=builder --chown=nextjs:nodejs /app/packages ./packages # Guardrails setup with pip caching COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/requirements.txt ./apps/sim/lib/guardrails/requirements.txt From 296fd89aa9cf197859d6deb05ea846a469e45ba4 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 27 Mar 2026 19:11:02 -0700 Subject: [PATCH 08/65] fix dockerfile --- apps/sim/package.json | 3 +-- docker/app.Dockerfile | 7 +++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/apps/sim/package.json b/apps/sim/package.json index d22daf42ca4..c8cc4530338 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -11,8 +11,7 @@ "dev": "next dev --port 3000", "dev:webpack": "next dev --webpack", "dev:sockets": "bun run socket/index.ts", - "dev:worker": "bun run worker/index.ts", - "dev:full": "bunx concurrently -n \"App,Realtime,Worker\" -c \"cyan,magenta,yellow\" \"bun run dev\" \"bun run dev:sockets\" \"bun run dev:worker\"", + "dev:full": "bunx concurrently -n \"App,Realtime,Worker\" -c \"cyan,magenta,yellow\" \"bun run 
dev\" \"bun run dev:sockets\" \"bun run worker\"", "load:workflow": "bun run load:workflow:baseline", "load:workflow:baseline": "BASE_URL=${BASE_URL:-http://localhost:3000} WARMUP_DURATION=${WARMUP_DURATION:-10} WARMUP_RATE=${WARMUP_RATE:-2} PEAK_RATE=${PEAK_RATE:-8} HOLD_DURATION=${HOLD_DURATION:-20} bunx artillery run scripts/load/workflow-concurrency.yml", "load:workflow:waves": "BASE_URL=${BASE_URL:-http://localhost:3000} WAVE_ONE_DURATION=${WAVE_ONE_DURATION:-10} WAVE_ONE_RATE=${WAVE_ONE_RATE:-6} QUIET_DURATION=${QUIET_DURATION:-5} WAVE_TWO_DURATION=${WAVE_TWO_DURATION:-15} WAVE_TWO_RATE=${WAVE_TWO_RATE:-8} WAVE_THREE_DURATION=${WAVE_THREE_DURATION:-20} WAVE_THREE_RATE=${WAVE_THREE_RATE:-10} bunx artillery run scripts/load/workflow-waves.yml", diff --git a/docker/app.Dockerfile b/docker/app.Dockerfile index 7dc0681f9bc..b5f7970b9d8 100644 --- a/docker/app.Dockerfile +++ b/docker/app.Dockerfile @@ -114,9 +114,12 @@ COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/execution/isolated-v # Copy the bundled PPTX worker artifact COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/pptx-worker.cjs ./apps/sim/dist/pptx-worker.cjs -# Copy the bundled BullMQ worker artifact and workspace packages it depends on +# Copy the bundled BullMQ worker artifact and workspace packages it needs at runtime. +# The bundle uses --packages=external so all node_modules are resolved at runtime. +# npm packages come from the standalone node_modules; workspace packages need explicit copies. 
COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/worker.cjs ./apps/sim/dist/worker.cjs -COPY --from=builder --chown=nextjs:nodejs /app/packages ./packages +COPY --from=builder --chown=nextjs:nodejs /app/packages/logger ./node_modules/@sim/logger +COPY --from=builder --chown=nextjs:nodejs /app/packages/db ./node_modules/@sim/db # Guardrails setup with pip caching COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/requirements.txt ./apps/sim/lib/guardrails/requirements.txt From 1da4b154307634845d851dcf80bd50f3d4ccae0a Mon Sep 17 00:00:00 2001 From: Waleed Date: Fri, 27 Mar 2026 19:54:00 -0700 Subject: [PATCH 09/65] =?UTF-8?q?fix(security):=20pentest=20remediation=20?= =?UTF-8?q?=E2=80=94=20condition=20escaping,=20SSRF=20hardening,=20ReDoS?= =?UTF-8?q?=20protection=20(#3820)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(executor): escape newline characters in condition expression strings Unescaped newline/carriage-return characters in resolved string values cause unterminated string literals in generated JS, crashing condition evaluation with a SyntaxError. Co-Authored-By: Claude Opus 4.6 * fix(security): prevent ReDoS in guardrails regex validation Add safe-regex2 to reject catastrophic backtracking patterns before execution and cap input length at 10k characters. Co-Authored-By: Claude Opus 4.6 * fix(security): SSRF localhost hardening and regex DoS protection Block localhost/loopback URLs in hosted environments using isHosted flag instead of allowHttp. Add safe-regex2 validation and input length limits to regex guardrails to prevent catastrophic backtracking. Co-Authored-By: Claude Opus 4.6 * fix(security): validate regex syntax before safety check Move new RegExp() before safe() so invalid patterns get a proper syntax error instead of a misleading "catastrophic backtracking" message. 
Co-Authored-By: Claude Opus 4.6 * fix(security): address PR review feedback - Hoist isLocalhost && isHosted guard to single early-return before protocol checks, removing redundant duplicate block - Move regex syntax validation (new RegExp) before safe-regex2 check so invalid patterns get proper syntax error instead of misleading "catastrophic backtracking" message * fix(security): remove input length cap from regex validation The 10k character cap would block legitimate guardrail checks on long LLM outputs. Input length doesn't affect ReDoS risk — the safe-regex2 pattern check already prevents catastrophic backtracking. Co-Authored-By: Claude Opus 4.6 * fix(tests): mock isHosted in input-validation and function-execute tests Tests that assert self-hosted localhost behavior need isHosted=false, which is not guaranteed in CI where NEXT_PUBLIC_APP_URL is set to the hosted domain. Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 --- .../app/api/function/execute/route.test.ts | 8 +++++++ apps/sim/executor/variables/resolver.ts | 8 ++++++- .../core/security/input-validation.server.ts | 6 ++--- .../core/security/input-validation.test.ts | 15 +++++++----- .../sim/lib/core/security/input-validation.ts | 16 +++++++------ apps/sim/lib/guardrails/validate_regex.ts | 24 +++++++++++++------ apps/sim/package.json | 3 ++- bun.lock | 5 ++++ 8 files changed, 59 insertions(+), 26 deletions(-) diff --git a/apps/sim/app/api/function/execute/route.test.ts b/apps/sim/app/api/function/execute/route.test.ts index b57c8fdb77e..9c9666c44c9 100644 --- a/apps/sim/app/api/function/execute/route.test.ts +++ b/apps/sim/app/api/function/execute/route.test.ts @@ -26,6 +26,14 @@ vi.mock('@/lib/execution/e2b', () => ({ executeInE2B: mockExecuteInE2B, })) +vi.mock('@/lib/core/config/feature-flags', () => ({ + isHosted: false, + isE2bEnabled: false, + isProd: false, + isDev: false, + isTest: true, +})) + import { validateProxyUrl } from '@/lib/core/security/input-validation' 
import { POST } from '@/app/api/function/execute/route' diff --git a/apps/sim/executor/variables/resolver.ts b/apps/sim/executor/variables/resolver.ts index 7da902e13be..88b23d72340 100644 --- a/apps/sim/executor/variables/resolver.ts +++ b/apps/sim/executor/variables/resolver.ts @@ -236,7 +236,13 @@ export class VariableResolver { } if (typeof resolved === 'string') { - const escaped = resolved.replace(/\\/g, '\\\\').replace(/'/g, "\\'") + const escaped = resolved + .replace(/\\/g, '\\\\') + .replace(/'/g, "\\'") + .replace(/\n/g, '\\n') + .replace(/\r/g, '\\r') + .replace(/\u2028/g, '\\u2028') + .replace(/\u2029/g, '\\u2029') return `'${escaped}'` } if (typeof resolved === 'object' && resolved !== null) { diff --git a/apps/sim/lib/core/security/input-validation.server.ts b/apps/sim/lib/core/security/input-validation.server.ts index 7ed391a3abb..95458150b7d 100644 --- a/apps/sim/lib/core/security/input-validation.server.ts +++ b/apps/sim/lib/core/security/input-validation.server.ts @@ -4,6 +4,7 @@ import https from 'https' import type { LookupFunction } from 'net' import { createLogger } from '@sim/logger' import * as ipaddr from 'ipaddr.js' +import { isHosted } from '@/lib/core/config/feature-flags' import { type ValidationResult, validateExternalUrl } from '@/lib/core/security/input-validation' const logger = createLogger('InputValidation') @@ -89,10 +90,7 @@ export async function validateUrlWithDNS( return ip === '127.0.0.1' || ip === '::1' })() - if ( - isPrivateOrReservedIP(address) && - !(isLocalhost && resolvedIsLoopback && !options.allowHttp) - ) { + if (isPrivateOrReservedIP(address) && !(isLocalhost && resolvedIsLoopback && !isHosted)) { logger.warn('URL resolves to blocked IP address', { paramName, hostname, diff --git a/apps/sim/lib/core/security/input-validation.test.ts b/apps/sim/lib/core/security/input-validation.test.ts index 3098c7294fd..46d7c7c0903 100644 --- a/apps/sim/lib/core/security/input-validation.test.ts +++ 
b/apps/sim/lib/core/security/input-validation.test.ts @@ -23,6 +23,9 @@ import { validateUrlWithDNS } from '@/lib/core/security/input-validation.server' import { sanitizeForLogging } from '@/lib/core/security/redaction' vi.mock('@sim/logger', () => loggerMock) +vi.mock('@/lib/core/config/feature-flags', () => ({ + isHosted: false, +})) describe('validatePathSegment', () => { describe('valid inputs', () => { @@ -569,25 +572,25 @@ describe('validateUrlWithDNS', () => { expect(result.error).toContain('https://') }) - it('should accept https localhost URLs', async () => { + it('should accept https localhost URLs (self-hosted)', async () => { const result = await validateUrlWithDNS('https://localhost/api') expect(result.isValid).toBe(true) expect(result.resolvedIP).toBeDefined() }) - it('should accept http localhost URLs', async () => { + it('should accept http localhost URLs (self-hosted)', async () => { const result = await validateUrlWithDNS('http://localhost/api') expect(result.isValid).toBe(true) expect(result.resolvedIP).toBeDefined() }) - it('should accept IPv4 loopback URLs', async () => { + it('should accept IPv4 loopback URLs (self-hosted)', async () => { const result = await validateUrlWithDNS('http://127.0.0.1/api') expect(result.isValid).toBe(true) expect(result.resolvedIP).toBeDefined() }) - it('should accept IPv6 loopback URLs', async () => { + it('should accept IPv6 loopback URLs (self-hosted)', async () => { const result = await validateUrlWithDNS('http://[::1]/api') expect(result.isValid).toBe(true) expect(result.resolvedIP).toBeDefined() @@ -918,7 +921,7 @@ describe('validateExternalUrl', () => { }) }) - describe('localhost and loopback addresses', () => { + describe('localhost and loopback addresses (self-hosted)', () => { it.concurrent('should accept https localhost', () => { const result = validateExternalUrl('https://localhost/api') expect(result.isValid).toBe(true) @@ -1027,7 +1030,7 @@ describe('validateImageUrl', () => { 
expect(result.isValid).toBe(true) }) - it.concurrent('should accept localhost URLs', () => { + it.concurrent('should accept localhost URLs (self-hosted)', () => { const result = validateImageUrl('https://localhost/image.png') expect(result.isValid).toBe(true) }) diff --git a/apps/sim/lib/core/security/input-validation.ts b/apps/sim/lib/core/security/input-validation.ts index ce803fdef53..52c4dde288a 100644 --- a/apps/sim/lib/core/security/input-validation.ts +++ b/apps/sim/lib/core/security/input-validation.ts @@ -1,5 +1,6 @@ import { createLogger } from '@sim/logger' import * as ipaddr from 'ipaddr.js' +import { isHosted } from '@/lib/core/config/feature-flags' const logger = createLogger('InputValidation') @@ -710,6 +711,13 @@ export function validateExternalUrl( } } + if (isLocalhost && isHosted) { + return { + isValid: false, + error: `${paramName} cannot point to localhost`, + } + } + if (options.allowHttp) { if (protocol !== 'https:' && protocol !== 'http:') { return { @@ -717,13 +725,7 @@ export function validateExternalUrl( error: `${paramName} must use http:// or https:// protocol`, } } - if (isLocalhost) { - return { - isValid: false, - error: `${paramName} cannot point to localhost`, - } - } - } else if (protocol !== 'https:' && !(protocol === 'http:' && isLocalhost)) { + } else if (protocol !== 'https:' && !(protocol === 'http:' && isLocalhost && !isHosted)) { return { isValid: false, error: `${paramName} must use https:// protocol`, diff --git a/apps/sim/lib/guardrails/validate_regex.ts b/apps/sim/lib/guardrails/validate_regex.ts index 16bd78ebf2d..40cfb0a0405 100644 --- a/apps/sim/lib/guardrails/validate_regex.ts +++ b/apps/sim/lib/guardrails/validate_regex.ts @@ -1,3 +1,5 @@ +import safe from 'safe-regex2' + /** * Validate if input matches regex pattern */ @@ -7,15 +9,23 @@ export interface ValidationResult { } export function validateRegex(inputStr: string, pattern: string): ValidationResult { + let regex: RegExp try { - const regex = new 
RegExp(pattern) - const match = regex.test(inputStr) - - if (match) { - return { passed: true } - } - return { passed: false, error: 'Input does not match regex pattern' } + regex = new RegExp(pattern) } catch (error: any) { return { passed: false, error: `Invalid regex pattern: ${error.message}` } } + + if (!safe(pattern)) { + return { + passed: false, + error: 'Regex pattern rejected: potentially unsafe (catastrophic backtracking)', + } + } + + const match = regex.test(inputStr) + if (match) { + return { passed: true } + } + return { passed: false, error: 'Input does not match regex pattern' } } diff --git a/apps/sim/package.json b/apps/sim/package.json index c8cc4530338..055a455b6c7 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -116,8 +116,8 @@ "es-toolkit": "1.45.1", "ffmpeg-static": "5.3.0", "fluent-ffmpeg": "2.1.3", - "free-email-domains": "1.2.25", "framer-motion": "^12.5.0", + "free-email-domains": "1.2.25", "google-auth-library": "10.5.0", "gray-matter": "^4.0.3", "groq-sdk": "^0.15.0", @@ -174,6 +174,7 @@ "remark-gfm": "4.0.1", "resend": "^4.1.2", "rss-parser": "3.13.0", + "safe-regex2": "5.1.0", "sharp": "0.34.3", "soap": "1.8.0", "socket.io": "^4.8.1", diff --git a/bun.lock b/bun.lock index 42c3776ac3c..848126625b1 100644 --- a/bun.lock +++ b/bun.lock @@ -193,6 +193,7 @@ "remark-gfm": "4.0.1", "resend": "^4.1.2", "rss-parser": "3.13.0", + "safe-regex2": "5.1.0", "sharp": "0.34.3", "soap": "1.8.0", "socket.io": "^4.8.1", @@ -3320,6 +3321,8 @@ "restore-cursor": ["restore-cursor@3.1.0", "", { "dependencies": { "onetime": "^5.1.0", "signal-exit": "^3.0.2" } }, "sha512-l+sSefzHpj5qimhFSE5a8nufZYAM3sBSVMAPtYkmC+4EH2anSGaEMXSD0izRQbu9nfyQ9y5JrVmp7E8oZrUjvA=="], + "ret": ["ret@0.5.0", "", {}, "sha512-I1XxrZSQ+oErkRR4jYbAyEEu2I0avBvvMM5JN+6EBprOGRCs63ENqZ3vjavq8fBw2+62G5LF5XelKwuJpcvcxw=="], + "retry": ["retry@0.13.1", "", {}, "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg=="], "reusify": 
["reusify@1.1.0", "", {}, "sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw=="], @@ -3346,6 +3349,8 @@ "safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="], + "safe-regex2": ["safe-regex2@5.1.0", "", { "dependencies": { "ret": "~0.5.0" }, "bin": { "safe-regex2": "bin/safe-regex2.js" } }, "sha512-pNHAuBW7TrcleFHsxBr5QMi/Iyp0ENjUKz7GCcX1UO7cMh+NmVK6HxQckNL1tJp1XAJVjG6B8OKIPqodqj9rtw=="], + "safe-stable-stringify": ["safe-stable-stringify@2.5.0", "", {}, "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA=="], "safer-buffer": ["safer-buffer@2.1.2", "", {}, "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="], From bed7091a4028aa05acdc23aede63a844729406c7 Mon Sep 17 00:00:00 2001 From: Vikhyath Mondreti Date: Fri, 27 Mar 2026 19:54:56 -0700 Subject: [PATCH 10/65] improvement(worker): configuration defaults (#3821) * improvement(worker): configuration defaults * update readmes * realtime curl import --- README.md | 10 ++++++++-- apps/sim/lib/execution/isolated-vm.ts | 2 ++ apps/sim/package.json | 2 +- docker-compose.local.yml | 10 ++++------ docker-compose.prod.yml | 8 ++++---- docker/app.Dockerfile | 8 ++------ docker/realtime.Dockerfile | 3 ++- helm/sim/README.md | 11 +++++++++++ helm/sim/templates/_helpers.tpl | 4 ---- helm/sim/templates/deployment-worker.yaml | 2 +- helm/sim/templates/external-secret-app.yaml | 5 +++++ helm/sim/templates/secrets-app.yaml | 3 +++ helm/sim/values.yaml | 12 +++++++++--- 13 files changed, 52 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 831361fb3ed..114d5ffc207 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,10 @@ docker compose -f docker-compose.prod.yml up -d Open [http://localhost:3000](http://localhost:3000) +#### Background worker note + +The Docker Compose stack starts a dedicated 
worker container by default. If `REDIS_URL` is not configured, the worker will start, log that it is idle, and do no queue processing. This is expected. Queue-backed API, webhook, and schedule execution requires Redis; installs without Redis continue to use the inline execution path. + Sim also supports local models via [Ollama](https://ollama.ai) and [vLLM](https://docs.vllm.ai/) — see the [Docker self-hosting docs](https://docs.sim.ai/self-hosting/docker) for setup details. ### Self-hosted: Manual Setup @@ -113,10 +117,12 @@ cd packages/db && bunx drizzle-kit migrate --config=./drizzle.config.ts 5. Start development servers: ```bash -bun run dev:full # Starts both Next.js app and realtime socket server +bun run dev:full # Starts Next.js app, realtime socket server, and the BullMQ worker ``` -Or run separately: `bun run dev` (Next.js) and `cd apps/sim && bun run dev:sockets` (realtime). +If `REDIS_URL` is not configured, the worker will remain idle and execution continues inline. + +Or run separately: `bun run dev` (Next.js), `cd apps/sim && bun run dev:sockets` (realtime), and `cd apps/sim && bun run worker` (BullMQ worker). 
## Copilot API Keys diff --git a/apps/sim/lib/execution/isolated-vm.ts b/apps/sim/lib/execution/isolated-vm.ts index 9deffbe83c0..877035760e5 100644 --- a/apps/sim/lib/execution/isolated-vm.ts +++ b/apps/sim/lib/execution/isolated-vm.ts @@ -696,6 +696,8 @@ function spawnWorker(): Promise { const currentDir = path.dirname(fileURLToPath(import.meta.url)) const candidatePaths = [ path.join(currentDir, 'isolated-vm-worker.cjs'), + path.join(currentDir, '..', '..', 'lib', 'execution', 'isolated-vm-worker.cjs'), + path.join(process.cwd(), 'apps', 'sim', 'lib', 'execution', 'isolated-vm-worker.cjs'), path.join(process.cwd(), 'lib', 'execution', 'isolated-vm-worker.cjs'), ] const workerPath = candidatePaths.find((p) => fs.existsSync(p)) diff --git a/apps/sim/package.json b/apps/sim/package.json index 055a455b6c7..ac1c815075f 100644 --- a/apps/sim/package.json +++ b/apps/sim/package.json @@ -18,7 +18,7 @@ "load:workflow:isolation": "BASE_URL=${BASE_URL:-http://localhost:3000} ISOLATION_DURATION=${ISOLATION_DURATION:-30} TOTAL_RATE=${TOTAL_RATE:-9} WORKSPACE_A_WEIGHT=${WORKSPACE_A_WEIGHT:-8} WORKSPACE_B_WEIGHT=${WORKSPACE_B_WEIGHT:-1} bunx artillery run scripts/load/workflow-isolation.yml", "build": "bun run build:pptx-worker && bun run build:worker && next build", "build:pptx-worker": "bun build ./lib/execution/pptx-worker.cjs --target=node --format=cjs --outfile ./dist/pptx-worker.cjs", - "build:worker": "bun build ./worker/index.ts --target=node --format=cjs --packages=external --outfile ./dist/worker.cjs", + "build:worker": "bun build ./worker/index.ts --target=node --format=esm --splitting --outdir ./dist/worker --external isolated-vm", "start": "next start", "worker": "NODE_ENV=production bun run worker/index.ts", "prepare": "cd ../.. 
&& bun husky", diff --git a/docker-compose.local.yml b/docker-compose.local.yml index ceb8dc3883b..354a77d1393 100644 --- a/docker-compose.local.yml +++ b/docker-compose.local.yml @@ -32,7 +32,7 @@ services: realtime: condition: service_healthy healthcheck: - test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3000'] + test: ['CMD', 'curl', '-fsS', 'http://127.0.0.1:3000'] interval: 90s timeout: 5s retries: 3 @@ -61,7 +61,7 @@ services: limits: memory: 1G healthcheck: - test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3002/health'] + test: ['CMD', 'curl', '-fsS', 'http://127.0.0.1:3002/health'] interval: 90s timeout: 5s retries: 3 @@ -71,10 +71,8 @@ services: build: context: . dockerfile: docker/app.Dockerfile - command: ['bun', 'apps/sim/dist/worker.cjs'] + command: ['bun', 'apps/sim/dist/worker/index.js'] restart: unless-stopped - profiles: - - worker deploy: resources: limits: @@ -93,7 +91,7 @@ services: migrations: condition: service_completed_successfully healthcheck: - test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3001/health/live'] + test: ['CMD', 'curl', '-fsS', 'http://127.0.0.1:3001/health/live'] interval: 90s timeout: 5s retries: 3 diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index da547506556..5f8f8bc9db5 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -34,7 +34,7 @@ services: realtime: condition: service_healthy healthcheck: - test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3000'] + test: ['CMD', 'curl', '-fsS', 'http://127.0.0.1:3000'] interval: 90s timeout: 5s retries: 3 @@ -42,7 +42,7 @@ services: sim-worker: image: ghcr.io/simstudioai/simstudio:latest - command: ['bun', 'apps/sim/dist/worker.cjs'] + command: ['bun', 'apps/sim/dist/worker/index.js'] restart: unless-stopped deploy: resources: @@ -71,7 +71,7 @@ services: migrations: condition: service_completed_successfully healthcheck: - test: ['CMD', 'wget', '--spider', '--quiet', 
'http://127.0.0.1:${WORKER_PORT:-3001}/health/live'] + test: ['CMD', 'curl', '-fsS', 'http://127.0.0.1:${WORKER_PORT:-3001}/health/live'] interval: 90s timeout: 5s retries: 3 @@ -98,7 +98,7 @@ services: db: condition: service_healthy healthcheck: - test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3002/health'] + test: ['CMD', 'curl', '-fsS', 'http://127.0.0.1:3002/health'] interval: 90s timeout: 5s retries: 3 diff --git a/docker/app.Dockerfile b/docker/app.Dockerfile index b5f7970b9d8..b5e2f14457b 100644 --- a/docker/app.Dockerfile +++ b/docker/app.Dockerfile @@ -114,12 +114,8 @@ COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/execution/isolated-v # Copy the bundled PPTX worker artifact COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/pptx-worker.cjs ./apps/sim/dist/pptx-worker.cjs -# Copy the bundled BullMQ worker artifact and workspace packages it needs at runtime. -# The bundle uses --packages=external so all node_modules are resolved at runtime. -# npm packages come from the standalone node_modules; workspace packages need explicit copies. 
-COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/worker.cjs ./apps/sim/dist/worker.cjs -COPY --from=builder --chown=nextjs:nodejs /app/packages/logger ./node_modules/@sim/logger -COPY --from=builder --chown=nextjs:nodejs /app/packages/db ./node_modules/@sim/db +# Copy the bundled BullMQ worker (self-contained ESM bundle, only isolated-vm is external) +COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/worker ./apps/sim/dist/worker # Guardrails setup with pip caching COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/requirements.txt ./apps/sim/lib/guardrails/requirements.txt diff --git a/docker/realtime.Dockerfile b/docker/realtime.Dockerfile index 337e5e2afdb..add2c194a99 100644 --- a/docker/realtime.Dockerfile +++ b/docker/realtime.Dockerfile @@ -3,11 +3,12 @@ # ======================================== FROM oven/bun:1.3.11-alpine AS base +RUN apk add --no-cache libc6-compat curl + # ======================================== # Dependencies Stage: Install Dependencies # ======================================== FROM base AS deps -RUN apk add --no-cache libc6-compat WORKDIR /app COPY package.json bun.lock turbo.json ./ diff --git a/helm/sim/README.md b/helm/sim/README.md index 0c33120539b..3507f543495 100644 --- a/helm/sim/README.md +++ b/helm/sim/README.md @@ -709,6 +709,17 @@ kubectl create secret generic my-postgresql-secret \ See `examples/values-existing-secret.yaml` for more details. +### Worker and Redis + +The Helm chart enables the BullMQ worker by default so the deployment topology matches Docker Compose. If `REDIS_URL` is not configured, the worker pod will still start but remain idle and do no queue processing. This is expected. + +Queue-backed API, webhook, and schedule execution requires Redis. Installs without Redis continue to use the inline execution path. 
If you do not want the worker pod at all, set: + +```yaml +worker: + enabled: false +``` + ### External Secrets Parameters | Parameter | Description | Default | diff --git a/helm/sim/templates/_helpers.tpl b/helm/sim/templates/_helpers.tpl index 3ba078c5e67..915df7cf618 100644 --- a/helm/sim/templates/_helpers.tpl +++ b/helm/sim/templates/_helpers.tpl @@ -222,10 +222,6 @@ Skip validation when using existing secrets or External Secrets Operator {{- fail "realtime.env.BETTER_AUTH_SECRET must not use the default placeholder value. Generate a secure secret with: openssl rand -hex 32" }} {{- end }} {{- end }} -{{- /* Worker validation - REDIS_URL is required when worker is enabled */ -}} -{{- if and .Values.worker.enabled (not .Values.app.env.REDIS_URL) }} -{{- fail "app.env.REDIS_URL is required when worker.enabled=true" }} -{{- end }} {{- /* PostgreSQL password validation - skip if using existing secret or ESO */ -}} {{- if not (or $useExistingPostgresSecret $useExternalSecrets) }} {{- if and .Values.postgresql.enabled (not .Values.postgresql.auth.password) }} diff --git a/helm/sim/templates/deployment-worker.yaml b/helm/sim/templates/deployment-worker.yaml index 701fdff1849..adf7b9c284f 100644 --- a/helm/sim/templates/deployment-worker.yaml +++ b/helm/sim/templates/deployment-worker.yaml @@ -37,7 +37,7 @@ spec: - name: worker image: {{ include "sim.image" (dict "context" . 
"image" .Values.worker.image) }} imagePullPolicy: {{ .Values.worker.image.pullPolicy }} - command: ["bun", "apps/sim/dist/worker.cjs"] + command: ["bun", "apps/sim/dist/worker/index.js"] ports: - name: health containerPort: {{ .Values.worker.healthPort }} diff --git a/helm/sim/templates/external-secret-app.yaml b/helm/sim/templates/external-secret-app.yaml index 3377901fcc3..b5b2b8fa34b 100644 --- a/helm/sim/templates/external-secret-app.yaml +++ b/helm/sim/templates/external-secret-app.yaml @@ -41,4 +41,9 @@ spec: remoteRef: key: {{ .Values.externalSecrets.remoteRefs.app.API_ENCRYPTION_KEY }} {{- end }} + {{- if .Values.externalSecrets.remoteRefs.app.REDIS_URL }} + - secretKey: REDIS_URL + remoteRef: + key: {{ .Values.externalSecrets.remoteRefs.app.REDIS_URL }} + {{- end }} {{- end }} diff --git a/helm/sim/templates/secrets-app.yaml b/helm/sim/templates/secrets-app.yaml index 29a9d065f2d..c99e485384b 100644 --- a/helm/sim/templates/secrets-app.yaml +++ b/helm/sim/templates/secrets-app.yaml @@ -24,4 +24,7 @@ stringData: {{- if .Values.app.env.API_ENCRYPTION_KEY }} API_ENCRYPTION_KEY: {{ .Values.app.env.API_ENCRYPTION_KEY | quote }} {{- end }} + {{- if .Values.app.env.REDIS_URL }} + REDIS_URL: {{ .Values.app.env.REDIS_URL | quote }} + {{- end }} {{- end }} diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml index 4fd2828d8c0..92c163b4222 100644 --- a/helm/sim/values.yaml +++ b/helm/sim/values.yaml @@ -64,6 +64,7 @@ app: INTERNAL_API_SECRET: "INTERNAL_API_SECRET" CRON_SECRET: "CRON_SECRET" API_ENCRYPTION_KEY: "API_ENCRYPTION_KEY" + REDIS_URL: "REDIS_URL" # Environment variables env: @@ -95,6 +96,7 @@ app: # Optional: API Key Encryption (RECOMMENDED for production) # Generate 64-character hex string using: openssl rand -hex 32 (outputs 64 hex chars = 32 bytes) API_ENCRYPTION_KEY: "" # OPTIONAL - encrypts API keys at rest, must be exactly 64 hex characters, if not set keys stored in plain text + REDIS_URL: "" # OPTIONAL - Redis connection string for 
BullMQ/workers; can also come from app secret or External Secrets # Email & Communication EMAIL_VERIFICATION_ENABLED: "false" # Enable email verification for user registration and login (defaults to false) @@ -359,10 +361,12 @@ realtime: extraVolumeMounts: [] # BullMQ worker configuration (processes background jobs when Redis is available) -# Uses the same image as the main app with a different command +# Uses the same image as the main app with a different command. +# Enabled by default so self-hosted deployments get the same topology as compose. +# Without REDIS_URL the worker starts, logs that it is idle, and does no queue processing. worker: - # Enable/disable the worker deployment (requires REDIS_URL to be set in app.env) - enabled: false + # Enable/disable the worker deployment + enabled: true # Image configuration (defaults to same image as app) image: @@ -1283,6 +1287,8 @@ externalSecrets: CRON_SECRET: "" # Path to API_ENCRYPTION_KEY in external store (optional) API_ENCRYPTION_KEY: "" + # Path to REDIS_URL in external store (optional, required for worker when not set in app.env) + REDIS_URL: "" # PostgreSQL password (for internal PostgreSQL) postgresql: From a1b32d3a1a547a691495261cb353f5d96da37ca2 Mon Sep 17 00:00:00 2001 From: Waleed Date: Fri, 27 Mar 2026 19:59:47 -0700 Subject: [PATCH 11/65] improvement(tour): remove auto-start, only trigger on explicit user action (#3823) --- .../components/product-tour/product-tour.tsx | 4 - .../components/product-tour/use-tour.ts | 84 ++----------------- .../components/product-tour/workflow-tour.tsx | 6 +- 3 files changed, 8 insertions(+), 86 deletions(-) diff --git a/apps/sim/app/workspace/[workspaceId]/components/product-tour/product-tour.tsx b/apps/sim/app/workspace/[workspaceId]/components/product-tour/product-tour.tsx index 1c49837afa5..ba659237800 100644 --- a/apps/sim/app/workspace/[workspaceId]/components/product-tour/product-tour.tsx +++ 
b/apps/sim/app/workspace/[workspaceId]/components/product-tour/product-tour.tsx @@ -16,7 +16,6 @@ const Joyride = dynamic(() => import('react-joyride'), { ssr: false, }) -const NAV_TOUR_STORAGE_KEY = 'sim-nav-tour-completed-v1' export const START_NAV_TOUR_EVENT = 'start-nav-tour' export function NavTour() { @@ -25,9 +24,6 @@ export function NavTour() { const { run, stepIndex, tourKey, isTooltipVisible, isEntrance, handleCallback } = useTour({ steps: navTourSteps, - storageKey: NAV_TOUR_STORAGE_KEY, - autoStartDelay: 1200, - resettable: true, triggerEvent: START_NAV_TOUR_EVENT, tourName: 'Navigation tour', disabled: isWorkflowPage, diff --git a/apps/sim/app/workspace/[workspaceId]/components/product-tour/use-tour.ts b/apps/sim/app/workspace/[workspaceId]/components/product-tour/use-tour.ts index 1c3ed52e179..10b09caf9bb 100644 --- a/apps/sim/app/workspace/[workspaceId]/components/product-tour/use-tour.ts +++ b/apps/sim/app/workspace/[workspaceId]/components/product-tour/use-tour.ts @@ -12,17 +12,11 @@ const FADE_OUT_MS = 80 interface UseTourOptions { /** Tour step definitions */ steps: Step[] - /** localStorage key for completion persistence */ - storageKey: string - /** Delay before auto-starting the tour (ms) */ - autoStartDelay?: number - /** Whether this tour can be reset/retriggered */ - resettable?: boolean /** Custom event name to listen for manual triggers */ triggerEvent?: string /** Identifier for logging */ tourName?: string - /** When true, suppresses auto-start (e.g. to avoid overlapping with another active tour) */ + /** When true, stops a running tour (e.g. 
navigating away from the relevant page) */ disabled?: boolean } @@ -41,49 +35,14 @@ interface UseTourReturn { handleCallback: (data: CallBackProps) => void } -function isTourCompleted(storageKey: string): boolean { - try { - return localStorage.getItem(storageKey) === 'true' - } catch { - return false - } -} - -function markTourCompleted(storageKey: string): void { - try { - localStorage.setItem(storageKey, 'true') - } catch { - logger.warn('Failed to persist tour completion', { storageKey }) - } -} - -function clearTourCompletion(storageKey: string): void { - try { - localStorage.removeItem(storageKey) - } catch { - logger.warn('Failed to clear tour completion', { storageKey }) - } -} - -/** - * Tracks which tours have already attempted auto-start in this page session. - * Module-level so it survives component remounts (e.g. navigating between - * workflows remounts WorkflowTour), while still resetting on full page reload. - */ -const autoStartAttempted = new Set() - /** * Shared hook for managing product tour state with smooth transitions. * - * Handles auto-start on first visit, localStorage persistence, - * manual triggering via custom events, and coordinated fade + * Handles manual triggering via custom events and coordinated fade * transitions between steps to prevent layout shift. */ export function useTour({ steps, - storageKey, - autoStartDelay = 1200, - resettable = false, triggerEvent, tourName = 'tour', disabled = false, @@ -94,15 +53,10 @@ export function useTour({ const [isTooltipVisible, setIsTooltipVisible] = useState(true) const [isEntrance, setIsEntrance] = useState(true) - const disabledRef = useRef(disabled) const retriggerTimerRef = useRef | null>(null) const transitionTimerRef = useRef | null>(null) const rafRef = useRef(null) - useEffect(() => { - disabledRef.current = disabled - }, [disabled]) - /** * Schedules a two-frame rAF to reveal the tooltip after the browser * finishes repositioning. 
Stores the outer frame ID in `rafRef` so @@ -137,8 +91,7 @@ export function useTour({ setRun(false) setIsTooltipVisible(true) setIsEntrance(true) - markTourCompleted(storageKey) - }, [storageKey, cancelPendingTransitions]) + }, [cancelPendingTransitions]) /** Transition to a new step with a coordinated fade-out/fade-in */ const transitionToStep = useCallback( @@ -164,40 +117,17 @@ export function useTour({ /** Stop the tour when disabled becomes true (e.g. navigating away from the relevant page) */ useEffect(() => { if (disabled && run) { - cancelPendingTransitions() - setRun(false) - setIsTooltipVisible(true) - setIsEntrance(true) + stopTour() logger.info(`${tourName} paused — disabled became true`) } - }, [disabled, run, tourName, cancelPendingTransitions]) - - /** Auto-start on first visit (once per page session per tour) */ - useEffect(() => { - if (disabled || autoStartAttempted.has(storageKey) || isTourCompleted(storageKey)) return - - const timer = setTimeout(() => { - if (disabledRef.current) return - - autoStartAttempted.add(storageKey) - setStepIndex(0) - setIsEntrance(true) - setIsTooltipVisible(false) - setRun(true) - logger.info(`Auto-starting ${tourName}`) - scheduleReveal() - }, autoStartDelay) - - return () => clearTimeout(timer) - }, [disabled, storageKey, autoStartDelay, tourName, scheduleReveal]) + }, [disabled, run, tourName, stopTour]) /** Listen for manual trigger events */ useEffect(() => { - if (!triggerEvent || !resettable) return + if (!triggerEvent) return const handleTrigger = () => { setRun(false) - clearTourCompletion(storageKey) setTourKey((k) => k + 1) if (retriggerTimerRef.current) { @@ -222,7 +152,7 @@ export function useTour({ clearTimeout(retriggerTimerRef.current) } } - }, [triggerEvent, resettable, storageKey, tourName, scheduleReveal]) + }, [triggerEvent, tourName, scheduleReveal]) /** Clean up all pending async work on unmount */ useEffect(() => { diff --git 
a/apps/sim/app/workspace/[workspaceId]/components/product-tour/workflow-tour.tsx b/apps/sim/app/workspace/[workspaceId]/components/product-tour/workflow-tour.tsx index 13bcf7468c5..383a3311c0b 100644 --- a/apps/sim/app/workspace/[workspaceId]/components/product-tour/workflow-tour.tsx +++ b/apps/sim/app/workspace/[workspaceId]/components/product-tour/workflow-tour.tsx @@ -15,19 +15,15 @@ const Joyride = dynamic(() => import('react-joyride'), { ssr: false, }) -const WORKFLOW_TOUR_STORAGE_KEY = 'sim-workflow-tour-completed-v1' export const START_WORKFLOW_TOUR_EVENT = 'start-workflow-tour' /** * Workflow tour that covers the canvas, blocks, copilot, and deployment. - * Runs on first workflow visit and can be retriggered via "Take a tour". + * Triggered via "Take a tour" in the sidebar menu. */ export function WorkflowTour() { const { run, stepIndex, tourKey, isTooltipVisible, isEntrance, handleCallback } = useTour({ steps: workflowTourSteps, - storageKey: WORKFLOW_TOUR_STORAGE_KEY, - autoStartDelay: 800, - resettable: true, triggerEvent: START_WORKFLOW_TOUR_EVENT, tourName: 'Workflow tour', }) From 918c03202b86d12297edba3121e92afa5c2efa64 Mon Sep 17 00:00:00 2001 From: Waleed Date: Fri, 27 Mar 2026 20:22:30 -0700 Subject: [PATCH 12/65] fix(mcp): use correct modal for creating workflow MCP servers in deploy (#3822) * fix(mcp): use correct modal for creating workflow MCP servers in deploy * fix(mcp): show workflows field during loading and when empty --- .../create-workflow-mcp-server-modal.tsx | 162 ++++++++++++++++++ .../workflow-mcp-servers.tsx | 116 +------------ .../deploy-modal/components/mcp/mcp.tsx | 26 +-- 3 files changed, 177 insertions(+), 127 deletions(-) create mode 100644 apps/sim/app/workspace/[workspaceId]/settings/components/workflow-mcp-servers/create-workflow-mcp-server-modal.tsx diff --git a/apps/sim/app/workspace/[workspaceId]/settings/components/workflow-mcp-servers/create-workflow-mcp-server-modal.tsx 
b/apps/sim/app/workspace/[workspaceId]/settings/components/workflow-mcp-servers/create-workflow-mcp-server-modal.tsx new file mode 100644 index 00000000000..249c73e2f27 --- /dev/null +++ b/apps/sim/app/workspace/[workspaceId]/settings/components/workflow-mcp-servers/create-workflow-mcp-server-modal.tsx @@ -0,0 +1,162 @@ +'use client' + +import { useCallback, useEffect, useState } from 'react' +import { createLogger } from '@sim/logger' +import { + Button, + ButtonGroup, + ButtonGroupItem, + Combobox, + type ComboboxOption, + Input as EmcnInput, + Modal, + ModalBody, + ModalContent, + ModalFooter, + ModalHeader, + Textarea, +} from '@/components/emcn' +import { FormField } from '@/app/workspace/[workspaceId]/settings/components/mcp/components' +import { useCreateWorkflowMcpServer } from '@/hooks/queries/workflow-mcp-servers' + +const logger = createLogger('CreateWorkflowMcpServerModal') + +const INITIAL_FORM_DATA: { name: string; description: string; isPublic: boolean } = { + name: '', + description: '', + isPublic: false, +} + +interface CreateWorkflowMcpServerModalProps { + open: boolean + onOpenChange: (open: boolean) => void + workspaceId: string + workflowOptions?: ComboboxOption[] + isLoadingWorkflows?: boolean +} + +export function CreateWorkflowMcpServerModal({ + open, + onOpenChange, + workspaceId, + workflowOptions, + isLoadingWorkflows = false, +}: CreateWorkflowMcpServerModalProps) { + const createServerMutation = useCreateWorkflowMcpServer() + + const [formData, setFormData] = useState({ ...INITIAL_FORM_DATA }) + const [selectedWorkflowIds, setSelectedWorkflowIds] = useState([]) + + const isFormValid = formData.name.trim().length > 0 + + useEffect(() => { + if (open) { + setFormData({ ...INITIAL_FORM_DATA }) + setSelectedWorkflowIds([]) + } + }, [open]) + + const handleCreateServer = useCallback(async () => { + if (!formData.name.trim()) return + + try { + await createServerMutation.mutateAsync({ + workspaceId, + name: formData.name.trim(), + 
description: formData.description.trim() || undefined, + isPublic: formData.isPublic, + workflowIds: selectedWorkflowIds.length > 0 ? selectedWorkflowIds : undefined, + }) + onOpenChange(false) + } catch (err) { + logger.error('Failed to create server:', err) + } + }, [formData, selectedWorkflowIds, workspaceId, onOpenChange]) + + const showWorkflows = workflowOptions !== undefined + + return ( + + + Add New MCP Server + +
+ + setFormData({ ...formData, name: e.target.value })} + className='h-9' + /> + + + +