From 32296d07b8e0667023503a3832019afbece22312 Mon Sep 17 00:00:00 2001
From: Vikhyath Mondreti
Date: Fri, 27 Mar 2026 18:10:39 -0700
Subject: [PATCH 1/2] fix(worker): dockerfile + helm updates

---
 apps/sim/package.json                     |  3 +-
 docker-compose.local.yml                  | 32 ++++++++
 docker-compose.prod.yml                   |  2 +-
 docker/app.Dockerfile                     |  3 +
 helm/sim/templates/_helpers.tpl           | 16 ++++
 helm/sim/templates/deployment-worker.yaml | 99 +++++++++++++++++++++++
 helm/sim/values.yaml                      | 50 ++++++++++++
 7 files changed, 203 insertions(+), 2 deletions(-)
 create mode 100644 helm/sim/templates/deployment-worker.yaml

diff --git a/apps/sim/package.json b/apps/sim/package.json
index 03c91227186..d22daf42ca4 100644
--- a/apps/sim/package.json
+++ b/apps/sim/package.json
@@ -17,8 +17,9 @@
     "load:workflow:baseline": "BASE_URL=${BASE_URL:-http://localhost:3000} WARMUP_DURATION=${WARMUP_DURATION:-10} WARMUP_RATE=${WARMUP_RATE:-2} PEAK_RATE=${PEAK_RATE:-8} HOLD_DURATION=${HOLD_DURATION:-20} bunx artillery run scripts/load/workflow-concurrency.yml",
     "load:workflow:waves": "BASE_URL=${BASE_URL:-http://localhost:3000} WAVE_ONE_DURATION=${WAVE_ONE_DURATION:-10} WAVE_ONE_RATE=${WAVE_ONE_RATE:-6} QUIET_DURATION=${QUIET_DURATION:-5} WAVE_TWO_DURATION=${WAVE_TWO_DURATION:-15} WAVE_TWO_RATE=${WAVE_TWO_RATE:-8} WAVE_THREE_DURATION=${WAVE_THREE_DURATION:-20} WAVE_THREE_RATE=${WAVE_THREE_RATE:-10} bunx artillery run scripts/load/workflow-waves.yml",
     "load:workflow:isolation": "BASE_URL=${BASE_URL:-http://localhost:3000} ISOLATION_DURATION=${ISOLATION_DURATION:-30} TOTAL_RATE=${TOTAL_RATE:-9} WORKSPACE_A_WEIGHT=${WORKSPACE_A_WEIGHT:-8} WORKSPACE_B_WEIGHT=${WORKSPACE_B_WEIGHT:-1} bunx artillery run scripts/load/workflow-isolation.yml",
-    "build": "bun run build:pptx-worker && next build",
+    "build": "bun run build:pptx-worker && bun run build:worker && next build",
     "build:pptx-worker": "bun build ./lib/execution/pptx-worker.cjs --target=node --format=cjs --outfile ./dist/pptx-worker.cjs",
+    "build:worker": "bun build ./worker/index.ts --target=node --format=cjs --packages=external --outfile ./dist/worker.cjs",
     "start": "next start",
     "worker": "NODE_ENV=production bun run worker/index.ts",
     "prepare": "cd ../.. && bun husky",
diff --git a/docker-compose.local.yml b/docker-compose.local.yml
index f47643ad00f..ceb8dc3883b 100644
--- a/docker-compose.local.yml
+++ b/docker-compose.local.yml
@@ -67,6 +67,38 @@ services:
       retries: 3
       start_period: 10s
 
+  sim-worker:
+    build:
+      context: .
+      dockerfile: docker/app.Dockerfile
+    command: ['bun', 'apps/sim/dist/worker.cjs']
+    restart: unless-stopped
+    profiles:
+      - worker
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+    environment:
+      - NODE_ENV=development
+      - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@db:5432/${POSTGRES_DB:-simstudio}
+      - REDIS_URL=${REDIS_URL:-}
+      - ENCRYPTION_KEY=${ENCRYPTION_KEY:-dev-encryption-key-at-least-32-chars}
+      - API_ENCRYPTION_KEY=${API_ENCRYPTION_KEY:-}
+      - INTERNAL_API_SECRET=${INTERNAL_API_SECRET:-dev-internal-api-secret-min-32-chars}
+      - WORKER_PORT=3001
+    depends_on:
+      db:
+        condition: service_healthy
+      migrations:
+        condition: service_completed_successfully
+    healthcheck:
+      test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:3001/health/live']
+      interval: 90s
+      timeout: 5s
+      retries: 3
+      start_period: 10s
+
   migrations:
     build:
       context: .
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index 4c03862e35b..d8e5bc8bc7a 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -42,7 +42,7 @@ services:
 
   sim-worker:
     image: ghcr.io/simstudioai/simstudio:latest
-    command: ['bun', 'run', 'worker']
+    command: ['bun', 'apps/sim/dist/worker.cjs']
     restart: unless-stopped
     deploy:
       resources:
diff --git a/docker/app.Dockerfile b/docker/app.Dockerfile
index 7e1552a1c45..4050e98a007 100644
--- a/docker/app.Dockerfile
+++ b/docker/app.Dockerfile
@@ -114,6 +114,9 @@ COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/execution/isolated-v
 # Copy the bundled PPTX worker artifact
 COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/pptx-worker.cjs ./apps/sim/dist/pptx-worker.cjs
 
+# Copy the bundled BullMQ worker artifact
+COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/dist/worker.cjs ./apps/sim/dist/worker.cjs
+
 # Guardrails setup with pip caching
 COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/requirements.txt ./apps/sim/lib/guardrails/requirements.txt
 COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/guardrails/validate_pii.py ./apps/sim/lib/guardrails/validate_pii.py
diff --git a/helm/sim/templates/_helpers.tpl b/helm/sim/templates/_helpers.tpl
index e1bee304913..915df7cf618 100644
--- a/helm/sim/templates/_helpers.tpl
+++ b/helm/sim/templates/_helpers.tpl
@@ -117,6 +117,22 @@ Ollama selector labels
 app.kubernetes.io/component: ollama
 {{- end }}
 
+{{/*
+Worker specific labels
+*/}}
+{{- define "sim.worker.labels" -}}
+{{ include "sim.labels" . }}
+app.kubernetes.io/component: worker
+{{- end }}
+
+{{/*
+Worker selector labels
+*/}}
+{{- define "sim.worker.selectorLabels" -}}
+{{ include "sim.selectorLabels" . }}
+app.kubernetes.io/component: worker
+{{- end }}
+
 {{/*
 Migrations specific labels
 */}}
diff --git a/helm/sim/templates/deployment-worker.yaml b/helm/sim/templates/deployment-worker.yaml
new file mode 100644
index 00000000000..8ec9b2001c5
--- /dev/null
+++ b/helm/sim/templates/deployment-worker.yaml
@@ -0,0 +1,99 @@
+{{- if .Values.worker.enabled }}
+{{- include "sim.validateSecrets" . }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "sim.fullname" . }}-worker
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "sim.worker.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.worker.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "sim.worker.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      annotations:
+        {{- with .Values.podAnnotations }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      labels:
+        {{- include "sim.worker.selectorLabels" . | nindent 8 }}
+        {{- with .Values.podLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+    spec:
+      {{- with .Values.global.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "sim.serviceAccountName" . }}
+      {{- include "sim.podSecurityContext" .Values.worker | nindent 6 }}
+      {{- include "sim.nodeSelector" .Values.worker | nindent 6 }}
+      {{- include "sim.tolerations" .Values | nindent 6 }}
+      {{- include "sim.affinity" .Values | nindent 6 }}
+      containers:
+        - name: worker
+          image: {{ include "sim.image" (dict "context" . "image" .Values.worker.image) }}
+          imagePullPolicy: {{ .Values.worker.image.pullPolicy }}
+          command: ["bun", "apps/sim/dist/worker.cjs"]
+          ports:
+            - name: health
+              containerPort: {{ .Values.worker.healthPort }}
+              protocol: TCP
+          env:
+            - name: DATABASE_URL
+              value: {{ include "sim.databaseUrl" . | quote }}
+            {{- if .Values.app.env.REDIS_URL }}
+            - name: REDIS_URL
+              value: {{ .Values.app.env.REDIS_URL | quote }}
+            {{- end }}
+            - name: WORKER_PORT
+              value: {{ .Values.worker.healthPort | quote }}
+            {{- if .Values.telemetry.enabled }}
+            - name: OTEL_EXPORTER_OTLP_ENDPOINT
+              value: "http://{{ include "sim.fullname" . }}-otel-collector:4318"
+            - name: OTEL_SERVICE_NAME
+              value: sim-worker
+            - name: OTEL_SERVICE_VERSION
+              value: {{ .Chart.AppVersion | quote }}
+            - name: OTEL_RESOURCE_ATTRIBUTES
+              value: "service.name=sim-worker,service.version={{ .Chart.AppVersion }},deployment.environment={{ .Values.worker.env.NODE_ENV }}"
+            {{- end }}
+            {{- range $key, $value := .Values.worker.env }}
+            - name: {{ $key }}
+              value: {{ $value | quote }}
+            {{- end }}
+            {{- with .Values.extraEnvVars }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          envFrom:
+            - secretRef:
+                name: {{ include "sim.appSecretName" . }}
+            {{- if .Values.postgresql.enabled }}
+            - secretRef:
+                name: {{ include "sim.postgresqlSecretName" . }}
+            {{- else if .Values.externalDatabase.enabled }}
+            - secretRef:
+                name: {{ include "sim.externalDbSecretName" . }}
+            {{- end }}
+          livenessProbe:
+            httpGet:
+              path: /health/live
+              port: health
+            initialDelaySeconds: 10
+            periodSeconds: 30
+            timeoutSeconds: 5
+            failureThreshold: 3
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: health
+            initialDelaySeconds: 10
+            periodSeconds: 30
+            timeoutSeconds: 5
+            failureThreshold: 3
+          {{- include "sim.resources" .Values.worker | nindent 10 }}
+          {{- include "sim.securityContext" .Values.worker | nindent 10 }}
+{{- end }}
diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml
index 8d75b73692e..4fd2828d8c0 100644
--- a/helm/sim/values.yaml
+++ b/helm/sim/values.yaml
@@ -358,6 +358,56 @@ realtime:
   extraVolumes: []
   extraVolumeMounts: []
 
+# BullMQ worker configuration (processes background jobs when Redis is available)
+# Uses the same image as the main app with a different command
+worker:
+  # Enable/disable the worker deployment (requires REDIS_URL to be set in app.env)
+  enabled: false
+
+  # Image configuration (defaults to same image as app)
+  image:
+    repository: simstudioai/simstudio
+    tag: latest
+    pullPolicy: Always
+
+  # Number of replicas
+  replicaCount: 1
+
+  # Health check port (worker exposes a lightweight HTTP health server)
+  healthPort: 3001
+
+  # Resource limits and requests
+  resources:
+    limits:
+      memory: "4Gi"
+      cpu: "1000m"
+    requests:
+      memory: "2Gi"
+      cpu: "500m"
+
+  # Node selector for pod scheduling
+  nodeSelector: {}
+
+  # Pod security context
+  podSecurityContext:
+    fsGroup: 1001
+
+  # Container security context
+  securityContext:
+    runAsNonRoot: true
+    runAsUser: 1001
+
+  # Environment variables (worker-specific tuning)
+  env:
+    NODE_ENV: "production"
+    WORKER_CONCURRENCY_WORKFLOW: "50"
+    WORKER_CONCURRENCY_WEBHOOK: "30"
+    WORKER_CONCURRENCY_SCHEDULE: "20"
+    WORKER_CONCURRENCY_MOTHERSHIP_JOB: "10"
+    WORKER_CONCURRENCY_CONNECTOR_SYNC: "5"
+    WORKER_CONCURRENCY_DOCUMENT_PROCESSING: "20"
+    WORKER_CONCURRENCY_NOTIFICATION_DELIVERY: "10"
+
 # Database migrations job configuration
 migrations:
   # Enable/disable migrations job

From c77f3e6c214a14d59d54f7c4e72e8b113a8e5ce0 Mon Sep 17 00:00:00 2001
From: Vikhyath Mondreti
Date: Fri, 27 Mar 2026 18:20:50 -0700
Subject: [PATCH 2/2] address comments

---
 docker-compose.prod.yml                   | 2 +-
 helm/sim/templates/_helpers.tpl           | 4 ++++
 helm/sim/templates/deployment-worker.yaml | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index d8e5bc8bc7a..da547506556 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -71,7 +71,7 @@ services:
       migrations:
         condition: service_completed_successfully
     healthcheck:
-      test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:${WORKER_PORT:-3001}/health']
+      test: ['CMD', 'wget', '--spider', '--quiet', 'http://127.0.0.1:${WORKER_PORT:-3001}/health/live']
       interval: 90s
       timeout: 5s
       retries: 3
diff --git a/helm/sim/templates/_helpers.tpl b/helm/sim/templates/_helpers.tpl
index 915df7cf618..3ba078c5e67 100644
--- a/helm/sim/templates/_helpers.tpl
+++ b/helm/sim/templates/_helpers.tpl
@@ -222,6 +222,10 @@ Skip validation when using existing secrets or External Secrets Operator
 {{- fail "realtime.env.BETTER_AUTH_SECRET must not use the default placeholder value. Generate a secure secret with: openssl rand -hex 32" }}
 {{- end }}
 {{- end }}
+{{- /* Worker validation - REDIS_URL is required when worker is enabled */ -}}
+{{- if and .Values.worker.enabled (not .Values.app.env.REDIS_URL) }}
+{{- fail "app.env.REDIS_URL is required when worker.enabled=true" }}
+{{- end }}
 {{- /* PostgreSQL password validation - skip if using existing secret or ESO */ -}}
 {{- if not (or $useExistingPostgresSecret $useExternalSecrets) }}
 {{- if and .Values.postgresql.enabled (not .Values.postgresql.auth.password) }}
diff --git a/helm/sim/templates/deployment-worker.yaml b/helm/sim/templates/deployment-worker.yaml
index 8ec9b2001c5..701fdff1849 100644
--- a/helm/sim/templates/deployment-worker.yaml
+++ b/helm/sim/templates/deployment-worker.yaml
@@ -62,9 +62,11 @@ spec:
               value: "service.name=sim-worker,service.version={{ .Chart.AppVersion }},deployment.environment={{ .Values.worker.env.NODE_ENV }}"
             {{- end }}
             {{- range $key, $value := .Values.worker.env }}
+            {{- if ne $key "WORKER_PORT" }}
             - name: {{ $key }}
               value: {{ $value | quote }}
             {{- end }}
+            {{- end }}
             {{- with .Values.extraEnvVars }}
             {{- toYaml . | nindent 12 }}
             {{- end }}