diff --git a/apps/sim/background/schedule-execution.ts b/apps/sim/background/schedule-execution.ts index 4c6b1a9cba8..a786cefd14e 100644 --- a/apps/sim/background/schedule-execution.ts +++ b/apps/sim/background/schedule-execution.ts @@ -7,7 +7,7 @@ import { workflowSchedule, } from '@sim/db' import { createLogger, runWithRequestContext } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' import { generateId } from '@sim/utils/id' import { backoffWithJitter } from '@sim/utils/retry' import { task } from '@trigger.dev/sdk' @@ -156,7 +156,7 @@ async function applyScheduleUpdate( return updatedRows.length > 0 } catch (error) { - logger.error(`[${requestId}] ${context}`, error) + logger.error(`[${requestId}] ${context}`, error, { cause: describeError(error) }) throw error } } @@ -530,7 +530,13 @@ async function runWorkflowExecution({ } } - logger.error(`[${requestId}] Early failure in scheduled workflow ${payload.workflowId}`, error) + logger.error( + `[${requestId}] Early failure in scheduled workflow ${payload.workflowId}`, + error, + { + cause: describeError(error), + } + ) if (wasExecutionFinalizedByCore(error, executionId)) { throw error @@ -950,7 +956,9 @@ export async function executeScheduleJob(payload: ScheduleExecutionPayload) { return } - logger.error(`[${requestId}] Error processing schedule ${payload.scheduleId}`, error) + logger.error(`[${requestId}] Error processing schedule ${payload.scheduleId}`, error, { + cause: describeError(error), + }) await releaseClaim( now, `Failed to release schedule ${payload.scheduleId} after unhandled error` diff --git a/apps/sim/background/workflow-column-execution.ts b/apps/sim/background/workflow-column-execution.ts index 53c337842fa..9f617cd5144 100644 --- a/apps/sim/background/workflow-column-execution.ts +++ b/apps/sim/background/workflow-column-execution.ts @@ -1,12 +1,13 @@ import { db } from '@sim/db' import { workflow as workflowTable } from 
'@sim/db/schema' import { createLogger, runWithRequestContext } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' import { sleep } from '@sim/utils/helpers' import { generateId } from '@sim/utils/id' import { backoffWithJitter } from '@sim/utils/retry' import { task } from '@trigger.dev/sdk' import { eq } from 'drizzle-orm' +import { isRetryableInfrastructureError } from '@/lib/core/errors/retryable-infrastructure' import { createTimeoutAbortController } from '@/lib/core/execution-limits' import { RateLimiter } from '@/lib/core/rate-limiter/rate-limiter' import { preprocessExecution } from '@/lib/execution/preprocessing' @@ -597,8 +598,8 @@ async function runWorkflowAndWriteTerminal( }) .catch((err) => { logger.warn( - `Per-block partial write failed (table=${tableId} row=${rowId} group=${groupId}):`, - err + `Per-block partial write failed (table=${tableId} row=${rowId} group=${groupId})`, + { cause: describeError(err), retryable: isRetryableInfrastructureError(err) } ) }) } @@ -720,7 +721,12 @@ async function runWorkflowAndWriteTerminal( const message = toError(err).message logger.error( `Workflow group cell execution failed (table=${tableId} row=${rowId} group=${groupId})`, - { error: message, executionId } + { + error: message, + executionId, + cause: describeError(err), + retryable: isRetryableInfrastructureError(err), + } ) terminalWritten = true await writeChain.catch(() => {}) @@ -735,7 +741,11 @@ async function runWorkflowAndWriteTerminal( blockErrors, }) } catch (writeErr) { - logger.error('Also failed to write error state', { error: toError(writeErr).message }) + logger.error('Also failed to write error state', { + error: toError(writeErr).message, + cause: describeError(writeErr), + retryable: isRetryableInfrastructureError(writeErr), + }) } return 'error' } diff --git a/apps/sim/lib/logs/execution/logging-session.ts b/apps/sim/lib/logs/execution/logging-session.ts index 
09bfd2348ca..a0fd011dc7d 100644 --- a/apps/sim/lib/logs/execution/logging-session.ts +++ b/apps/sim/lib/logs/execution/logging-session.ts @@ -1,8 +1,9 @@ import { db } from '@sim/db' import { workflowExecutionLogs } from '@sim/db/schema' import { createLogger } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' import { and, eq, sql } from 'drizzle-orm' +import { isRetryableInfrastructureError } from '@/lib/core/errors/retryable-infrastructure' import { executionLogger } from '@/lib/logs/execution/logger' import { calculateCostSummary, @@ -177,6 +178,8 @@ export class LoggingSession { } catch (error) { logger.error(`Failed to persist last started block for execution ${this.executionId}:`, { error: toError(error).message, + cause: describeError(error), + retryable: isRetryableInfrastructureError(error), }) } } @@ -193,6 +196,8 @@ export class LoggingSession { } catch (error) { logger.error(`Failed to persist last completed block for execution ${this.executionId}:`, { error: toError(error).message, + cause: describeError(error), + retryable: isRetryableInfrastructureError(error), }) } } @@ -411,6 +416,8 @@ export class LoggingSession { executionId: this.executionId, error: toError(error).message, stack: error instanceof Error ? 
error.stack : undefined, + cause: describeError(error), + retryable: isRetryableInfrastructureError(error), }) throw error } @@ -1057,7 +1064,11 @@ export class LoggingSession { this.completionAttemptFailed = true logger.error( `[${this.requestId || 'unknown'}] Cost-only fallback also failed for execution ${this.executionId}:`, - { error: toError(fallbackError).message } + { + error: toError(fallbackError).message, + cause: describeError(fallbackError), + retryable: isRetryableInfrastructureError(fallbackError), + } ) } } diff --git a/apps/sim/lib/workflows/executor/pause-persistence.ts b/apps/sim/lib/workflows/executor/pause-persistence.ts index 2080668cccd..954329ad4fa 100644 --- a/apps/sim/lib/workflows/executor/pause-persistence.ts +++ b/apps/sim/lib/workflows/executor/pause-persistence.ts @@ -1,5 +1,6 @@ import { createLogger } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { describeError, toError } from '@sim/utils/errors' +import { isRetryableInfrastructureError } from '@/lib/core/errors/retryable-infrastructure' import type { LoggingSession } from '@/lib/logs/execution/logging-session' import { PauseResumeManager } from '@/lib/workflows/executor/human-in-the-loop-manager' import type { ExecutionResult } from '@/executor/types' @@ -46,6 +47,8 @@ export async function handlePostExecutionPauseState({ logger.error('Failed to persist pause result', { executionId, error: toError(pauseError).message, + cause: describeError(pauseError), + retryable: isRetryableInfrastructureError(pauseError), }) await loggingSession.markAsFailed( `Failed to persist pause state: ${toError(pauseError).message}` @@ -59,6 +62,8 @@ export async function handlePostExecutionPauseState({ logger.error('Failed to process queued resumes', { executionId, error: toError(resumeError).message, + cause: describeError(resumeError), + retryable: isRetryableInfrastructureError(resumeError), }) } } diff --git a/packages/utils/src/errors.test.ts 
b/packages/utils/src/errors.test.ts index fa11dc191f9..272c85e53a4 100644 --- a/packages/utils/src/errors.test.ts +++ b/packages/utils/src/errors.test.ts @@ -2,7 +2,7 @@ * @vitest-environment node */ import { describe, expect, it } from 'vitest' -import { getPostgresErrorCode, toError } from './errors.js' +import { describeError, getPostgresErrorCode, toError } from './errors.js' describe('toError', () => { it('returns the same Error when given an Error', () => { @@ -76,3 +76,54 @@ describe('getPostgresErrorCode', () => { expect(getPostgresErrorCode(err1)).toBeUndefined() }) }) + +describe('describeError', () => { + it('reports name and message for a plain error, omitting causeChain', () => { + const described = describeError(new Error('boom')) + expect(described).toEqual({ name: 'Error', message: 'boom' }) + expect(described.causeChain).toBeUndefined() + }) + + it('surfaces the deepest cause for a wrapped driver error', () => { + const driver = Object.assign(new Error('read ECONNRESET'), { + code: 'ECONNRESET', + errno: 'ECONNRESET', + syscall: 'read', + }) + const wrapped = new Error('Failed query: select ...', { cause: driver }) + const described = describeError(wrapped) + expect(described.message).toBe('read ECONNRESET') + expect(described.code).toBe('ECONNRESET') + expect(described.errno).toBe('ECONNRESET') + expect(described.syscall).toBe('read') + expect(described.causeChain).toEqual([ + 'Error: Failed query: select ...', + 'Error: read ECONNRESET', + ]) + }) + + it('always returns the cause for unclassified errors (AbortError)', () => { + const aborted = Object.assign(new Error('The operation was aborted'), { name: 'AbortError' }) + expect(describeError(aborted)).toEqual({ + name: 'AbortError', + message: 'The operation was aborted', + }) + }) + + it('falls back to a populated description for non-Error input without throwing', () => { + expect(describeError('just a string')).toEqual({ name: 'Error', message: 'just a string' }) + expect(() => describeError({ 
weird: true })).not.toThrow() + }) + + it('stops at depth 10 and does not loop on a cyclic cause', () => { + const a = new Error('a') + const b = new Error('b') + ;(a as { cause?: unknown }).cause = b + ;(b as { cause?: unknown }).cause = a + let described: ReturnType<typeof describeError> | undefined + expect(() => { + described = describeError(a) + }).not.toThrow() + expect(described?.causeChain?.length).toBeLessThanOrEqual(10) + }) +}) diff --git a/packages/utils/src/errors.ts b/packages/utils/src/errors.ts index 48fcee083c3..dc21d57b995 100644 --- a/packages/utils/src/errors.ts +++ b/packages/utils/src/errors.ts @@ -39,6 +39,60 @@ export function getPostgresConstraintName(error: unknown): string | undefined { return readPgErrorField(error, 'constraint_name') ?? readPgErrorField(error, 'constraint') } +export interface DescribedError { + name: string + message: string + code?: string + errno?: string + syscall?: string + /** `"Name: message"` per link in the `.cause` chain, outermost first. Present only when the chain has more than one link. */ + causeChain?: string[] +} + +/** + * Always-on diagnostic view of an error and its `.cause` chain. + * + * Reports the fields of the DEEPEST `.cause` link, because a wrapped driver + * error (e.g. Drizzle's `"Failed query: ..."` wrapping an `ECONNRESET`) carries + * the real reason there, not on the outer wrapper. Always returns a populated + * object — including for non-`Error` throws and unclassified errors like + * `AbortError`. Cycle-safe and depth-bounded. + * + * Loggers do not serialize the non-enumerable `Error.prototype.cause`, so pass + * the result as an explicit structured field rather than the raw error.
+ */ +export function describeError(error: unknown): DescribedError { + const chain: Error[] = [] + const seen = new Set() + let current: unknown = error + while (current instanceof Error && !seen.has(current) && chain.length < 10) { + seen.add(current) + chain.push(current) + current = current.cause + } + + if (chain.length === 0) { + const normalized = toError(error) + return { name: normalized.name, message: normalized.message } + } + + const deepest = chain[chain.length - 1] as Error & Record<string, unknown> + const asString = (value: unknown): string | undefined => + typeof value === 'string' ? value : undefined + const code = asString(deepest.code) + const errno = asString(deepest.errno) + const syscall = asString(deepest.syscall) + + return { + name: deepest.name, + message: deepest.message, + ...(code ? { code } : {}), + ...(errno ? { errno } : {}), + ...(syscall ? { syscall } : {}), + ...(chain.length > 1 ? { causeChain: chain.map((e) => `${e.name}: ${e.message}`) } : {}), + } +} + function readPgErrorField(error: unknown, field: string): string | undefined { const seen = new Set() let current: unknown = error