From 13cf392ceb188568646ccb24542ce994274d0c22 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Sat, 23 May 2026 12:35:00 -0700 Subject: [PATCH 1/2] fix(files): never dedup external URL fetches by path filename External URL fetches in the file parse route were keyed on the path filename, so two distinct URLs whose paths ended in `image.png` (e.g. every Slack clipboard paste) collided in the workspace cache and returned stale bytes from a prior fetch. Extracts the fetch + save flow into `fetchExternalUrlToWorkspace` so the broken dedup pattern can't be reintroduced. The helper always downloads; `uploadWorkspaceFile` handles name collisions on save by suffixing (`image.png` -> `image (1).png` -> ...). --- apps/sim/app/api/files/parse/route.test.ts | 84 ++++--- apps/sim/app/api/files/parse/route.ts | 153 +++--------- .../workspace/fetch-external-url.test.ts | 220 ++++++++++++++++++ .../contexts/workspace/fetch-external-url.ts | 150 ++++++++++++ .../lib/uploads/contexts/workspace/index.ts | 1 + 5 files changed, 454 insertions(+), 154 deletions(-) create mode 100644 apps/sim/lib/uploads/contexts/workspace/fetch-external-url.test.ts create mode 100644 apps/sim/lib/uploads/contexts/workspace/fetch-external-url.ts diff --git a/apps/sim/app/api/files/parse/route.test.ts b/apps/sim/app/api/files/parse/route.test.ts index eb421bf450e..b2ab510c9f8 100644 --- a/apps/sim/app/api/files/parse/route.test.ts +++ b/apps/sim/app/api/files/parse/route.test.ts @@ -31,8 +31,6 @@ const { mockFsWriteFile, mockJoin, actualPath, - mockFileExistsInWorkspace, - mockListWorkspaceFiles, mockUploadWorkspaceFile, } = vi.hoisted(() => { // eslint-disable-next-line @typescript-eslint/no-require-imports @@ -62,9 +60,19 @@ const { return actualPath.join(...args) }), actualPath, - mockFileExistsInWorkspace: vi.fn().mockResolvedValue(false), - mockListWorkspaceFiles: vi.fn().mockResolvedValue([]), - mockUploadWorkspaceFile: vi.fn().mockResolvedValue({}), + mockUploadWorkspaceFile: vi + .fn() + .mockImplementation( + async (workspaceId: string, _userId: string, _buffer: Buffer, fileName: string) => ({ + id: 'wf_test', + name: fileName, + size: 0, + type: 'application/octet-stream', + url: `/api/files/serve/${workspaceId}/${fileName}`, + key: `${workspaceId}/${fileName}`, + context: 'workspace', + }) + ), } }) @@ -110,9 +118,7 @@ vi.mock('@/lib/uploads/contexts/execution', () => ({ uploadExecutionFile: vi.fn(), })) -vi.mock('@/lib/uploads/contexts/workspace', () => ({ - fileExistsInWorkspace: mockFileExistsInWorkspace, - listWorkspaceFiles: mockListWorkspaceFiles, +vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({ uploadWorkspaceFile: mockUploadWorkspaceFile, })) @@ -190,9 +196,7 @@ describe('File Parse API Route', () => { mockFsStat.mockResolvedValue({ isFile: () => true, size: 17 }) mockFsReadFile.mockResolvedValue(Buffer.from('test file content')) mockIsSupportedFileType.mockReturnValue(true) - mockFileExistsInWorkspace.mockResolvedValue(false) - mockListWorkspaceFiles.mockResolvedValue([]) - mockUploadWorkspaceFile.mockResolvedValue({}) + mockUploadWorkspaceFile.mockClear() mockParseFile.mockResolvedValue({ content: 'parsed content', metadata: { pageCount: 1 }, @@ -384,34 +388,32 @@ describe('File Parse API Route', () => { ) }) - it('should preserve the full download cap when an external URL reuses a workspace file', async () => { + it('should never dedup external URL fetches by path filename — two URLs sharing image.png both download', async () => { inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({ isValid: true, resolvedIP: '203.0.113.10', }) - inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( - new Response('file content', { - status: 200, - headers: { 'content-type': 'text/plain' }, - }) - ) - mockFileExistsInWorkspace.mockResolvedValueOnce(false).mockResolvedValueOnce(true) - mockListWorkspaceFiles.mockResolvedValueOnce([ - { name: 'file2.txt', key: 'workspace-file2.txt' }, - ]) - - mockParseBuffer - .mockResolvedValueOnce({ - content: 'a'.repeat(4 * 1024 * 1024), - metadata: { pageCount: 1 }, - }) - .mockResolvedValueOnce({ - content: 'second file', - metadata: { pageCount: 1 }, - }) + inputValidationMockFns.mockSecureFetchWithPinnedIP + .mockResolvedValueOnce( + new Response('first image bytes', { + status: 200, + headers: { 'content-type': 'image/png' }, + }) + ) + .mockResolvedValueOnce( + new Response('second image bytes — different content', { + status: 200, + headers: { 'content-type': 'image/png' }, + }) + ) + mockIsSupportedFileType.mockReturnValue(false) + permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue('write') const req = createMockRequest('POST', { - filePath: ['https://example.com/file1.txt', 'https://example.com/file2.txt'], + filePath: [ + 'https://files.slack.com/files-pri/T07-FAAA/download/image.png', + 'https://files.slack.com/files-pri/T07-FBBB/download/image.png', + ], workspaceId: 'workspace-id', }) @@ -420,9 +422,21 @@ describe('File Parse API Route', () => { expect(response.status).toBe(200) expect(data.results).toHaveLength(2) - expect(storageServiceMockFns.mockDownloadFile).toHaveBeenCalledWith( - expect.objectContaining({ key: 'workspace-file2.txt', maxBytes: 100 * 1024 * 1024 }) + expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenCalledTimes(2) + expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenNthCalledWith( + 1, + 'https://files.slack.com/files-pri/T07-FAAA/download/image.png', + '203.0.113.10', + expect.any(Object) + ) + expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenNthCalledWith( + 2, + 'https://files.slack.com/files-pri/T07-FBBB/download/image.png', + '203.0.113.10', + expect.any(Object) ) + expect(mockUploadWorkspaceFile).toHaveBeenCalledTimes(2) + expect(storageServiceMockFns.mockDownloadFile).not.toHaveBeenCalled() }) it('should stop multi-file parsing once the combined parsed output is too large', async () => { diff --git a/apps/sim/app/api/files/parse/route.ts b/apps/sim/app/api/files/parse/route.ts index d015bc1cf66..ea4f493dd80 100644 --- a/apps/sim/app/api/files/parse/route.ts +++ b/apps/sim/app/api/files/parse/route.ts @@ -10,21 +10,15 @@ import { type NextRequest, NextResponse } from 'next/server' import { fileParseContract } from '@/lib/api/contracts/storage-transfer' import { getValidationErrorMessage, parseRequest } from '@/lib/api/server' import { checkInternalAuth } from '@/lib/auth/hybrid' -import { - secureFetchWithPinnedIP, - validateUrlWithDNS, -} from '@/lib/core/security/input-validation.server' import { sanitizeUrlForLog } from '@/lib/core/utils/logging' -import { - assertKnownSizeWithinLimit, - DEFAULT_MAX_ERROR_BODY_BYTES, - isPayloadSizeLimitError, - readResponseTextWithLimit, - readResponseToBufferWithLimit, -} from '@/lib/core/utils/stream-limits' +import { assertKnownSizeWithinLimit, isPayloadSizeLimitError } from '@/lib/core/utils/stream-limits' import { isSupportedFileType, parseFile } from '@/lib/file-parsers' import { isUsingCloudStorage, type StorageContext, StorageService } from '@/lib/uploads' import { uploadExecutionFile } from '@/lib/uploads/contexts/execution' +import { + ExternalUrlValidationError, + fetchExternalUrlToWorkspace, +} from '@/lib/uploads/contexts/workspace' import { UPLOAD_DIR_SERVER } from '@/lib/uploads/core/setup.server' import { getFileMetadataByKey } from '@/lib/uploads/server/metadata' import { @@ -36,7 +30,6 @@ import { inferContextFromKey, isInternalFileUrl, } from '@/lib/uploads/utils/file-utils' -import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils' import { verifyFileAccess } from '@/app/api/files/authorization' import type { UserFile } from '@/executor/types' import '@/lib/uploads/core/setup.server' @@ -453,9 +446,16 @@ function validateFilePath(filePath: string): { isValid: boolean; error?: string } /** - * Handle external URL - * If workspaceId is provided, checks if file already exists and saves to workspace if not - * If executionContext is provided, also stores the file in execution storage and returns UserFile + * Handle external URL. + * + * Always fetches the URL fresh — there is no filename-based dedup. Distinct URLs + * commonly share a path tail (e.g. every Slack clipboard paste is `image.png`), + * so keying a cache by filename returns stale bytes. `fetchExternalUrlToWorkspace` + * delegates to `uploadWorkspaceFile`, which suffix-disambiguates collisions on save. + * + * Workspace save is skipped when the URL already points at our execution-files + * bucket (re-uploading our own bytes is wasteful and would generate `image (1).png` + * style aliases for files we already own). */ async function handleExternalUrl( url: string, @@ -470,23 +470,6 @@ async function handleExternalUrl( ): Promise { try { logger.info('Fetching external URL:', url) - logger.info('WorkspaceId for URL save:', workspaceId) - - const urlValidation = await validateUrlWithDNS(url, 'fileUrl') - if (!urlValidation.isValid) { - logger.warn(`Blocked external URL request: ${urlValidation.error}`) - return { - success: false, - error: urlValidation.error || 'Invalid external URL', - filePath: url, - } - } - - const urlPath = new URL(url).pathname - const filename = urlPath.split('/').pop() || 'download' - const extension = path.extname(filename).toLowerCase().substring(1) - - logger.info(`Extracted filename: ${filename}, workspaceId: ${workspaceId}`) const { S3_EXECUTION_FILES_CONFIG, @@ -511,104 +494,27 @@ async function handleExternalUrl( isExecutionFile = false } - // Only apply workspace deduplication if: - // 1. WorkspaceId is provided - // 2. URL is NOT from execution files bucket/container - const shouldCheckWorkspace = workspaceId && !isExecutionFile - - if (shouldCheckWorkspace) { - const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId) - if (permission === null) { - logger.warn('User does not have workspace access for file parse', { - userId, - workspaceId, - filename, - }) - return { - success: false, - error: 'File not found', - filePath: url, - } - } - - const { fileExistsInWorkspace, listWorkspaceFiles } = await import( - '@/lib/uploads/contexts/workspace' - ) - const exists = await fileExistsInWorkspace(workspaceId, filename) - - if (exists) { - logger.info(`File ${filename} already exists in workspace, using existing file`) - const workspaceFiles = await listWorkspaceFiles(workspaceId) - const existingFile = workspaceFiles.find((f) => f.name === filename) - - if (existingFile) { - const storageFilePath = `/api/files/serve/${existingFile.key}` - return handleCloudFile( - storageFilePath, - fileType, - 'workspace', - userId, - executionContext, - maxDownloadBytes, - maxParsedOutputBytes - ) - } - } - } - - const response = await secureFetchWithPinnedIP(url, urlValidation.resolvedIP!, { - timeout: DOWNLOAD_TIMEOUT_MS, - maxResponseBytes: maxDownloadBytes, - signal, - ...(headers && Object.keys(headers).length > 0 && { headers }), - }) - if (!response.ok) { - await readResponseTextWithLimit(response, { - maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, - label: 'file download error response', - signal, - }).catch(() => '') - throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`) - } - - const buffer = await readResponseToBufferWithLimit(response, { - maxBytes: maxDownloadBytes, - label: 'file download', + const { filename, buffer, mimeType } = await fetchExternalUrlToWorkspace({ + url, + userId, + workspaceId: workspaceId || undefined, + saveToWorkspace: Boolean(workspaceId) && !isExecutionFile, + headers, signal, + maxDownloadBytes, + timeoutMs: DOWNLOAD_TIMEOUT_MS, }) + const extension = path.extname(filename).toLowerCase().substring(1) logger.info(`Downloaded file from URL: ${url}, size: ${buffer.length} bytes`) let userFile: UserFile | undefined - const mimeType = response.headers.get('content-type') || getMimeTypeFromExtension(extension) - if (executionContext) { try { userFile = await uploadExecutionFile(executionContext, buffer, filename, mimeType, userId) logger.info(`Stored file in execution storage: ${filename}`, { key: userFile.key }) } catch (uploadError) { - logger.warn(`Failed to store file in execution storage:`, uploadError) - // Continue without userFile - parsing can still work - } - } - - if (shouldCheckWorkspace) { - try { - const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId) - if (permission !== 'admin' && permission !== 'write') { - logger.warn('User does not have write permission for workspace file save', { - userId, - workspaceId, - filename, - permission, - }) - } else { - const { uploadWorkspaceFile } = await import('@/lib/uploads/contexts/workspace') - await uploadWorkspaceFile(workspaceId, userId, buffer, filename, mimeType) - logger.info(`Saved URL file to workspace storage: ${filename}`) - } - } catch (saveError) { - logger.warn(`Failed to save URL file to workspace:`, saveError) + logger.warn('Failed to store file in execution storage:', uploadError) } } @@ -657,6 +563,15 @@ async function handleExternalUrl( } } + if (error instanceof ExternalUrlValidationError) { + logger.warn(`Blocked external URL request: ${error.message}`) + return { + success: false, + error: error.message, + filePath: url, + } + } + return { success: false, error: `Error fetching URL: ${(error as Error).message}`, diff --git a/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.test.ts b/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.test.ts new file mode 100644 index 00000000000..425eb5dd4c8 --- /dev/null +++ b/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.test.ts @@ -0,0 +1,220 @@ +/** + * @vitest-environment node + */ +import { + inputValidationMock, + inputValidationMockFns, + permissionsMock, + permissionsMockFns, +} from '@sim/testing' +import { beforeEach, describe, expect, it, vi } from 'vitest' + +const { mockUploadWorkspaceFile } = vi.hoisted(() => ({ + mockUploadWorkspaceFile: vi.fn(), +})) + +vi.mock('@/lib/core/security/input-validation.server', () => inputValidationMock) +vi.mock('@/lib/workspaces/permissions/utils', () => permissionsMock) +vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({ + uploadWorkspaceFile: mockUploadWorkspaceFile, +})) + +import { + ExternalUrlValidationError, + fetchExternalUrlToWorkspace, +} from '@/lib/uploads/contexts/workspace/fetch-external-url' + +function makeResponse(body: string, contentType = 'application/octet-stream'): Response { + return new Response(body, { status: 200, headers: { 'content-type': contentType } }) +} + +describe('fetchExternalUrlToWorkspace', () => { + beforeEach(() => { + vi.clearAllMocks() + inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({ + isValid: true, + resolvedIP: '203.0.113.10', + }) + permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue('write') + mockUploadWorkspaceFile.mockImplementation( + async (workspaceId: string, _userId: string, _buffer: Buffer, fileName: string) => ({ + id: `wf_${fileName}`, + name: fileName, + size: 0, + type: 'application/octet-stream', + url: `/api/files/serve/${workspaceId}/${fileName}`, + key: `${workspaceId}/${fileName}`, + context: 'workspace', + }) + ) + }) + + it('downloads each URL independently — never dedups by path filename', async () => { + inputValidationMockFns.mockSecureFetchWithPinnedIP + .mockResolvedValueOnce(makeResponse('first bytes', 'image/png')) + .mockResolvedValueOnce(makeResponse('different second bytes', 'image/png')) + + const first = await fetchExternalUrlToWorkspace({ + url: 'https://files.slack.com/files-pri/T07-FAAA/download/image.png', + userId: 'user-1', + workspaceId: 'workspace-1', + }) + const second = await fetchExternalUrlToWorkspace({ + url: 'https://files.slack.com/files-pri/T07-FBBB/download/image.png', + userId: 'user-1', + workspaceId: 'workspace-1', + }) + + expect(first.filename).toBe('image.png') + expect(second.filename).toBe('image.png') + expect(first.buffer.toString()).toBe('first bytes') + expect(second.buffer.toString()).toBe('different second bytes') + expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenCalledTimes(2) + expect(mockUploadWorkspaceFile).toHaveBeenCalledTimes(2) + }) + + it('throws ExternalUrlValidationError when SSRF validation fails', async () => { + inputValidationMockFns.mockValidateUrlWithDNS.mockResolvedValue({ + isValid: false, + error: 'Blocked private IP', + }) + + await expect( + fetchExternalUrlToWorkspace({ + url: 'http://169.254.169.254/secret', + userId: 'user-1', + }) + ).rejects.toBeInstanceOf(ExternalUrlValidationError) + expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).not.toHaveBeenCalled() + }) + + it('throws on non-2xx fetch responses', async () => { + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + new Response('not found', { status: 404, statusText: 'Not Found' }) + ) + + await expect( + fetchExternalUrlToWorkspace({ + url: 'https://example.com/missing.txt', + userId: 'user-1', + }) + ).rejects.toThrow(/404/) + }) + + it('skips workspace save when saveToWorkspace is false', async () => { + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + makeResponse('bytes', 'text/plain') + ) + + const result = await fetchExternalUrlToWorkspace({ + url: 'https://example.com/file.txt', + userId: 'user-1', + workspaceId: 'workspace-1', + saveToWorkspace: false, + }) + + expect(result.savedWorkspaceFile).toBeUndefined() + expect(mockUploadWorkspaceFile).not.toHaveBeenCalled() + expect(permissionsMockFns.mockGetUserEntityPermissions).not.toHaveBeenCalled() + }) + + it('skips workspace save when no workspaceId is provided', async () => { + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + makeResponse('bytes', 'text/plain') + ) + + const result = await fetchExternalUrlToWorkspace({ + url: 'https://example.com/file.txt', + userId: 'user-1', + }) + + expect(result.savedWorkspaceFile).toBeUndefined() + expect(mockUploadWorkspaceFile).not.toHaveBeenCalled() + }) + + it('skips workspace save when user lacks write permission', async () => { + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + makeResponse('bytes', 'text/plain') + ) + permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue('read') + + const result = await fetchExternalUrlToWorkspace({ + url: 'https://example.com/file.txt', + userId: 'user-1', + workspaceId: 'workspace-1', + }) + + expect(result.savedWorkspaceFile).toBeUndefined() + expect(mockUploadWorkspaceFile).not.toHaveBeenCalled() + }) + + it('returns the saved workspace file when permission allows save', async () => { + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + makeResponse('bytes', 'text/plain') + ) + + const result = await fetchExternalUrlToWorkspace({ + url: 'https://example.com/notes.txt', + userId: 'user-1', + workspaceId: 'workspace-1', + }) + + expect(mockUploadWorkspaceFile).toHaveBeenCalledWith( + 'workspace-1', + 'user-1', + expect.any(Buffer), + 'notes.txt', + 'text/plain' + ) + expect(result.savedWorkspaceFile?.key).toBe('workspace-1/notes.txt') + }) + + it('swallows workspace save errors so parsing can still proceed', async () => { + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + makeResponse('bytes', 'text/plain') + ) + mockUploadWorkspaceFile.mockRejectedValueOnce(new Error('disk full')) + + const result = await fetchExternalUrlToWorkspace({ + url: 'https://example.com/file.txt', + userId: 'user-1', + workspaceId: 'workspace-1', + }) + + expect(result.buffer.toString()).toBe('bytes') + expect(result.savedWorkspaceFile).toBeUndefined() + }) + + it('forwards custom headers to the fetch', async () => { + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + makeResponse('bytes', 'text/plain') + ) + + await fetchExternalUrlToWorkspace({ + url: 'https://files.slack.com/files-pri/T07/download/report.txt', + userId: 'user-1', + headers: { Authorization: 'Bearer xoxb-test-token' }, + }) + + expect(inputValidationMockFns.mockSecureFetchWithPinnedIP).toHaveBeenCalledWith( + 'https://files.slack.com/files-pri/T07/download/report.txt', + '203.0.113.10', + expect.objectContaining({ + headers: { Authorization: 'Bearer xoxb-test-token' }, + }) + ) + }) + + it('uses content-type from response headers', async () => { + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + makeResponse('pdf bytes', 'application/pdf') + ) + + const result = await fetchExternalUrlToWorkspace({ + url: 'https://example.com/report.pdf', + userId: 'user-1', + }) + + expect(result.mimeType).toBe('application/pdf') + }) +}) diff --git a/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.ts b/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.ts new file mode 100644 index 00000000000..ac06a5ea16d --- /dev/null +++ b/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.ts @@ -0,0 +1,150 @@ +import type { Buffer } from 'buffer' +import path from 'path' +import { createLogger } from '@sim/logger' +import { + secureFetchWithPinnedIP, + validateUrlWithDNS, +} from '@/lib/core/security/input-validation.server' +import { + DEFAULT_MAX_ERROR_BODY_BYTES, + readResponseTextWithLimit, + readResponseToBufferWithLimit, +} from '@/lib/core/utils/stream-limits' +import { uploadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager' +import { getMimeTypeFromExtension } from '@/lib/uploads/utils/file-utils' +import { getUserEntityPermissions } from '@/lib/workspaces/permissions/utils' +import type { UserFile } from '@/executor/types' + +const logger = createLogger('FetchExternalUrl') + +const DEFAULT_TIMEOUT_MS = 30_000 +const DEFAULT_MAX_DOWNLOAD_BYTES = 100 * 1024 * 1024 + +/** + * Thrown when the URL fails SSRF/DNS validation. Callers should map this to a + * user-facing 4xx-style response rather than a generic fetch failure. + */ +export class ExternalUrlValidationError extends Error { + constructor(message: string) { + super(message) + this.name = 'ExternalUrlValidationError' + } +} + +export interface FetchExternalUrlOptions { + url: string + userId: string + /** When provided alongside `saveToWorkspace: true`, the downloaded bytes are persisted as a workspace file. */ + workspaceId?: string + /** Defaults to true when a `workspaceId` is provided. Set false when the URL already points at our own storage. */ + saveToWorkspace?: boolean + headers?: Record + signal?: AbortSignal + maxDownloadBytes?: number + timeoutMs?: number +} + +export interface FetchExternalUrlResult { + /** + * Filename derived from the URL path. NOT a content identity — distinct URLs + * frequently share the same path tail (e.g. every Slack clipboard paste is + * `image.png`). Never use this as a cache key. + */ + filename: string + buffer: Buffer + /** Content-Type from the response, or inferred from the filename extension. */ + mimeType: string + /** + * Saved workspace file record. Undefined when the workspace save was skipped + * (no workspaceId, `saveToWorkspace: false`, missing write permission, or a + * save error — the last is logged, not thrown, so the parse path stays alive). + */ + savedWorkspaceFile?: UserFile +} + +/** + * Fetch an external URL into memory and (optionally) save it as a fresh workspace file. + * + * URL fetches are NEVER deduplicated by filename. Two URLs whose paths end in + * `image.png` are two different fetches that produce two different workspace + * files; `uploadWorkspaceFile` allocates a unique on-disk name (`image.png`, + * `image (1).png`, ...) on the save side. Keying a cache by path tail would + * silently return stale bytes — that was the original bug this helper exists + * to make unrepresentable. + */ +export async function fetchExternalUrlToWorkspace( + options: FetchExternalUrlOptions +): Promise { + const { + url, + userId, + workspaceId, + saveToWorkspace = Boolean(workspaceId), + headers, + signal, + maxDownloadBytes = DEFAULT_MAX_DOWNLOAD_BYTES, + timeoutMs = DEFAULT_TIMEOUT_MS, + } = options + + const urlValidation = await validateUrlWithDNS(url, 'fileUrl') + if (!urlValidation.isValid || !urlValidation.resolvedIP) { + throw new ExternalUrlValidationError(urlValidation.error || 'Invalid external URL') + } + + const filename = new URL(url).pathname.split('/').pop() || 'download' + const extension = path.extname(filename).toLowerCase().substring(1) + + const response = await secureFetchWithPinnedIP(url, urlValidation.resolvedIP, { + timeout: timeoutMs, + maxResponseBytes: maxDownloadBytes, + signal, + ...(headers && Object.keys(headers).length > 0 && { headers }), + }) + + if (!response.ok) { + await readResponseTextWithLimit(response, { + maxBytes: DEFAULT_MAX_ERROR_BODY_BYTES, + label: 'external url error body', + signal, + }).catch(() => '') + throw new Error(`Failed to fetch URL: ${response.status} ${response.statusText}`) + } + + const buffer = await readResponseToBufferWithLimit(response, { + maxBytes: maxDownloadBytes, + label: 'external url download', + signal, + }) + + const mimeType = response.headers.get('content-type') || getMimeTypeFromExtension(extension) + + let savedWorkspaceFile: UserFile | undefined + if (workspaceId && saveToWorkspace) { + const permission = await getUserEntityPermissions(userId, 'workspace', workspaceId) + if (permission === 'admin' || permission === 'write') { + try { + savedWorkspaceFile = await uploadWorkspaceFile( + workspaceId, + userId, + buffer, + filename, + mimeType + ) + } catch (saveError) { + logger.warn('Failed to save fetched URL to workspace storage', { + workspaceId, + filename, + saveError, + }) + } + } else { + logger.warn('Skipping workspace save: user lacks write permission', { + userId, + workspaceId, + permission, + }) + } + } + + return { filename, buffer, mimeType, savedWorkspaceFile } +} diff --git a/apps/sim/lib/uploads/contexts/workspace/index.ts b/apps/sim/lib/uploads/contexts/workspace/index.ts index b7dbcdee264..d5c542b512a 100644 --- a/apps/sim/lib/uploads/contexts/workspace/index.ts +++ b/apps/sim/lib/uploads/contexts/workspace/index.ts @@ -1,2 +1,3 @@ +export * from './fetch-external-url' export * from './workspace-file-folder-manager' export * from './workspace-file-manager' From ab766f55ec6d9c99943708286cff592875c5fb17 Mon Sep 17 00:00:00 2001 From: Waleed Latif Date: Sat, 23 May 2026 12:42:01 -0700 Subject: [PATCH 2/2] fix(files): distinguish non-member from read-only in workspace-save skip log Splits the workspace-save skip branch so non-members (permission === null) log 'user is not a workspace member' instead of the misleading 'lacks write permission'. Adds a test covering the null case so the relaxed behavior (vs. the prior route's hard 'File not found') is explicit. --- .../workspace/fetch-external-url.test.ts | 17 +++++++++++++++++ .../contexts/workspace/fetch-external-url.ts | 5 +++++ 2 files changed, 22 insertions(+) diff --git a/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.test.ts b/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.test.ts index 425eb5dd4c8..d8c2476da9a 100644 --- a/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.test.ts +++ b/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.test.ts @@ -148,6 +148,23 @@ describe('fetchExternalUrlToWorkspace', () => { expect(mockUploadWorkspaceFile).not.toHaveBeenCalled() }) + it('returns parsed bytes but skips save when user is not a workspace member', async () => { + inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( + makeResponse('bytes', 'text/plain') + ) + permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue(null) + + const result = await fetchExternalUrlToWorkspace({ + url: 'https://example.com/file.txt', + userId: 'user-1', + workspaceId: 'workspace-1', + }) + + expect(result.buffer.toString()).toBe('bytes') + expect(result.savedWorkspaceFile).toBeUndefined() + expect(mockUploadWorkspaceFile).not.toHaveBeenCalled() + }) + it('returns the saved workspace file when permission allows save', async () => { inputValidationMockFns.mockSecureFetchWithPinnedIP.mockResolvedValue( makeResponse('bytes', 'text/plain') diff --git a/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.ts b/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.ts index ac06a5ea16d..fa942319876 100644 --- a/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.ts +++ b/apps/sim/lib/uploads/contexts/workspace/fetch-external-url.ts @@ -137,6 +137,11 @@ export async function fetchExternalUrlToWorkspace( saveError, }) } + } else if (permission === null) { + logger.warn('Skipping workspace save: user is not a workspace member', { + userId, + workspaceId, + }) } else { logger.warn('Skipping workspace save: user lacks write permission', { userId,