diff --git a/apps/docs/content/docs/en/tools/firecrawl.mdx b/apps/docs/content/docs/en/tools/firecrawl.mdx
index ee346b8e464..6c582bfc76d 100644
--- a/apps/docs/content/docs/en/tools/firecrawl.mdx
+++ b/apps/docs/content/docs/en/tools/firecrawl.mdx
@@ -234,4 +234,48 @@ Autonomous web data extraction agent. Searches and gathers information based on
| `expiresAt` | string | Timestamp when the results expire \(24 hours\) |
| `sources` | object | Array of source URLs used by the agent |
+### `firecrawl_parse`
+
+Parse uploaded documents (PDF, DOCX, HTML, etc.) into clean markdown using Firecrawl. Supports .html, .htm, .pdf, .docx, .doc, .odt, .rtf, .xlsx, .xls.
+
+#### Input
+
+| Parameter | Type | Required | Description |
+| --------- | ---- | -------- | ----------- |
+| `file` | file | Yes | Document file to be parsed |
+| `formats` | array | No | Output formats to return \(e.g., \["markdown"\]\). Defaults to markdown. |
+| `onlyMainContent` | boolean | No | Exclude headers, navs, footers. Defaults to true. |
+| `includeTags` | array | No | HTML tags to include |
+| `excludeTags` | array | No | HTML tags to exclude |
+| `timeout` | number | No | Timeout in milliseconds \(max 300000\). Defaults to 30000. |
+| `parsers` | array | No | Parser configuration \(e.g., \[\{ "type": "pdf" \}\]\) |
+| `removeBase64Images` | boolean | No | Remove base64 images, keep alt text. Defaults to true. |
+| `blockAds` | boolean | No | Block ads and popups. Defaults to true. |
+| `proxy` | string | No | Proxy mode: "basic" or "auto" |
+| `zeroDataRetention` | boolean | No | Enable zero data retention. Defaults to false. |
+| `apiKey` | string | Yes | Firecrawl API key |
+| `rateLimit` | string | No | No description |
+
+#### Output
+
+| Parameter | Type | Description |
+| --------- | ---- | ----------- |
+| `markdown` | string | Parsed document content in markdown format |
+| `summary` | string | Generated summary of the document |
+| `html` | string | Processed HTML content |
+| `rawHtml` | string | Unprocessed raw HTML content |
+| `screenshot` | string | Screenshot URL or base64 \(when requested\) |
+| `links` | array | URLs discovered in the document |
+| `metadata` | object | Document metadata |
+| ↳ `title` | string | Document title |
+| ↳ `description` | string | Document description |
+| ↳ `language` | string | Document language code |
+| ↳ `sourceURL` | string | Source URL |
+| ↳ `url` | string | Final URL |
+| ↳ `keywords` | string | Document keywords |
+| ↳ `statusCode` | number | HTTP status code |
+| ↳ `contentType` | string | Document content type |
+| ↳ `error` | string | Error message if parse failed |
+| `warning` | string | Warning message from the parse operation |
+
diff --git a/apps/docs/content/docs/en/tools/notion.mdx b/apps/docs/content/docs/en/tools/notion.mdx
index 9a7ac971572..392a52d685b 100644
--- a/apps/docs/content/docs/en/tools/notion.mdx
+++ b/apps/docs/content/docs/en/tools/notion.mdx
@@ -256,8 +256,6 @@ Create a new database in Notion with custom properties
### `notion_add_database_row`
-Add a new row to a Notion database with specified properties
-
#### Input
| Parameter | Type | Required | Description |
diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json
index b11c74c5f47..4239620a845 100644
--- a/apps/sim/app/(landing)/integrations/data/integrations.json
+++ b/apps/sim/app/(landing)/integrations/data/integrations.json
@@ -4020,9 +4020,13 @@
{
"name": "Agent",
"description": "Autonomous web data extraction agent. Searches and gathers information based on natural language prompts without requiring specific URLs."
+ },
+ {
+ "name": "Parse Document",
+ "description": "Parse uploaded documents (PDF, DOCX, HTML, etc.) into clean markdown using Firecrawl. Supports .html, .htm, .pdf, .docx, .doc, .odt, .rtf, .xlsx, .xls."
}
],
- "operationCount": 6,
+ "operationCount": 7,
"triggers": [],
"triggerCount": 0,
"authType": "api-key",
diff --git a/apps/sim/app/api/tools/firecrawl/parse/route.ts b/apps/sim/app/api/tools/firecrawl/parse/route.ts
new file mode 100644
index 00000000000..36bf99204f5
--- /dev/null
+++ b/apps/sim/app/api/tools/firecrawl/parse/route.ts
@@ -0,0 +1,104 @@
+import { createLogger } from '@sim/logger'
+import { toError } from '@sim/utils/errors'
+import { type NextRequest, NextResponse } from 'next/server'
+import { z } from 'zod'
+import { checkInternalAuth } from '@/lib/auth/hybrid'
+import { generateRequestId } from '@/lib/core/utils/request'
+import { withRouteHandler } from '@/lib/core/utils/with-route-handler'
+import { RawFileInputSchema } from '@/lib/uploads/utils/file-schemas'
+import { processFilesToUserFiles } from '@/lib/uploads/utils/file-utils'
+import { downloadFileFromStorage } from '@/lib/uploads/utils/file-utils.server'
+
+export const dynamic = 'force-dynamic'
+
+const logger = createLogger('FirecrawlParseAPI')
+
+const FirecrawlParseSchema = z.object({
+ apiKey: z.string().min(1, 'API key is required'),
+ file: RawFileInputSchema,
+ options: z.record(z.unknown()).optional(),
+})
+
+export const POST = withRouteHandler(async (request: NextRequest) => {
+ const requestId = generateRequestId()
+
+ try {
+ const authResult = await checkInternalAuth(request, { requireWorkflowId: false })
+
+ if (!authResult.success || !authResult.userId) {
+ logger.warn(`[${requestId}] Unauthorized Firecrawl parse attempt`, {
+ error: authResult.error || 'Missing userId',
+ })
+ return NextResponse.json(
+ { success: false, error: authResult.error || 'Unauthorized' },
+ { status: 401 }
+ )
+ }
+
+ const body = await request.json()
+ const validatedData = FirecrawlParseSchema.parse(body)
+
+ const [userFile] = processFilesToUserFiles([validatedData.file], requestId, logger)
+ if (!userFile) {
+ return NextResponse.json({ success: false, error: 'File input is required' }, { status: 400 })
+ }
+
+ logger.info(`[${requestId}] Firecrawl parse request`, {
+ fileName: userFile.name,
+ size: userFile.size,
+ })
+
+ const buffer = await downloadFileFromStorage(userFile, requestId, logger)
+
+ const formData = new FormData()
+ const blob = new Blob([new Uint8Array(buffer)], {
+ type: userFile.type || 'application/octet-stream',
+ })
+ formData.append('file', blob, userFile.name)
+
+ if (validatedData.options && Object.keys(validatedData.options).length > 0) {
+ formData.append('options', JSON.stringify(validatedData.options))
+ }
+
+ const firecrawlResponse = await fetch('https://api.firecrawl.dev/v2/parse', {
+ method: 'POST',
+ headers: {
+ Authorization: `Bearer ${validatedData.apiKey}`,
+ },
+ body: formData,
+ })
+
+ if (!firecrawlResponse.ok) {
+ const errorText = await firecrawlResponse.text()
+ logger.error(`[${requestId}] Firecrawl API error:`, errorText)
+ return NextResponse.json(
+ {
+ success: false,
+ error: `Firecrawl API error: ${errorText || firecrawlResponse.statusText}`,
+ },
+ { status: firecrawlResponse.status }
+ )
+ }
+
+ const firecrawlData = await firecrawlResponse.json()
+
+ logger.info(`[${requestId}] Firecrawl parse successful`)
+
+ return NextResponse.json({
+ success: true,
+ output: firecrawlData.data ?? firecrawlData,
+ })
+ } catch (error) {
+ if (error instanceof z.ZodError) {
+ logger.warn(`[${requestId}] Invalid request data`, { errors: error.errors })
+ return NextResponse.json(
+ { success: false, error: 'Invalid request data', details: error.errors },
+ { status: 400 }
+ )
+ }
+
+ logger.error(`[${requestId}] Error in Firecrawl parse:`, error)
+
+ return NextResponse.json({ success: false, error: toError(error).message }, { status: 500 })
+ }
+})
diff --git a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/short-input/short-input.tsx b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/short-input/short-input.tsx
index 6de90a072e2..ba5ff8461b1 100644
--- a/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/short-input/short-input.tsx
+++ b/apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/short-input/short-input.tsx
@@ -347,7 +347,7 @@ export const ShortInput = memo(function ShortInput({
<>
}
- className='allow-scroll w-full overflow-auto text-transparent caret-foreground [-ms-overflow-style:none] [scrollbar-width:none] selection:text-transparent placeholder:text-muted-foreground/50 [&::-webkit-scrollbar]:hidden'
+ className='allow-scroll w-full overflow-auto text-transparent caret-foreground [-ms-overflow-style:none] [scrollbar-width:none] placeholder:text-muted-foreground/50 [&::-webkit-scrollbar]:hidden'
readOnly={readOnly}
placeholder={placeholder ?? ''}
type='text'
diff --git a/apps/sim/blocks/blocks/firecrawl.ts b/apps/sim/blocks/blocks/firecrawl.ts
index f50f1731985..13c9710cf55 100644
--- a/apps/sim/blocks/blocks/firecrawl.ts
+++ b/apps/sim/blocks/blocks/firecrawl.ts
@@ -1,6 +1,7 @@
import { FirecrawlIcon } from '@/components/icons'
-import type { BlockConfig } from '@/blocks/types'
+import type { BlockConfig, SubBlockType } from '@/blocks/types'
import { AuthMode, IntegrationType } from '@/blocks/types'
+import { normalizeFileInput } from '@/blocks/utils'
import type { FirecrawlResponse } from '@/tools/firecrawl/types'
export const FirecrawlBlock: BlockConfig = {
@@ -28,9 +29,39 @@ export const FirecrawlBlock: BlockConfig = {
{ label: 'Map', id: 'map' },
{ label: 'Extract', id: 'extract' },
{ label: 'Agent', id: 'agent' },
+ { label: 'Parse Document', id: 'parse' },
],
value: () => 'scrape',
},
+ {
+ id: 'fileUpload',
+ title: 'Document',
+ type: 'file-upload' as SubBlockType,
+ canonicalParamId: 'document',
+ acceptedTypes:
+ 'application/pdf,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/msword,application/vnd.oasis.opendocument.text,application/rtf,text/rtf,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/vnd.ms-excel,text/html',
+ placeholder: 'Upload a document (PDF, DOCX, HTML, XLSX, etc.)',
+ mode: 'basic',
+ maxSize: 50,
+ condition: {
+ field: 'operation',
+ value: 'parse',
+ },
+ required: true,
+ },
+ {
+ id: 'fileReference',
+ title: 'File Reference',
+ type: 'short-input' as SubBlockType,
+ canonicalParamId: 'document',
+ placeholder: 'File reference from previous block',
+ mode: 'advanced',
+ condition: {
+ field: 'operation',
+ value: 'parse',
+ },
+ required: true,
+ },
{
id: 'url',
title: 'Website URL',
@@ -180,7 +211,7 @@ Example 2 - Product Data:
type: 'switch',
condition: {
field: 'operation',
- value: 'scrape',
+ value: ['scrape', 'parse'],
},
},
{
@@ -190,7 +221,7 @@ Example 2 - Product Data:
placeholder: '["markdown", "html"]',
condition: {
field: 'operation',
- value: 'scrape',
+ value: ['scrape', 'parse'],
},
},
{
@@ -219,7 +250,7 @@ Example 2 - Product Data:
placeholder: '60000',
condition: {
field: 'operation',
- value: ['scrape', 'search'],
+ value: ['scrape', 'search', 'parse'],
},
},
{
@@ -232,6 +263,83 @@ Example 2 - Product Data:
value: ['crawl', 'map', 'search'],
},
},
+ {
+ id: 'includeTags',
+ title: 'Include Tags',
+ type: 'long-input',
+ placeholder: '["article", "main"]',
+ mode: 'advanced',
+ condition: {
+ field: 'operation',
+ value: 'parse',
+ },
+ },
+ {
+ id: 'excludeTags',
+ title: 'Exclude Tags',
+ type: 'long-input',
+ placeholder: '["nav", "footer"]',
+ mode: 'advanced',
+ condition: {
+ field: 'operation',
+ value: 'parse',
+ },
+ },
+ {
+ id: 'parsers',
+ title: 'Parsers',
+ type: 'long-input',
+ placeholder: '[{"type": "pdf", "mode": "auto"}]',
+ mode: 'advanced',
+ condition: {
+ field: 'operation',
+ value: 'parse',
+ },
+ },
+ {
+ id: 'removeBase64Images',
+ title: 'Remove Base64 Images',
+ type: 'switch',
+ mode: 'advanced',
+ condition: {
+ field: 'operation',
+ value: 'parse',
+ },
+ },
+ {
+ id: 'blockAds',
+ title: 'Block Ads',
+ type: 'switch',
+ mode: 'advanced',
+ condition: {
+ field: 'operation',
+ value: 'parse',
+ },
+ },
+ {
+ id: 'proxy',
+ title: 'Proxy Mode',
+ type: 'dropdown',
+ options: [
+ { id: 'basic', label: 'Basic' },
+ { id: 'auto', label: 'Auto' },
+ ],
+ mode: 'advanced',
+ condition: {
+ field: 'operation',
+ value: 'parse',
+ },
+ },
+ {
+ id: 'zeroDataRetention',
+ title: 'Zero Data Retention',
+ type: 'switch',
+ mode: 'advanced',
+ condition: {
+ field: 'operation',
+ value: 'parse',
+ },
+ },
{
id: 'query',
title: 'Search Query',
@@ -278,6 +386,7 @@ Example 2 - Product Data:
'firecrawl_map',
'firecrawl_extract',
'firecrawl_agent',
+ 'firecrawl_parse',
],
config: {
tool: (params) => {
@@ -294,6 +403,8 @@ Example 2 - Product Data:
return 'firecrawl_extract'
case 'agent':
return 'firecrawl_agent'
+ case 'parse':
+ return 'firecrawl_parse'
default:
return 'firecrawl_scrape'
}
@@ -375,6 +486,68 @@ Example 2 - Product Data:
if (prompt) result.prompt = prompt
break
+ case 'parse': {
+ const file = normalizeFileInput(params.document, { single: true })
+ if (!file) {
+ throw new Error('A document file is required for the parse operation')
+ }
+ result.file = file
+ if (formats) {
+ if (Array.isArray(formats)) {
+ result.formats = formats
+ } else if (typeof formats === 'string') {
+ try {
+ const parsed = JSON.parse(formats)
+ result.formats = Array.isArray(parsed) ? parsed : ['markdown']
+ } catch {
+ result.formats = ['markdown']
+ }
+ }
+ }
+ if (onlyMainContent != null) result.onlyMainContent = onlyMainContent
+ if (timeout) result.timeout = Number.parseInt(timeout)
+
+ const parseStringArray = (value: unknown): string[] | undefined => {
+ if (Array.isArray(value)) return value as string[]
+ if (typeof value === 'string' && value.trim() !== '') {
+ try {
+ const parsed = JSON.parse(value)
+ return Array.isArray(parsed) ? parsed : undefined
+ } catch {
+ return undefined
+ }
+ }
+ return undefined
+ }
+
+ const includeTagsParsed = parseStringArray(params.includeTags)
+ if (includeTagsParsed) result.includeTags = includeTagsParsed
+
+ const excludeTagsParsed = parseStringArray(params.excludeTags)
+ if (excludeTagsParsed) result.excludeTags = excludeTagsParsed
+
+ if (params.parsers) {
+ if (Array.isArray(params.parsers)) {
+ result.parsers = params.parsers
+ } else if (typeof params.parsers === 'string' && params.parsers.trim() !== '') {
+ try {
+ const parsed = JSON.parse(params.parsers)
+ if (Array.isArray(parsed)) result.parsers = parsed
+ } catch {
+ // Skip invalid parsers config
+ }
+ }
+ }
+
+ if (params.removeBase64Images != null)
+ result.removeBase64Images = params.removeBase64Images
+ if (params.blockAds != null) result.blockAds = params.blockAds
+ if (params.proxy) result.proxy = params.proxy
+ if (params.zeroDataRetention != null)
+ result.zeroDataRetention = params.zeroDataRetention
+ break
+ }
+
case 'agent':
if (agentPrompt) result.prompt = agentPrompt
if (agentUrls) {
@@ -451,6 +624,14 @@ Example 2 - Product Data:
},
maxCredits: { type: 'number', description: 'Maximum credits to spend' },
strictConstrainToURLs: { type: 'boolean', description: 'Limit agent to provided URLs only' },
+ document: { type: 'json', description: 'Document input (file upload or file reference)' },
+ includeTags: { type: 'json', description: 'HTML tags to include during parsing' },
+ excludeTags: { type: 'json', description: 'HTML tags to exclude during parsing' },
+ parsers: { type: 'json', description: 'Parser configuration (e.g., [{"type": "pdf"}])' },
+ removeBase64Images: { type: 'boolean', description: 'Remove base64 images, keep alt text' },
+ blockAds: { type: 'boolean', description: 'Block ads and popups during parsing' },
+ proxy: { type: 'string', description: 'Proxy mode (basic or auto)' },
+ zeroDataRetention: { type: 'boolean', description: 'Enable zero data retention' },
},
outputs: {
// Scrape output
@@ -471,5 +652,9 @@ Example 2 - Product Data:
// Agent output
status: { type: 'string', description: 'Agent job status' },
expiresAt: { type: 'string', description: 'Result expiration timestamp' },
+ // Parse output
+ summary: { type: 'string', description: 'Generated summary of the parsed document' },
+ rawHtml: { type: 'string', description: 'Unprocessed raw HTML from the parsed document' },
+ screenshot: { type: 'string', description: 'Screenshot URL or base64 (when requested)' },
},
}
diff --git a/apps/sim/tools/firecrawl/index.ts b/apps/sim/tools/firecrawl/index.ts
index 94060bdf491..9d868ba7d37 100644
--- a/apps/sim/tools/firecrawl/index.ts
+++ b/apps/sim/tools/firecrawl/index.ts
@@ -2,6 +2,7 @@ import { agentTool } from '@/tools/firecrawl/agent'
import { crawlTool } from '@/tools/firecrawl/crawl'
import { extractTool } from '@/tools/firecrawl/extract'
import { mapTool } from '@/tools/firecrawl/map'
+import { parseTool } from '@/tools/firecrawl/parse'
import { scrapeTool } from '@/tools/firecrawl/scrape'
import { searchTool } from '@/tools/firecrawl/search'
@@ -11,3 +12,4 @@ export const firecrawlCrawlTool = crawlTool
export const firecrawlMapTool = mapTool
export const firecrawlExtractTool = extractTool
export const firecrawlAgentTool = agentTool
+export const firecrawlParseTool = parseTool
diff --git a/apps/sim/tools/firecrawl/parse.ts b/apps/sim/tools/firecrawl/parse.ts
new file mode 100644
index 00000000000..756c53b36a3
--- /dev/null
+++ b/apps/sim/tools/firecrawl/parse.ts
@@ -0,0 +1,225 @@
+import type { ParseParams, ParseResponse } from '@/tools/firecrawl/types'
+import type { ToolConfig } from '@/tools/types'
+
+export const parseTool: ToolConfig<ParseParams, ParseResponse> = {
+ id: 'firecrawl_parse',
+ name: 'Firecrawl Document Parser',
+ description:
+ 'Parse uploaded documents (PDF, DOCX, HTML, etc.) into clean markdown using Firecrawl. Supports .html, .htm, .pdf, .docx, .doc, .odt, .rtf, .xlsx, .xls.',
+ version: '1.0.0',
+
+ params: {
+ file: {
+ type: 'file',
+ required: true,
+ visibility: 'user-only',
+ description: 'Document file to be parsed',
+ },
+ formats: {
+ type: 'array',
+ required: false,
+ visibility: 'user-or-llm',
+ description: 'Output formats to return (e.g., ["markdown"]). Defaults to markdown.',
+ },
+ onlyMainContent: {
+ type: 'boolean',
+ required: false,
+ visibility: 'user-or-llm',
+ description: 'Exclude headers, navs, footers. Defaults to true.',
+ },
+ includeTags: {
+ type: 'array',
+ required: false,
+ visibility: 'user-or-llm',
+ description: 'HTML tags to include',
+ },
+ excludeTags: {
+ type: 'array',
+ required: false,
+ visibility: 'user-or-llm',
+ description: 'HTML tags to exclude',
+ },
+ timeout: {
+ type: 'number',
+ required: false,
+ visibility: 'user-or-llm',
+ description: 'Timeout in milliseconds (max 300000). Defaults to 30000.',
+ },
+ parsers: {
+ type: 'array',
+ required: false,
+ visibility: 'user-or-llm',
+ description: 'Parser configuration (e.g., [{ "type": "pdf" }])',
+ },
+ removeBase64Images: {
+ type: 'boolean',
+ required: false,
+ visibility: 'user-or-llm',
+ description: 'Remove base64 images, keep alt text. Defaults to true.',
+ },
+ blockAds: {
+ type: 'boolean',
+ required: false,
+ visibility: 'user-or-llm',
+ description: 'Block ads and popups. Defaults to true.',
+ },
+ proxy: {
+ type: 'string',
+ required: false,
+ visibility: 'user-or-llm',
+ description: 'Proxy mode: "basic" or "auto"',
+ },
+ zeroDataRetention: {
+ type: 'boolean',
+ required: false,
+ visibility: 'user-or-llm',
+ description: 'Enable zero data retention. Defaults to false.',
+ },
+ apiKey: {
+ type: 'string',
+ required: true,
+ visibility: 'user-only',
+ description: 'Firecrawl API key',
+ },
+ },
+
+ hosting: {
+ envKeyPrefix: 'FIRECRAWL_API_KEY',
+ apiKeyParam: 'apiKey',
+ byokProviderId: 'firecrawl',
+ pricing: {
+ type: 'custom',
+ getCost: (_params, output) => {
+ const creditsUsed = (output.metadata as { creditsUsed?: number })?.creditsUsed
+ if (creditsUsed == null) {
+ throw new Error('Firecrawl response missing creditsUsed field')
+ }
+
+ if (Number.isNaN(creditsUsed)) {
+ throw new Error('Firecrawl response returned a non-numeric creditsUsed field')
+ }
+
+ return {
+ cost: creditsUsed * 0.001,
+ metadata: { creditsUsed },
+ }
+ },
+ },
+ rateLimit: {
+ mode: 'per_request',
+ requestsPerMinute: 100,
+ },
+ },
+
+ request: {
+ method: 'POST',
+ url: '/api/tools/firecrawl/parse',
+ headers: () => ({
+ 'Content-Type': 'application/json',
+ Accept: 'application/json',
+ }),
+ body: (params) => {
+ if (!params.apiKey || typeof params.apiKey !== 'string' || params.apiKey.trim() === '') {
+ throw new Error('Missing or invalid API key: A valid Firecrawl API key is required')
+ }
+ if (!params.file || typeof params.file !== 'object') {
+ throw new Error('File input is required')
+ }
+
+      const options: Record<string, unknown> = {}
+ if (params.formats) options.formats = params.formats
+ if (typeof params.onlyMainContent === 'boolean')
+ options.onlyMainContent = params.onlyMainContent
+ if (params.includeTags) options.includeTags = params.includeTags
+ if (params.excludeTags) options.excludeTags = params.excludeTags
+ if (params.timeout != null) options.timeout = Number(params.timeout)
+ if (params.parsers) options.parsers = params.parsers
+ if (typeof params.removeBase64Images === 'boolean')
+ options.removeBase64Images = params.removeBase64Images
+ if (typeof params.blockAds === 'boolean') options.blockAds = params.blockAds
+ if (params.proxy) options.proxy = params.proxy
+ if (typeof params.zeroDataRetention === 'boolean')
+ options.zeroDataRetention = params.zeroDataRetention
+
+ return {
+ apiKey: params.apiKey,
+ file: params.file,
+ options,
+ }
+ },
+ },
+
+ transformResponse: async (response: Response) => {
+ const data = await response.json()
+ if (!data || typeof data !== 'object') {
+ throw new Error('Invalid response format from Firecrawl parse API')
+ }
+
+ const result = data.output ?? data.data ?? data
+
+ return {
+ success: true,
+ output: {
+ markdown: result.markdown ?? '',
+ summary: result.summary ?? null,
+ html: result.html ?? null,
+ rawHtml: result.rawHtml ?? null,
+ screenshot: result.screenshot ?? null,
+ links: result.links ?? [],
+ metadata: result.metadata ?? null,
+ warning: result.warning ?? null,
+ },
+ }
+ },
+
+ outputs: {
+ markdown: { type: 'string', description: 'Parsed document content in markdown format' },
+ summary: {
+ type: 'string',
+ description: 'Generated summary of the document',
+ optional: true,
+ },
+ html: {
+ type: 'string',
+ description: 'Processed HTML content',
+ optional: true,
+ },
+ rawHtml: {
+ type: 'string',
+ description: 'Unprocessed raw HTML content',
+ optional: true,
+ },
+ screenshot: {
+ type: 'string',
+ description: 'Screenshot URL or base64 (when requested)',
+ optional: true,
+ },
+ links: {
+ type: 'array',
+ description: 'URLs discovered in the document',
+ optional: true,
+ items: { type: 'string', description: 'Discovered URL' },
+ },
+ metadata: {
+ type: 'object',
+ description: 'Document metadata',
+ optional: true,
+ properties: {
+ title: { type: 'string', description: 'Document title', optional: true },
+ description: { type: 'string', description: 'Document description', optional: true },
+ language: { type: 'string', description: 'Document language code', optional: true },
+ sourceURL: { type: 'string', description: 'Source URL', optional: true },
+ url: { type: 'string', description: 'Final URL', optional: true },
+ keywords: { type: 'string', description: 'Document keywords', optional: true },
+ statusCode: { type: 'number', description: 'HTTP status code', optional: true },
+ contentType: { type: 'string', description: 'Document content type', optional: true },
+ error: { type: 'string', description: 'Error message if parse failed', optional: true },
+ },
+ },
+ warning: {
+ type: 'string',
+ description: 'Warning message from the parse operation',
+ optional: true,
+ },
+ },
+}
diff --git a/apps/sim/tools/firecrawl/types.ts b/apps/sim/tools/firecrawl/types.ts
index ff2a44fe2b6..28a64eef0db 100644
--- a/apps/sim/tools/firecrawl/types.ts
+++ b/apps/sim/tools/firecrawl/types.ts
@@ -521,6 +521,44 @@ export interface AgentResponse extends ToolResponse {
}
}
+export interface ParseParams {
+ apiKey: string
+ file: unknown
+ formats?: Array<{ type: string } | string>
+ onlyMainContent?: boolean
+ includeTags?: string[]
+ excludeTags?: string[]
+ timeout?: number
+ parsers?: Array<{ type: string; mode?: string } | string>
+ removeBase64Images?: boolean
+ blockAds?: boolean
+ proxy?: 'basic' | 'auto'
+ zeroDataRetention?: boolean
+}
+
+export interface ParseResponse extends ToolResponse {
+ output: {
+ markdown: string
+ summary?: string | null
+ html?: string | null
+ rawHtml?: string | null
+ screenshot?: string | null
+ links?: string[]
+ metadata?: {
+ title?: string | string[]
+ description?: string | string[]
+ language?: string | string[] | null
+ sourceURL?: string
+ url?: string
+ keywords?: string | string[]
+ statusCode?: number
+ contentType?: string
+ error?: string | null
+ } | null
+ warning?: string | null
+ }
+}
+
export type FirecrawlResponse =
| ScrapeResponse
| SearchResponse
@@ -528,3 +566,4 @@ export type FirecrawlResponse =
| MapResponse
| ExtractResponse
| AgentResponse
+ | ParseResponse
diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts
index dac92d1d879..8ee2f4f45f7 100644
--- a/apps/sim/tools/registry.ts
+++ b/apps/sim/tools/registry.ts
@@ -637,6 +637,7 @@ import {
firecrawlCrawlTool,
firecrawlExtractTool,
firecrawlMapTool,
+ firecrawlParseTool,
firecrawlScrapeTool,
firecrawlSearchTool,
} from '@/tools/firecrawl'
@@ -3114,6 +3115,7 @@ export const tools: Record = {
firecrawl_map: firecrawlMapTool,
firecrawl_extract: firecrawlExtractTool,
firecrawl_agent: firecrawlAgentTool,
+ firecrawl_parse: firecrawlParseTool,
fireflies_list_transcripts: firefliesListTranscriptsTool,
fireflies_get_transcript: firefliesGetTranscriptTool,
fireflies_get_user: firefliesGetUserTool,