From b360bf572fd203b8b0d0b33af7e31258e7c28966 Mon Sep 17 00:00:00 2001 From: FinleyGe Date: Fri, 14 Nov 2025 11:27:47 +0800 Subject: [PATCH] feat: tavily new tool and new features --- modules/tool/packages/tavily/README.md | 398 ++++++ .../packages/tavily/children/crawl/DESIGN.md | 1245 +++++++++++++++++ .../packages/tavily/children/crawl/config.ts | 177 +++ .../packages/tavily/children/crawl/index.ts | 10 + .../tavily/children/crawl/src/index.ts | 115 ++ .../tavily/children/crawl/test/index.test.ts | 564 ++++++++ .../children/crawl/test/performance.test.ts | 244 ++++ .../tavily/children/extract/DESIGN.md | 387 +++++ .../tavily/children/extract/config.ts | 88 ++ .../tavily/children/extract/src/index.ts | 21 +- .../children/extract/test/index.test.ts | 80 +- .../children/extract/test/simple.test.ts | 5 +- .../packages/tavily/children/map/DESIGN.md | 0 .../packages/tavily/children/map/config.ts | 151 ++ .../packages/tavily/children/map/index.ts | 10 + .../packages/tavily/children/map/src/index.ts | 91 ++ .../tavily/children/map/test/index.test.ts | 491 +++++++ .../packages/tavily/children/search/DESIGN.md | 693 +++++++++ .../packages/tavily/children/search/config.ts | 140 ++ .../tavily/children/search/src/index.ts | 30 +- .../tavily/children/search/test/index.test.ts | 120 +- modules/tool/packages/tavily/config.ts | 5 +- modules/tool/packages/tavily/types.ts | 88 +- 23 files changed, 5114 insertions(+), 39 deletions(-) create mode 100644 modules/tool/packages/tavily/README.md create mode 100644 modules/tool/packages/tavily/children/crawl/DESIGN.md create mode 100644 modules/tool/packages/tavily/children/crawl/config.ts create mode 100644 modules/tool/packages/tavily/children/crawl/index.ts create mode 100644 modules/tool/packages/tavily/children/crawl/src/index.ts create mode 100644 modules/tool/packages/tavily/children/crawl/test/index.test.ts create mode 100644 modules/tool/packages/tavily/children/crawl/test/performance.test.ts create mode 100644 
modules/tool/packages/tavily/children/extract/DESIGN.md create mode 100644 modules/tool/packages/tavily/children/map/DESIGN.md create mode 100644 modules/tool/packages/tavily/children/map/config.ts create mode 100644 modules/tool/packages/tavily/children/map/index.ts create mode 100644 modules/tool/packages/tavily/children/map/src/index.ts create mode 100644 modules/tool/packages/tavily/children/map/test/index.test.ts create mode 100644 modules/tool/packages/tavily/children/search/DESIGN.md diff --git a/modules/tool/packages/tavily/README.md b/modules/tool/packages/tavily/README.md new file mode 100644 index 00000000..c0ae6f0e --- /dev/null +++ b/modules/tool/packages/tavily/README.md @@ -0,0 +1,398 @@ +# Tavily 工具集 + +Tavily 是一个专门为 AI 应用程序设计的智能搜索 API,提供高质量的搜索结果和内容提取功能。 + +## 密钥获取 + +1. 访问 [Tavily 官网](https://tavily.com) 注册账户 +2. 登录后在 [控制台](https://app.tavily.com) 获取 API 密钥 +3. API 密钥格式:`tvly-xxxxxxxxxxxxx`(以 `tvly-` 开头) + +## 功能 + +### 📊 AI 搜索 (AI Search) + +使用 AI 驱动的智能网络搜索,提供相关性排序的搜索结果和可选的 AI 生成摘要。 + +**主要特性:** +- 智能相关性排序和结果过滤 +- 支持 AI 生成答案摘要 +- 基础搜索(1 credit)和高级搜索(2 credits)两种模式 +- 可定制返回结果数量(1-20 个) +- 60 秒超时保护 + +**使用场景:** +- 实时信息检索 +- 研究资料收集 +- 新闻和资讯搜索 +- 技术文档查找 + +### 📝 内容提取 (Content Extract) + +从网页中提取结构化的内容,支持 Markdown 和文本格式输出。 + +**主要特性:** +- 支持批量 URL 处理 +- 自动清理和格式化内容 +- 提取页面中的图片链接 +- 支持换行分隔的多 URL 输入 +- 错误处理和失败报告 + +**使用场景:** +- 网页内容抓取 +- 文章采集和处理 +- 数据清洗和整理 +- 内容聚合和分析 + +### 🕷️ 网站爬取 (Web Crawler) + +基于图的并行网站爬取功能,深度探索网站内容结构。 + +**主要特性:** +- 智能图遍历算法,支持并行爬取 +- 可配置爬取深度和广度限制 +- 自然语言指令指导爬取方向 +- 路径选择和排除规则(正则表达式) +- 支持基础和高级内容提取 +- 可控制外部链接处理策略 +- 图片和 favicon 包含选项 +- 10-150秒可调超时时间 + +**使用场景:** +- 完整网站文档抓取 +- API 文档结构化采集 +- 站点地图生成 +- 内容聚合平台 +- 竞品分析 +- SEO 优化研究 +- 知识库构建 + +### 🗺️ 网站地图 (Site Map) + +智能网站结构映射工具,快速发现和整理网站的所有可访问链接。 + +**主要特性:** +- 图遍历算法,并行探索网站结构 +- 智能链接发现和分类 +- 路径和域名过滤(正则表达式) +- 深度和广度控制 +- 外部链接包含/排除选项 +- 指令引导映射(自然语言) +- 10-150秒可调超时时间 + +**使用场景:** +- 网站结构分析 +- 站点地图生成 +- 爬取计划制定 +- SEO 网站审计 +- 链接完整性检查 +- 内容架构分析 +- 网站迁移规划 + +## 配置参数 + +### AI 搜索参数 + +| 参数 | 类型 
| 默认值 | 说明 | +|------|------|--------|------| +| query | string | - | **必填** 搜索查询内容 | +| searchDepth | select | basic | 搜索深度:basic(基础)或 advanced(高级) | +| maxResults | number | 10 | 最大返回结果数(1-20) | +| includeAnswer | boolean | false | 是否生成 AI 摘要答案 | + +### 内容提取参数 + +| 参数 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| urls | string | - | **必填** URL 地址,支持多个(换行分隔) | +| format | select | markdown | 输出格式:markdown 或 text | + +### 网站爬取参数 + +| 参数 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| url | string | - | **必填** 起始爬取的根 URL | +| instructions | string | - | 自然语言爬取指令(使用会增加成本) | +| maxDepth | number | 1 | 爬取最大深度(1-5) | +| maxBreadth | number | 20 | 每层跟随的最大链接数 | +| limit | number | 50 | 处理的总链接数上限 | +| selectPaths | string | - | 包含路径的正则表达式(每行一个) | +| excludePaths | string | - | 排除路径的正则表达式(每行一个) | +| allowExternal | boolean | true | 是否包含外部域链接 | +| includeImages | boolean | false | 是否在结果中包含图片 | +| extractDepth | select | basic | 提取深度:basic 或 advanced | +| format | select | markdown | 内容输出格式:markdown 或 text | +| includeFavicon | boolean | false | 是否为每个结果包含 favicon URL | +| timeout | number | 150 | 超时时间(10-150秒) | + +### 网站地图参数 + +| 参数 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| url | string | - | **必填** 起始映射的根 URL | +| instructions | string | - | 自然语言映射指令(使用会增加成本) | +| maxDepth | number | 1 | 映射最大深度(1-5) | +| maxBreadth | number | 20 | 每层跟随的最大链接数 | +| limit | number | 50 | 处理的总链接数上限 | +| selectPaths | string | - | 包含路径的正则表达式(每行一个) | +| selectDomains | string | - | 包含域名的正则表达式(每行一个) | +| excludePaths | string | - | 排除路径的正则表达式(每行一个) | +| excludeDomains | string | - | 排除域名的正则表达式(每行一个) | +| allowExternal | boolean | true | 是否包含外部域链接 | +| timeout | number | 150 | 超时时间(10-150秒) | + +## 输出格式 + +### AI 搜索输出 + +```json +{ + "answer": "AI 生成的答案摘要(可选)", + "results": [ + { + "title": "网页标题", + "url": "网页链接", + "content": "网页内容摘要", + "raw_content": "完整原始内容(高级搜索)" + } + ] +} +``` + +### 内容提取输出 + +```json +{ + "results": [ + { + "url": "提取的 URL", + "raw_content": 
"提取的内容", + "images": ["图片链接数组"] + } + ], + "successCount": 3, + "failedUrls": ["失败的 URL 及原因"] +} +``` + +### 网站爬取输出 + +```json +{ + "baseUrl": "被爬取的基础 URL", + "results": [ + { + "url": "爬取的页面 URL", + "raw_content": "页面内容", + "favicon": "favicon URL(可选)" + } + ], + "successCount": 25, + "responseTime": 45.67 +} +``` + +### 网站地图输出 + +```json +{ + "baseUrl": "被映射的基础 URL", + "results": [ + "https://example.com/page1", + "https://example.com/page2", + "https://example.com/docs/api" + ], + "urlCount": 150, + "responseTime": 12.34 +} +``` + +## 使用示例 + +### 基础搜索 +```typescript +// 搜索 TypeScript 相关内容 +{ + "query": "TypeScript 最新特性", + "searchDepth": "basic", + "maxResults": 5, + "includeAnswer": false +} +``` + +### AI 摘要搜索 +```typescript +// 获取 AI 生成的摘要答案 +{ + "query": "什么是量子计算?", + "searchDepth": "advanced", + "maxResults": 8, + "includeAnswer": true +} +``` + +### 内容提取 +```typescript +// 提取单个网页内容 +{ + "urls": "https://example.com/article", + "format": "markdown" +} +``` + +### 批量内容提取 +```typescript +// 提取多个网页内容 +{ + "urls": "https://site1.com/article\nhttps://site2.com/news\nhttps://site3.com/docs", + "format": "text" +} +``` + +### 基础网站爬取 +```typescript +// 深度爬取网站文档 +{ + "url": "docs.tavily.com", + "maxDepth": 2, + "maxBreadth": 15, + "limit": 50, + "includeFavicon": true +} +``` + +### 指令引导爬取 +```typescript +// 使用自然语言指令指导爬取 +{ + "url": "docs.tavily.com", + "instructions": "Find all pages about the Python SDK", + "maxDepth": 3, + "limit": 30, + "extractDepth": "advanced" +} +``` + +### 路径过滤爬取 +```typescript +// 使用正则表达式过滤爬取路径 +{ + "url": "example.com", + "selectPaths": "/docs/.*\n/api/v1.*", + "excludePaths": "/private/.*\n/admin/.*", + "maxDepth": 2 +} +``` + +### 大规模爬取 +```typescript +// 配置大规模深度爬取 +{ + "url": "large-site.com", + "maxDepth": 5, + "maxBreadth": 30, + "limit": 200, + "allowExternal": false, + "includeImages": true, + "timeout": 150 +} +``` + +### 基础网站映射 +```typescript +// 映射网站结构 +{ + "url": "docs.tavily.com", + "maxDepth": 2, + "maxBreadth": 15, + 
"limit": 50, + "allowExternal": true +} +``` + +### 指令引导映射 +```typescript +// 使用自然语言指令映射 +{ + "url": "docs.tavily.com", + "instructions": "Find all API documentation pages", + "maxDepth": 3, + "limit": 100 +} +``` + +### 路径过滤映射 +```typescript +// 使用正则表达式过滤映射 +{ + "url": "example.com", + "selectPaths": "/docs/.*\n/api/.*", + "excludePaths": "/private/.*", + "selectDomains": "^example\\.com$", + "maxDepth": 2 +} +``` + +## 错误处理 + +工具提供完善的错误处理机制: + +- **认证错误**:API 密钥无效或过期 +- **速率限制**:请求频率超限,需要等待 +- **网络错误**:连接超时或网络不可达 +- **服务器错误**:Tavily 服务端错误 +- **请求错误**:参数格式不正确 + +## API 限制 + +### AI 搜索 +- 请求超时:60 秒 +- 搜索结果数:1-20 个 +- 速率限制:根据订阅计划不同而有所限制 + +### 内容提取 +- 请求超时:60 秒 +- 批量提取:支持多个 URL(换行分隔) +- 基础提取:1 credit / 5 次成功提取 +- 高级提取:2 credits / 5 次成功提取 + +### 网站爬取 +- 请求超时:10-150 秒(可配置) +- 爬取深度:1-5 层 +- 最大链接数:可自定义限制 +- 基础提取:1 credit / 5 页成功爬取 +- 高级提取:2 credits / 5 页成功爬取 +- 指令引导:2 credits / 10 页成功爬取 +- 速率限制:根据订阅计划不同而有所限制 + +**注意:** 爬取操作消耗更多资源,建议合理设置限制参数。 + +### 网站地图 +- 请求超时:10-150 秒(可配置) +- 映射深度:1-5 层 +- 最大链接数:可自定义限制 +- 基础映射:1 credit / 10 页成功映射 +- 指令引导:2 credits / 10 页成功映射 +- 速率限制:根据订阅计划不同而有所限制 + +**注意:** 映射操作比搜索消耗更多资源,但比爬取操作轻量。 + +## 开发和测试 + +```bash +# 安装依赖 +bun install + +# 运行测试 +bun run test + +# 构建项目 +bun run build:runtime +``` + +## 支持与反馈 + +如有问题或建议,请通过以下方式联系: +- GitHub Issues +- FastGPT 社区 +- Tavily 官方文档:https://docs.tavily.com diff --git a/modules/tool/packages/tavily/children/crawl/DESIGN.md b/modules/tool/packages/tavily/children/crawl/DESIGN.md new file mode 100644 index 00000000..ffab436e --- /dev/null +++ b/modules/tool/packages/tavily/children/crawl/DESIGN.md @@ -0,0 +1,1245 @@ +# Tavily Crawl + +> Tavily Crawl is a graph-based website traversal tool that can explore hundreds of paths in parallel with built-in extraction and intelligent discovery. 
+ +## OpenAPI + +````yaml POST /crawl +paths: + path: /crawl + method: post + servers: + - url: https://api.tavily.com/ + request: + security: + - title: bearerAuth + parameters: + query: {} + header: + Authorization: + type: http + scheme: bearer + description: >- + Bearer authentication header in the form Bearer <token>, where + <token> is your Tavily API key (e.g., Bearer tvly-YOUR_API_KEY). + cookie: {} + parameters: + path: {} + query: {} + header: {} + cookie: {} + body: + application/json: + schemaArray: + - type: object + properties: + url: + allOf: + - type: string + description: The root URL to begin the crawl. + example: docs.tavily.com + instructions: + allOf: + - type: string + description: >- + Natural language instructions for the crawler. When + specified, the mapping cost increases to 2 API credits per + 10 successful pages instead of 1 API credit per 10 pages. + example: Find all pages about the Python SDK + max_depth: + allOf: + - type: integer + description: >- + Max depth of the crawl. Defines how far from the base URL + the crawler can explore. + default: 1 + minimum: 1 + maximum: 5 + max_breadth: + allOf: + - type: integer + description: >- + Max number of links to follow per level of the tree (i.e., + per page). + default: 20 + minimum: 1 + limit: + allOf: + - type: integer + description: >- + Total number of links the crawler will process before + stopping. + default: 50 + minimum: 1 + select_paths: + allOf: + - type: array + description: >- + Regex patterns to select only URLs with specific path + patterns (e.g., `/docs/.*`, `/api/v1.*`). + items: + type: string + default: null + select_domains: + allOf: + - type: array + description: >- + Regex patterns to select crawling to specific domains or + subdomains (e.g., `^docs\.example\.com$`). + items: + type: string + default: null + exclude_paths: + allOf: + - type: array + description: >- + Regex patterns to exclude URLs with specific path patterns + (e.g., `/private/.*`, `/admin/.*`). 
+ items: + type: string + default: null + exclude_domains: + allOf: + - type: array + description: >- + Regex patterns to exclude specific domains or subdomains + from crawling (e.g., `^private\.example\.com$`). + items: + type: string + default: null + allow_external: + allOf: + - type: boolean + description: >- + Whether to include external domain links in the final + results list. + default: true + include_images: + allOf: + - type: boolean + description: Whether to include images in the crawl results. + default: false + extract_depth: + allOf: + - type: string + description: >- + Advanced extraction retrieves more data, including tables + and embedded content, with higher success but may increase + latency. `basic` extraction costs 1 credit per 5 + successful extractions, while `advanced` extraction costs + 2 credits per 5 successful extractions. + enum: + - basic + - advanced + default: basic + format: + allOf: + - type: string + description: >- + The format of the extracted web page content. `markdown` + returns content in markdown format. `text` returns plain + text and may increase latency. + enum: + - markdown + - text + default: markdown + include_favicon: + allOf: + - type: boolean + description: Whether to include the favicon URL for each result. + default: false + timeout: + allOf: + - type: number + format: float + description: >- + Maximum time in seconds to wait for the crawl operation + before timing out. Must be between 10 and 150 seconds. + minimum: 10 + maximum: 150 + default: 150 + required: true + requiredProperties: + - url + examples: + example: + value: + url: docs.tavily.com + instructions: Find all pages about the Python SDK + max_depth: 1 + max_breadth: 20 + limit: 50 + select_paths: null + select_domains: null + exclude_paths: null + exclude_domains: null + allow_external: true + include_images: false + extract_depth: basic + format: markdown + include_favicon: false + timeout: 150 + description: Parameters for the Tavily Crawl request. 
+ codeSamples: + - label: Python SDK + lang: python + source: >- + from tavily import TavilyClient + + + tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY") + + response = tavily_client.crawl("https://docs.tavily.com", + instructions="Find all pages on the Python SDK") + + + print(response) + - label: JavaScript SDK + lang: javascript + source: >- + const { tavily } = require("@tavily/core"); + + + const tvly = tavily({ apiKey: "tvly-YOUR_API_KEY" }); + + const response = await tvly.crawl("https://docs.tavily.com", { + instructions: "Find all pages on the Python SDK" }); + + + console.log(response); + response: + '200': + application/json: + schemaArray: + - type: object + properties: + base_url: + allOf: + - type: string + description: The base URL that was crawled. + example: docs.tavily.com + results: + allOf: + - type: array + description: A list of extracted content from the crawled URLs. + items: + type: object + properties: + url: + type: string + description: The URL that was crawled. + example: https://docs.tavily.com + raw_content: + type: string + description: The full content extracted from the page. + favicon: + type: string + description: The favicon URL for the result. + example: >- + https://mintlify.s3-us-west-1.amazonaws.com/tavilyai/_generated/favicon/apple-touch-icon.png?v=3 + example: + - url: https://docs.tavily.com/welcome + raw_content: >- + Welcome - Tavily Docs + + + [Tavily Docs home page![light + logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/light.svg)![dark + logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/dark.svg)](https://tavily.com/) + + + Search or ask... + + + Ctrl K + + + - [Support](mailto:support@tavily.com) + + - [Get an API key](https://app.tavily.com) + + - [Get an API key](https://app.tavily.com) + + + Search... 
+ + + Navigation + + + [Home](/welcome)[Documentation](/documentation/about)[SDKs](/sdk/python/quick-start)[Examples](/examples/use-cases/data-enrichment)[FAQ](/faq/faq) + + + Explore our docs + + + Your journey to state-of-the-art web search starts + right here. + + + [## Quickstart + + + Start searching with Tavily in + minutes](documentation/quickstart)[## API Reference + + + Start using Tavily's powerful + APIs](documentation/api-reference/endpoint/search)[## + API Credits Overview + + + Learn how to get and manage your Tavily API + Credits](documentation/api-credits)[## Rate Limits + + + Learn about Tavily's API rate limits for both + development and production + environments](documentation/rate-limits)[## Python + + + Get started with our Python SDK, + `tavily-python`](sdk/python/quick-start)[## Playground + + + Explore Tavily's APIs with our interactive + playground](https://app.tavily.com/playground) + favicon: >- + https://mintlify.s3-us-west-1.amazonaws.com/tavilyai/_generated/favicon/apple-touch-icon.png?v=3 + - url: https://docs.tavily.com/documentation/api-credits + raw_content: >- + Credits & Pricing - Tavily Docs + + + [Tavily Docs home page![light + logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/light.svg)![dark + logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/dark.svg)](https://tavily.com/) + + + Search or ask... + + + Ctrl K + + + - [Support](mailto:support@tavily.com) + + - [Get an API key](https://app.tavily.com) + + - [Get an API key](https://app.tavily.com) + + + Search... 
+ + + Navigation + + + Overview + + + Credits & Pricing + + + [Home](/welcome)[Documentation](/documentation/about)[SDKs](/sdk/python/quick-start)[Examples](/examples/use-cases/data-enrichment)[FAQ](/faq/faq) + + + - [API Playground](https://app.tavily.com/playground) + + - [Community](https://community.tavily.com) + + - [Blog](https://blog.tavily.com) + + + ##### Overview + + + - [About](/documentation/about) + + - [Quickstart](/documentation/quickstart) + + - [Credits & Pricing](/documentation/api-credits) + + - [Rate Limits](/documentation/rate-limits) + + + ##### API Reference + + + - + [Introduction](/documentation/api-reference/introduction) + + - [POST + + Tavily Search](/documentation/api-reference/endpoint/search) + - [POST + + Tavily Extract](/documentation/api-reference/endpoint/extract) + - [POST + + Tavily Crawl](/documentation/api-reference/endpoint/crawl) + - [POST + + Tavily Map](/documentation/api-reference/endpoint/map) + + ##### Best Practices + + + - [Best Practices for + Search](/documentation/best-practices/best-practices-search) + + - [Best Practices for + Extract](/documentation/best-practices/best-practices-extract) + + + ##### Tavily MCP Server + + + - [Tavily MCP Server](/documentation/mcp) + + + ##### Integrations + + + - [LangChain](/documentation/integrations/langchain) + + - [LlamaIndex](/documentation/integrations/llamaindex) + + - [Zapier](/documentation/integrations/zapier) + + - [Dify](/documentation/integrations/dify) + + - [Composio](/documentation/integrations/composio) + + - [Make](/documentation/integrations/make) + + - [Agno](/documentation/integrations/agno) + + - [Pydantic + AI](/documentation/integrations/pydantic-ai) + + - [FlowiseAI](/documentation/integrations/flowise) + + + ##### Legal + + + - [Security & Compliance](https://trust.tavily.com) + + - [Privacy Policy](https://tavily.com/privacy) + + + ##### Help + + + - [Help Center](https://help.tavily.com) + + + ##### Tavily Search Crawler + + + - [Tavily Search + 
Crawler](/documentation/search-crawler) + + + Overview + + + # Credits & Pricing + + + Learn how to get and manage your Tavily API Credits. + + + ## [​](#free-api-credits) Free API Credits + + + [## Get your free API key + + + You get 1,000 free API Credits every month. + + **No credit card required.**](https://app.tavily.com) + + + ## [​](#pricing-overview) Pricing Overview + + + Tavily operates on a simple, credit-based model: + + + - **Free**: 1,000 credits/month + + - **Pay-as-you-go**: $0.008 per credit (allows you to + be charged per credit once your plan's credit limit is + reached). + + - **Monthly plans**: $0.0075 - $0.005 per credit + + - **Enterprise**: Custom pricing and volume + + + | **Plan** | **Credits per month** | **Monthly price** + | **Price per credit** | + + | --- | --- | --- | --- | + + | **Researcher** | 1,000 | Free | - | + + | **Project** | 4,000 | $30 | $0.0075 | + + | **Bootstrap** | 15,000 | $100 | $0.0067 | + + | **Startup** | 38,000 | $220 | $0.0058 | + + | **Growth** | 100,000 | $500 | $0.005 | + + | **Pay as you go** | Per usage | $0.008 / Credit | + $0.008 | + + | **Enterprise** | Custom | Custom | Custom | + + + Head to [my plan](https://app.tavily.com/account/plan) + to explore our different options and manage your plan. + + + ## [​](#api-credits-costs) API Credits Costs + + + ### [​](#tavily-search) Tavily Search + + + Your [search + depth](/api-reference/endpoint/search#body-search-depth) + determines the cost of your request. + + + - **Basic Search (`basic`):** + Each request costs **1 API credit**. + - **Advanced Search (`advanced`):** + Each request costs **2 API credits**. + + ### [​](#tavily-extract) Tavily Extract + + + The number of successful URL extractions and your + [extraction + depth](/api-reference/endpoint/extract#body-extract-depth) + determines the cost of your request. You never get + charged if a URL extraction fails. 
+ + + - **Basic Extract (`basic`):** + Every 5 successful URL extractions cost **1 API credit** + - **Advanced Extract (`advanced`):** + Every 5 successful URL extractions cost **2 API credits** + + [Quickstart](/documentation/quickstart)[Rate + Limits](/documentation/rate-limits) + + + [x](https://x.com/tavilyai)[github](https://github.com/tavily-ai)[linkedin](https://linkedin.com/company/tavily)[website](https://tavily.com) + + + [Powered by + Mintlify](https://mintlify.com/preview-request?utm_campaign=poweredBy&utm_medium=docs&utm_source=docs.tavily.com) + + + On this page + + + - [Free API Credits](#free-api-credits) + + - [Pricing Overview](#pricing-overview) + + - [API Credits Costs](#api-credits-costs) + + - [Tavily Search](#tavily-search) + + - [Tavily Extract](#tavily-extract) + favicon: >- + https://mintlify.s3-us-west-1.amazonaws.com/tavilyai/_generated/favicon/apple-touch-icon.png?v=3 + - url: https://docs.tavily.com/documentation/about + raw_content: >- + Who are we? + + ----------- + + + We're a team of AI researchers and developers + passionate about helping you build the next generation + of AI assistants. Our mission is to empower + individuals and organizations with accurate, unbiased, + and factual information. + + + What is the Tavily Search Engine? + + --------------------------------- + + + Building an AI agent that leverages realtime online + information is not a simple task. Scraping doesn't + scale and requires expertise to refine, current search + engine APIs don't provide explicit information to + queries but simply potential related articles (which + are not always related), and are not very customziable + for AI agent needs. This is why we're excited to + introduce the first search engine for AI agents - + [Tavily](https://app.tavily.com/). + + + Tavily is a search engine optimized for LLMs, aimed at + efficient, quick and persistent search results. 
Unlike + other search APIs such as Serp or Google, Tavily + focuses on optimizing search for AI developers and + autonomous AI agents. We take care of all the burden + of searching, scraping, filtering and extracting the + most relevant information from online sources. All in + a single API call! + + + To try the API in action, you can now use our hosted + version on our [API + Playground](https://app.tavily.com/playground). + + + Why choose Tavily? + + ------------------ + + + Tavily shines where others fail, with a Search API + optimized for LLMs. + + + How does the Search API work? + + ----------------------------- + + + Traditional search APIs such as Google, Serp and Bing + retrieve search results based on a user query. + However, the results are sometimes irrelevant to the + goal of the search, and return simple URLs and + snippets of content which are not always relevant. + Because of this, any developer would need to then + scrape the sites to extract relevant content, filter + irrelevant information, optimize the content to fit + LLM context limits, and more. This task is a burden + and requires a lot of time and effort to complete. The + Tavily Search API takes care of all of this for you in + a single API call. + + + The Tavily Search API aggregates up to 20 sites per a + single API call, and uses proprietary AI to score, + filter and rank the top most relevant sources and + content to your task, query or goal. In addition, + Tavily allows developers to add custom fields such as + context and limit response tokens to enable the + optimal search experience for LLMs. + + + Tavily can also help your AI agent make better + decisions by including a short answer for cross-agent + communication. + + + Getting started + + --------------- + + + [Sign up](https://app.tavily.com/) for Tavily to get + your API key. You get **1,000 free API Credits every + month**. No credit card required. 
+ + + [Get your free API key --------------------- You get + 1,000 free API Credits every month. **No credit card + required.**](https://app.tavily.com/)Head to our [API + Playground](https://app.tavily.com/playground) to + familiarize yourself with our API. + + + To get started with Tavily's APIs and SDKs using code, + head to our [Quickstart + Guide](https://docs.tavily.com/guides/quickstart) and + follow the steps. + favicon: >- + https://mintlify.s3-us-west-1.amazonaws.com/tavilyai/_generated/favicon/apple-touch-icon.png?v=3 + response_time: + allOf: + - type: number + format: float + description: Time in seconds it took to complete the request. + example: 1.23 + request_id: + allOf: + - type: string + description: >- + A unique request identifier you can share with customer + support to help resolve issues with specific requests. + example: 123e4567-e89b-12d3-a456-426614174111 + examples: + example: + value: + base_url: docs.tavily.com + results: + - url: https://docs.tavily.com/welcome + raw_content: >- + Welcome - Tavily Docs + + + [Tavily Docs home page![light + logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/light.svg)![dark + logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/dark.svg)](https://tavily.com/) + + + Search or ask... + + + Ctrl K + + + - [Support](mailto:support@tavily.com) + + - [Get an API key](https://app.tavily.com) + + - [Get an API key](https://app.tavily.com) + + + Search... + + + Navigation + + + [Home](/welcome)[Documentation](/documentation/about)[SDKs](/sdk/python/quick-start)[Examples](/examples/use-cases/data-enrichment)[FAQ](/faq/faq) + + + Explore our docs + + + Your journey to state-of-the-art web search starts right + here. 
+ + + [## Quickstart + + + Start searching with Tavily in + minutes](documentation/quickstart)[## API Reference + + + Start using Tavily's powerful + APIs](documentation/api-reference/endpoint/search)[## API + Credits Overview + + + Learn how to get and manage your Tavily API + Credits](documentation/api-credits)[## Rate Limits + + + Learn about Tavily's API rate limits for both development + and production environments](documentation/rate-limits)[## + Python + + + Get started with our Python SDK, + `tavily-python`](sdk/python/quick-start)[## Playground + + + Explore Tavily's APIs with our interactive + playground](https://app.tavily.com/playground) + favicon: >- + https://mintlify.s3-us-west-1.amazonaws.com/tavilyai/_generated/favicon/apple-touch-icon.png?v=3 + - url: https://docs.tavily.com/documentation/api-credits + raw_content: >- + Credits & Pricing - Tavily Docs + + + [Tavily Docs home page![light + logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/light.svg)![dark + logo](https://mintlify.s3.us-west-1.amazonaws.com/tavilyai/logo/dark.svg)](https://tavily.com/) + + + Search or ask... + + + Ctrl K + + + - [Support](mailto:support@tavily.com) + + - [Get an API key](https://app.tavily.com) + + - [Get an API key](https://app.tavily.com) + + + Search... 
+ + + Navigation + + + Overview + + + Credits & Pricing + + + [Home](/welcome)[Documentation](/documentation/about)[SDKs](/sdk/python/quick-start)[Examples](/examples/use-cases/data-enrichment)[FAQ](/faq/faq) + + + - [API Playground](https://app.tavily.com/playground) + + - [Community](https://community.tavily.com) + + - [Blog](https://blog.tavily.com) + + + ##### Overview + + + - [About](/documentation/about) + + - [Quickstart](/documentation/quickstart) + + - [Credits & Pricing](/documentation/api-credits) + + - [Rate Limits](/documentation/rate-limits) + + + ##### API Reference + + + - [Introduction](/documentation/api-reference/introduction) + + - [POST + + Tavily Search](/documentation/api-reference/endpoint/search) + - [POST + + Tavily Extract](/documentation/api-reference/endpoint/extract) + - [POST + + Tavily Crawl](/documentation/api-reference/endpoint/crawl) + - [POST + + Tavily Map](/documentation/api-reference/endpoint/map) + + ##### Best Practices + + + - [Best Practices for + Search](/documentation/best-practices/best-practices-search) + + - [Best Practices for + Extract](/documentation/best-practices/best-practices-extract) + + + ##### Tavily MCP Server + + + - [Tavily MCP Server](/documentation/mcp) + + + ##### Integrations + + + - [LangChain](/documentation/integrations/langchain) + + - [LlamaIndex](/documentation/integrations/llamaindex) + + - [Zapier](/documentation/integrations/zapier) + + - [Dify](/documentation/integrations/dify) + + - [Composio](/documentation/integrations/composio) + + - [Make](/documentation/integrations/make) + + - [Agno](/documentation/integrations/agno) + + - [Pydantic AI](/documentation/integrations/pydantic-ai) + + - [FlowiseAI](/documentation/integrations/flowise) + + + ##### Legal + + + - [Security & Compliance](https://trust.tavily.com) + + - [Privacy Policy](https://tavily.com/privacy) + + + ##### Help + + + - [Help Center](https://help.tavily.com) + + + ##### Tavily Search Crawler + + + - [Tavily Search 
Crawler](/documentation/search-crawler) + + + Overview + + + # Credits & Pricing + + + Learn how to get and manage your Tavily API Credits. + + + ## [​](#free-api-credits) Free API Credits + + + [## Get your free API key + + + You get 1,000 free API Credits every month. + + **No credit card required.**](https://app.tavily.com) + + + ## [​](#pricing-overview) Pricing Overview + + + Tavily operates on a simple, credit-based model: + + + - **Free**: 1,000 credits/month + + - **Pay-as-you-go**: $0.008 per credit (allows you to be + charged per credit once your plan's credit limit is + reached). + + - **Monthly plans**: $0.0075 - $0.005 per credit + + - **Enterprise**: Custom pricing and volume + + + | **Plan** | **Credits per month** | **Monthly price** | + **Price per credit** | + + | --- | --- | --- | --- | + + | **Researcher** | 1,000 | Free | - | + + | **Project** | 4,000 | $30 | $0.0075 | + + | **Bootstrap** | 15,000 | $100 | $0.0067 | + + | **Startup** | 38,000 | $220 | $0.0058 | + + | **Growth** | 100,000 | $500 | $0.005 | + + | **Pay as you go** | Per usage | $0.008 / Credit | $0.008 | + + | **Enterprise** | Custom | Custom | Custom | + + + Head to [my plan](https://app.tavily.com/account/plan) to + explore our different options and manage your plan. + + + ## [​](#api-credits-costs) API Credits Costs + + + ### [​](#tavily-search) Tavily Search + + + Your [search + depth](/api-reference/endpoint/search#body-search-depth) + determines the cost of your request. + + + - **Basic Search (`basic`):** + Each request costs **1 API credit**. + - **Advanced Search (`advanced`):** + Each request costs **2 API credits**. + + ### [​](#tavily-extract) Tavily Extract + + + The number of successful URL extractions and your + [extraction + depth](/api-reference/endpoint/extract#body-extract-depth) + determines the cost of your request. You never get charged + if a URL extraction fails. 
+ + + - **Basic Extract (`basic`):** + Every 5 successful URL extractions cost **1 API credit** + - **Advanced Extract (`advanced`):** + Every 5 successful URL extractions cost **2 API credits** + + [Quickstart](/documentation/quickstart)[Rate + Limits](/documentation/rate-limits) + + + [x](https://x.com/tavilyai)[github](https://github.com/tavily-ai)[linkedin](https://linkedin.com/company/tavily)[website](https://tavily.com) + + + [Powered by + Mintlify](https://mintlify.com/preview-request?utm_campaign=poweredBy&utm_medium=docs&utm_source=docs.tavily.com) + + + On this page + + + - [Free API Credits](#free-api-credits) + + - [Pricing Overview](#pricing-overview) + + - [API Credits Costs](#api-credits-costs) + + - [Tavily Search](#tavily-search) + + - [Tavily Extract](#tavily-extract) + favicon: >- + https://mintlify.s3-us-west-1.amazonaws.com/tavilyai/_generated/favicon/apple-touch-icon.png?v=3 + - url: https://docs.tavily.com/documentation/about + raw_content: >- + Who are we? + + ----------- + + + We're a team of AI researchers and developers passionate + about helping you build the next generation of AI + assistants. Our mission is to empower individuals and + organizations with accurate, unbiased, and factual + information. + + + What is the Tavily Search Engine? + + --------------------------------- + + + Building an AI agent that leverages realtime online + information is not a simple task. Scraping doesn't scale and + requires expertise to refine, current search engine APIs + don't provide explicit information to queries but simply + potential related articles (which are not always related), + and are not very customziable for AI agent needs. This is + why we're excited to introduce the first search engine for + AI agents - [Tavily](https://app.tavily.com/). + + + Tavily is a search engine optimized for LLMs, aimed at + efficient, quick and persistent search results. 
Unlike other + search APIs such as Serp or Google, Tavily focuses on + optimizing search for AI developers and autonomous AI + agents. We take care of all the burden of searching, + scraping, filtering and extracting the most relevant + information from online sources. All in a single API call! + + + To try the API in action, you can now use our hosted version + on our [API Playground](https://app.tavily.com/playground). + + + Why choose Tavily? + + ------------------ + + + Tavily shines where others fail, with a Search API optimized + for LLMs. + + + How does the Search API work? + + ----------------------------- + + + Traditional search APIs such as Google, Serp and Bing + retrieve search results based on a user query. However, the + results are sometimes irrelevant to the goal of the search, + and return simple URLs and snippets of content which are not + always relevant. Because of this, any developer would need + to then scrape the sites to extract relevant content, filter + irrelevant information, optimize the content to fit LLM + context limits, and more. This task is a burden and requires + a lot of time and effort to complete. The Tavily Search API + takes care of all of this for you in a single API call. + + + The Tavily Search API aggregates up to 20 sites per a single + API call, and uses proprietary AI to score, filter and rank + the top most relevant sources and content to your task, + query or goal. In addition, Tavily allows developers to add + custom fields such as context and limit response tokens to + enable the optimal search experience for LLMs. + + + Tavily can also help your AI agent make better decisions by + including a short answer for cross-agent communication. + + + Getting started + + --------------- + + + [Sign up](https://app.tavily.com/) for Tavily to get your + API key. You get **1,000 free API Credits every month**. No + credit card required. 
+ + + [Get your free API key --------------------- You get 1,000 + free API Credits every month. **No credit card + required.**](https://app.tavily.com/)Head to our [API + Playground](https://app.tavily.com/playground) to + familiarize yourself with our API. + + + To get started with Tavily's APIs and SDKs using code, head + to our [Quickstart + Guide](https://docs.tavily.com/guides/quickstart) and follow + the steps. + favicon: >- + https://mintlify.s3-us-west-1.amazonaws.com/tavilyai/_generated/favicon/apple-touch-icon.png?v=3 + response_time: 1.23 + request_id: 123e4567-e89b-12d3-a456-426614174111 + description: Crawl results returned successfully + '400': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: '[400] No starting url provided' + description: Bad Request - Your request is invalid. + '401': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: 'Unauthorized: missing or invalid API key.' + description: Unauthorized - Your API key is wrong or missing. + '403': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: '[403] URL is not supported' + description: Forbidden - URL is not supported. + '429': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: >- + Your request has been blocked due to excessive requests. + Please reduce rate of requests. 
+ description: Too many requests - Rate limit exceeded + '432': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: >- + This request exceeds your plan's set usage limit. Please + upgrade your plan or contact support@tavily.com + description: Key limit or Plan Limit exceeded + '433': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: >- + This request exceeds the pay-as-you-go limit. You can increase + your limit on the Tavily dashboard. + description: PayGo limit exceeded + '500': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: '[500] Internal server error' + description: Internal Server Error - We had a problem with our server. + deprecated: false + type: path +components: + schemas: {} + +```` diff --git a/modules/tool/packages/tavily/children/crawl/config.ts b/modules/tool/packages/tavily/children/crawl/config.ts new file mode 100644 index 00000000..67dbf668 --- /dev/null +++ b/modules/tool/packages/tavily/children/crawl/config.ts @@ -0,0 +1,177 @@ +import { defineTool } from '@tool/type'; +import { FlowNodeInputTypeEnum, WorkflowIOValueTypeEnum } from '@tool/type/fastgpt'; + +export default defineTool({ + name: { + 'zh-CN': '网站爬取', + en: 'Web Crawler' + }, + description: { + 'zh-CN': '使用基于图的并行网站爬取功能,深度探索网站内容', + en: 'Graph-based parallel website crawling with intelligent discovery' + }, + toolDescription: + 'Crawl hundreds of website paths in parallel with built-in extraction and intelligent discovery. 
' + + 'Perfect for comprehensive site exploration, documentation scraping, and content aggregation.', + + versionList: [ + { + value: '0.1.0', + description: 'Initial version with comprehensive crawling capabilities', + inputs: [ + { + key: 'url', + label: '起始 URL', + description: '开始爬取的根 URL', + required: true, + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.reference, FlowNodeInputTypeEnum.input], + toolDescription: 'The root URL to begin the crawl' + }, + { + key: 'instructions', + label: '爬取指令', + description: '自然语言指令,指导爬虫查找特定内容(使用会增加成本)', + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference], + toolDescription: 'Natural language instructions for the crawler' + }, + { + key: 'maxDepth', + label: '最大深度', + description: '爬取的最大深度(1-5)', + valueType: WorkflowIOValueTypeEnum.number, + defaultValue: 1, + min: 1, + max: 5, + renderTypeList: [FlowNodeInputTypeEnum.numberInput], + toolDescription: 'Max depth of the crawl' + }, + { + key: 'maxBreadth', + label: '最大广度', + description: '每层跟随的最大链接数', + valueType: WorkflowIOValueTypeEnum.number, + defaultValue: 20, + min: 1, + renderTypeList: [FlowNodeInputTypeEnum.numberInput], + toolDescription: 'Max number of links to follow per level' + }, + { + key: 'limit', + label: '总限制', + description: '处理的总链接数上限', + valueType: WorkflowIOValueTypeEnum.number, + defaultValue: 50, + min: 1, + renderTypeList: [FlowNodeInputTypeEnum.numberInput], + toolDescription: 'Total number of links to process' + }, + { + key: 'selectPaths', + label: '包含路径', + description: '正则表达式模式,选择特定路径(每行一个)', + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference], + toolDescription: 'Regex patterns to select specific path patterns' + }, + { + key: 'excludePaths', + label: '排除路径', + description: '正则表达式模式,排除特定路径(每行一个)', + valueType: WorkflowIOValueTypeEnum.string, + 
renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference], + toolDescription: 'Regex patterns to exclude specific path patterns' + }, + { + key: 'allowExternal', + label: '允许外部链接', + description: '是否在结果中包含外部域链接', + valueType: WorkflowIOValueTypeEnum.boolean, + defaultValue: true, + renderTypeList: [FlowNodeInputTypeEnum.switch], + toolDescription: 'Whether to include external domain links' + }, + { + key: 'includeImages', + label: '包含图片', + description: '是否在爬取结果中包含图片', + valueType: WorkflowIOValueTypeEnum.boolean, + defaultValue: false, + renderTypeList: [FlowNodeInputTypeEnum.switch], + toolDescription: 'Whether to include images in the crawl results' + }, + { + key: 'extractDepth', + label: '提取深度', + description: '基础提取(1 credit/5 pages)或高级提取(2 credits/5 pages)', + valueType: WorkflowIOValueTypeEnum.string, + defaultValue: 'basic', + renderTypeList: [FlowNodeInputTypeEnum.select], + list: [ + { label: '基础', value: 'basic' }, + { label: '高级', value: 'advanced' } + ] + }, + { + key: 'format', + label: '输出格式', + description: '内容输出格式', + valueType: WorkflowIOValueTypeEnum.string, + defaultValue: 'markdown', + renderTypeList: [FlowNodeInputTypeEnum.select], + list: [ + { label: 'Markdown', value: 'markdown' }, + { label: 'Text', value: 'text' } + ] + }, + { + key: 'includeFavicon', + label: '包含图标', + description: '是否为每个结果包含 favicon URL', + valueType: WorkflowIOValueTypeEnum.boolean, + defaultValue: false, + renderTypeList: [FlowNodeInputTypeEnum.switch], + toolDescription: 'Whether to include the favicon URL for each result' + }, + { + key: 'timeout', + label: '超时时间(秒)', + description: '爬取操作的最大等待时间(10-150秒)', + valueType: WorkflowIOValueTypeEnum.number, + defaultValue: 150, + min: 10, + max: 150, + renderTypeList: [FlowNodeInputTypeEnum.numberInput], + toolDescription: 'Maximum time in seconds to wait before timing out' + } + ], + outputs: [ + { + key: 'baseUrl', + label: '基础 URL', + description: '被爬取的基础 URL', + valueType: 
WorkflowIOValueTypeEnum.string + }, + { + key: 'results', + label: '爬取结果', + description: '从爬取 URL 中提取的内容列表', + valueType: WorkflowIOValueTypeEnum.arrayObject + }, + { + key: 'successCount', + label: '成功数量', + description: '成功爬取的页面数量', + valueType: WorkflowIOValueTypeEnum.number + }, + { + key: 'responseTime', + label: '响应时间', + description: '完成请求所花费的时间(秒)', + valueType: WorkflowIOValueTypeEnum.number + } + ] + } + ] +}); diff --git a/modules/tool/packages/tavily/children/crawl/index.ts b/modules/tool/packages/tavily/children/crawl/index.ts new file mode 100644 index 00000000..d698ed48 --- /dev/null +++ b/modules/tool/packages/tavily/children/crawl/index.ts @@ -0,0 +1,10 @@ +import config from './config'; +import { InputType, OutputType, tool as toolCb } from './src'; +import { exportTool } from '@tool/utils/tool'; + +export default exportTool({ + toolCb, + InputType, + OutputType, + config +}); diff --git a/modules/tool/packages/tavily/children/crawl/src/index.ts b/modules/tool/packages/tavily/children/crawl/src/index.ts new file mode 100644 index 00000000..de81e07c --- /dev/null +++ b/modules/tool/packages/tavily/children/crawl/src/index.ts @@ -0,0 +1,115 @@ +import { z } from 'zod'; +import { createTavilyClient, handleTavilyError, validateApiKey } from '../../../client'; +import type { CrawlRequest, CrawlResponse } from '../../../types'; + +// 输入类型 (包含父级密钥) +export const InputType = z.object({ + tavilyApiKey: z.string().min(1, 'Tavily API key is required'), + url: z.string().min(1, 'URL is required'), + instructions: z.string().optional(), + maxDepth: z.number().int().min(1).max(5).default(1), + maxBreadth: z.number().int().min(1).default(20), + limit: z.number().int().min(1).default(50), + selectPaths: z.string().optional(), + excludePaths: z.string().optional(), + allowExternal: z.boolean().default(true), + includeImages: z.boolean().default(false), + extractDepth: z.enum(['basic', 'advanced']).default('basic'), + format: z.enum(['markdown', 
'text']).default('markdown'), + includeFavicon: z.boolean().default(false), + timeout: z.number().min(10).max(150).default(150) +}); + +// 输出类型 +export const OutputType = z.object({ + baseUrl: z.string(), + results: z + .array( + z.object({ + url: z.string(), + raw_content: z.string(), + favicon: z.string().optional() + }) + ) + .default([]), + successCount: z.number(), + responseTime: z.number() +}); + +export async function tool({ + tavilyApiKey, + url, + instructions, + maxDepth, + maxBreadth, + limit, + selectPaths, + excludePaths, + allowExternal, + includeImages, + extractDepth, + format, + includeFavicon, + timeout +}: z.infer): Promise> { + try { + // 1. 验证 API Key + validateApiKey(tavilyApiKey); + + // 2. 创建客户端 + const client = createTavilyClient(tavilyApiKey); + + // 3. 处理数组类型的参数 + let parsedSelectPaths: string[] | undefined; + let parsedExcludePaths: string[] | undefined; + + if (selectPaths) { + parsedSelectPaths = selectPaths + .split('\n') + .map((path) => path.trim()) + .filter((path) => path.length > 0); + } + + if (excludePaths) { + parsedExcludePaths = excludePaths + .split('\n') + .map((path) => path.trim()) + .filter((path) => path.length > 0); + } + + // 4. 构建请求 + const requestBody: CrawlRequest = { + api_key: tavilyApiKey, + url, + instructions: instructions || undefined, + max_depth: maxDepth, + max_breadth: maxBreadth, + limit, + select_paths: parsedSelectPaths, + select_domains: undefined, + exclude_paths: parsedExcludePaths, + exclude_domains: undefined, + allow_external: allowExternal, + include_images: includeImages, + extract_depth: extractDepth, + format, + include_favicon: includeFavicon, + timeout + }; + + // 5. 发送请求 + const response = await client.post('/crawl', requestBody); + + console.log(response.data); + + // 6. 
格式化输出 + return { + baseUrl: response.data.base_url, + results: response.data.results || [], + successCount: (response.data.results || []).length, + responseTime: response.data.response_time + }; + } catch (error) { + return Promise.reject(handleTavilyError(error)); + } +} diff --git a/modules/tool/packages/tavily/children/crawl/test/index.test.ts b/modules/tool/packages/tavily/children/crawl/test/index.test.ts new file mode 100644 index 00000000..799263ee --- /dev/null +++ b/modules/tool/packages/tavily/children/crawl/test/index.test.ts @@ -0,0 +1,564 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { tool } from '../src'; +import * as clientModule from '../../../client'; + +describe('Tavily Crawl Tool', () => { + const testApiKey = process.env.TEST_TAVLIY_KEY || 'tvly-test-key-1234567890abcdefgh'; + + beforeEach(() => { + vi.restoreAllMocks(); + }); + + describe('Unit Tests (Mocked)', () => { + it('should perform basic crawl successfully', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'docs.tavily.com', + results: [ + { + url: 'https://docs.tavily.com/welcome', + raw_content: '# Welcome to Tavily Docs\nThis is the welcome page content.', + favicon: 'https://docs.tavily.com/favicon.ico' + }, + { + url: 'https://docs.tavily.com/documentation/about', + raw_content: '# About Tavily\nTavily is a search engine for AI agents.' 
+ } + ], + response_time: 12.34, + request_id: 'crawl-test-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'docs.tavily.com', + maxDepth: 1, + maxBreadth: 10, + limit: 25, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: true, + timeout: 120 + }); + + expect(result.baseUrl).toBe('docs.tavily.com'); + expect(result.results).toHaveLength(2); + expect(result.successCount).toBe(2); + expect(result.responseTime).toBe(12.34); + expect(result.results[0].url).toBe('https://docs.tavily.com/welcome'); + expect(result.results[0].favicon).toBe('https://docs.tavily.com/favicon.ico'); + expect(mockClient.post).toHaveBeenCalledWith('/crawl', { + api_key: testApiKey, + url: 'docs.tavily.com', + instructions: undefined, + max_depth: 1, + max_breadth: 10, + limit: 25, + select_paths: undefined, + select_domains: undefined, + exclude_paths: undefined, + exclude_domains: undefined, + allow_external: true, + include_images: false, + extract_depth: 'basic', + format: 'markdown', + include_favicon: true, + timeout: 120 + }); + }); + + it('should perform crawl with instructions', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'docs.tavily.com', + results: [ + { + url: 'https://docs.tavily.com/sdk/python/quick-start', + raw_content: '# Python SDK Quick Start\nThis is the Python SDK documentation.' 
+ } + ], + response_time: 8.76, + request_id: 'crawl-instructions-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'docs.tavily.com', + instructions: 'Find all pages about the Python SDK', + maxDepth: 2, + maxBreadth: 15, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }); + + expect(result.results).toHaveLength(1); + expect(result.results[0].url).toBe('https://docs.tavily.com/sdk/python/quick-start'); + expect(mockClient.post).toHaveBeenCalledWith( + '/crawl', + expect.objectContaining({ + api_key: testApiKey, + url: 'docs.tavily.com', + instructions: 'Find all pages about the Python SDK' + }) + ); + }); + + it('should handle select and exclude paths correctly', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'example.com', + results: [ + { + url: 'https://example.com/docs/api/v1', + raw_content: 'API Documentation v1' + } + ], + response_time: 5.43, + request_id: 'crawl-paths-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'example.com', + selectPaths: '/docs/.*\n/api/v1.*', + excludePaths: '/private/.*\n/admin/.*', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }); + + expect(result.results).toHaveLength(1); + expect(mockClient.post).toHaveBeenCalledWith( + '/crawl', + expect.objectContaining({ + select_paths: ['/docs/.*', '/api/v1.*'], + exclude_paths: ['/private/.*', '/admin/.*'] + }) 
+ ); + }); + + it('should handle empty select/exclude paths', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'example.com', + results: [], + response_time: 2.1, + request_id: 'crawl-empty-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'example.com', + selectPaths: '', + excludePaths: '', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }); + + expect(result.results).toEqual([]); + expect(mockClient.post).toHaveBeenCalledWith( + '/crawl', + expect.objectContaining({ + select_paths: undefined, + exclude_paths: undefined + }) + ); + }); + + it('should handle advanced extraction', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'data-heavy-site.com', + results: [ + { + url: 'https://data-heavy-site.com/tables', + raw_content: + '# Data Tables\n| Column 1 | Column 2 |\n|----------|----------|\n| Data 1 | Data 2 |' + } + ], + response_time: 15.67, + request_id: 'crawl-advanced-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'data-heavy-site.com', + extractDepth: 'advanced', + includeImages: true, + format: 'text', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeFavicon: false, + timeout: 150 + }); + + expect(result.results).toHaveLength(1); + expect(result.results[0].raw_content).toContain('Data Tables'); + expect(mockClient.post).toHaveBeenCalledWith( + '/crawl', + expect.objectContaining({ + extract_depth: 
'advanced', + include_images: true, + format: 'text' + }) + ); + }); + + it('should handle maximum depth and breadth limits', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'deep-site.com', + results: [ + { + url: 'https://deep-site.com/page1', + raw_content: 'Page 1 content' + }, + { + url: 'https://deep-site.com/page2', + raw_content: 'Page 2 content' + } + ], + response_time: 25.89, + request_id: 'crawl-limits-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'deep-site.com', + maxDepth: 3, + maxBreadth: 5, + limit: 10, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }); + + expect(result.results).toHaveLength(2); + expect(mockClient.post).toHaveBeenCalledWith( + '/crawl', + expect.objectContaining({ + max_depth: 3, + max_breadth: 5, + limit: 10 + }) + ); + }); + + it('should validate API key format', async () => { + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => { + throw new Error('Invalid Tavily API key format. 
Key should start with "tvly-"'); + }); + + await expect( + tool({ + tavilyApiKey: 'invalid-key', + url: 'example.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }) + ).rejects.toMatch('Invalid Tavily API key format'); + }); + + it('should handle authentication error', async () => { + const mockClient = { + post: vi.fn().mockRejectedValue({ + isAxiosError: true, + response: { + status: 401, + data: { detail: { error: 'Invalid API key' } } + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + vi.spyOn(clientModule, 'handleTavilyError').mockReturnValue( + 'Authentication failed: Invalid Tavily API key' + ); + + await expect( + tool({ + tavilyApiKey: testApiKey, + url: 'example.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }) + ).rejects.toMatch('Authentication failed'); + }); + + it('should handle rate limit error', async () => { + const mockClient = { + post: vi.fn().mockRejectedValue({ + isAxiosError: true, + response: { + status: 429, + data: { detail: { error: 'Rate limit exceeded' } } + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + vi.spyOn(clientModule, 'handleTavilyError').mockReturnValue( + 'Rate limit exceeded. Please wait before making more requests.' 
+ ); + + await expect( + tool({ + tavilyApiKey: testApiKey, + url: 'example.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }) + ).rejects.toMatch('Rate limit exceeded'); + }); + + it('should handle forbidden URL error', async () => { + const mockClient = { + post: vi.fn().mockRejectedValue({ + isAxiosError: true, + response: { + status: 403, + data: { detail: { error: 'URL is not supported' } } + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + vi.spyOn(clientModule, 'handleTavilyError').mockReturnValue( + 'Forbidden: URL is not supported' + ); + + await expect( + tool({ + tavilyApiKey: testApiKey, + url: 'forbidden-site.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }) + ).rejects.toMatch('Forbidden'); + }); + + it('should handle timeout error', async () => { + const mockClient = { + post: vi.fn().mockRejectedValue({ + isAxiosError: true, + code: 'ECONNABORTED', + message: 'timeout of 150000ms exceeded' + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + vi.spyOn(clientModule, 'handleTavilyError').mockReturnValue( + 'Request timeout. Please check your network connection.' 
+ ); + + await expect( + tool({ + tavilyApiKey: testApiKey, + url: 'slow-site.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }) + ).rejects.toMatch('Request timeout'); + }); + }); + + describe('Integration Tests (Real API)', () => { + const skipIntegration = !process.env.TEST_TAVLIY_KEY; + + it.skipIf(skipIntegration)( + 'should perform real basic crawl', + async () => { + const result = await tool({ + tavilyApiKey: process.env.TEST_TAVLIY_KEY!, + url: 'docs.tavily.com', + maxDepth: 1, + maxBreadth: 10, + limit: 15, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: true, + timeout: 150 + }); + + expect(result.baseUrl).toBe('docs.tavily.com'); + expect(result.results.length).toBeGreaterThan(0); + expect(result.results.length).toBeLessThanOrEqual(15); + expect(result.successCount).toBe(result.results.length); + expect(result.responseTime).toBeGreaterThan(0); + + // Validate result structure + result.results.forEach((crawlResult) => { + expect(crawlResult).toHaveProperty('url'); + expect(crawlResult).toHaveProperty('raw_content'); + expect(typeof crawlResult.url).toBe('string'); + expect(typeof crawlResult.raw_content).toBe('string'); + expect(crawlResult.url.length).toBeGreaterThan(0); + expect(crawlResult.raw_content.length).toBeGreaterThan(0); + }); + }, + 180000 // 3 minutes timeout for crawl operations + ); + + it.skipIf(skipIntegration)( + 'should perform real crawl with instructions', + async () => { + const result = await tool({ + tavilyApiKey: process.env.TEST_TAVLIY_KEY!, + url: 'docs.tavily.com', + instructions: 'Find pages about the Python SDK', + maxDepth: 1, + limit: 10, + maxBreadth: 20, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }); + + 
expect(result.baseUrl).toBe('docs.tavily.com'); + expect(result.results.length).toBeGreaterThan(0); + + // Results should contain content related to Python SDK + const hasPythonContent = result.results.some( + (r) => + r.raw_content.toLowerCase().includes('python') || r.url.toLowerCase().includes('python') + ); + expect(hasPythonContent).toBe(true); + }, + 180000 + ); + + it.skipIf(skipIntegration)( + 'should perform real crawl with path filtering', + async () => { + const result = await tool({ + tavilyApiKey: process.env.TEST_TAVLIY_KEY!, + url: 'docs.tavily.com', + selectPaths: '/documentation/.*', + excludePaths: '/private/.*', + maxDepth: 1, + limit: 10, + maxBreadth: 20, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }); + + expect(result.baseUrl).toBe('docs.tavily.com'); + + // All URLs should match the select_paths pattern + result.results.forEach((crawlResult) => { + expect(crawlResult.url).toMatch(/\/documentation\/.*/); + }); + }, + 180000 + ); + }); +}); diff --git a/modules/tool/packages/tavily/children/crawl/test/performance.test.ts b/modules/tool/packages/tavily/children/crawl/test/performance.test.ts new file mode 100644 index 00000000..0af61462 --- /dev/null +++ b/modules/tool/packages/tavily/children/crawl/test/performance.test.ts @@ -0,0 +1,244 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { tool } from '../src'; +import * as clientModule from '../../../client'; + +describe('Tavily Crawl Tool - Performance Tests', () => { + const testApiKey = 'tvly-test-key-1234567890abcdefgh'; + + beforeEach(() => { + vi.restoreAllMocks(); + }); + + describe('Large Scale Crawls', () => { + it('should handle large number of results efficiently', async () => { + const mockResults = Array.from({ length: 100 }, (_, i) => ({ + url: `https://example.com/page${i + 1}`, + raw_content: `Content for page ${i + 1}`, + favicon: `https://example.com/favicon${i 
+ 1}.ico` + })); + + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'example.com', + results: mockResults, + response_time: 45.67, + request_id: 'large-crawl-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'example.com', + limit: 100, + maxDepth: 2, + maxBreadth: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }); + + expect(result.results).toHaveLength(100); + expect(result.successCount).toBe(100); + expect(result.responseTime).toBe(45.67); + }); + + it('should handle memory-intensive operations', async () => { + const largeContent = 'A'.repeat(10000); // 10KB of content per result + const mockResults = Array.from({ length: 50 }, (_, i) => ({ + url: `https://large-content-site.com/page${i + 1}`, + raw_content: `${largeContent}\nPage ${i + 1} content here.`, + favicon: `https://large-content-site.com/favicon.ico` + })); + + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'large-content-site.com', + results: mockResults, + response_time: 78.9, + request_id: 'memory-intensive-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const startTime = Date.now(); + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'large-content-site.com', + limit: 50, + extractDepth: 'advanced', + maxDepth: 1, + maxBreadth: 20, + allowExternal: true, + includeImages: false, + format: 'markdown', + includeFavicon: false, + timeout: 150 + }); + const endTime = Date.now(); + + expect(result.results).toHaveLength(50); + expect(result.results[0].raw_content.length).toBeGreaterThan(10000); + 
expect(endTime - startTime).toBeLessThan(5000); // Should complete within 5 seconds + }); + }); + + describe('Network Resilience', () => { + it('should handle intermittent network failures gracefully', async () => { + let callCount = 0; + const mockClient = { + post: vi.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) { + return Promise.reject({ + isAxiosError: true, + code: 'ECONNRESET', + message: 'Connection reset by peer' + }); + } + return Promise.resolve({ + data: { + base_url: 'resilient-site.com', + results: [ + { + url: 'https://resilient-site.com/page1', + raw_content: 'Successfully fetched after retry' + } + ], + response_time: 12.34, + request_id: 'resilient-request-id' + } + }); + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + // Note: This test demonstrates that the current implementation doesn't automatically retry + // In a production environment, you might want to add retry logic + await expect( + tool({ + tavilyApiKey: testApiKey, + url: 'resilient-site.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }) + ).rejects.toThrow(); + }); + }); + + describe('Input Validation Performance', () => { + it('should handle large select/exclude path lists efficiently', async () => { + const largePathList = Array.from({ length: 1000 }, (_, i) => `/path${i + 1}/.*`).join('\n'); + + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'large-path-site.com', + results: [], + response_time: 5.43, + request_id: 'large-path-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const startTime = Date.now(); + const result = await 
tool({ + tavilyApiKey: testApiKey, + url: 'large-path-site.com', + selectPaths: largePathList, + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }); + const endTime = Date.now(); + + expect(endTime - startTime).toBeLessThan(1000); // Should process within 1 second + expect(mockClient.post).toHaveBeenCalledWith( + '/crawl', + expect.objectContaining({ + select_paths: Array.from({ length: 1000 }, (_, i) => `/path${i + 1}/.*`) + }) + ); + }); + }); + + describe('Resource Cleanup', () => { + it('should properly handle response cleanup', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'cleanup-test.com', + results: [ + { + url: 'https://cleanup-test.com/page1', + raw_content: 'Test content 1', + favicon: 'https://cleanup-test.com/favicon1.ico' + }, + { + url: 'https://cleanup-test.com/page2', + raw_content: 'Test content 2', + favicon: 'https://cleanup-test.com/favicon2.ico' + } + ], + response_time: 3.21, + request_id: 'cleanup-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'cleanup-test.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + includeImages: false, + extractDepth: 'basic', + format: 'markdown', + includeFavicon: false, + timeout: 150 + }); + + // Verify that all properties are properly handled and no undefined values exist + result.results.forEach((crawlResult) => { + expect(crawlResult.url).toBeDefined(); + expect(crawlResult.raw_content).toBeDefined(); + expect(typeof crawlResult.url).toBe('string'); + expect(typeof crawlResult.raw_content).toBe('string'); + }); + }); + }); +}); diff --git a/modules/tool/packages/tavily/children/extract/DESIGN.md 
b/modules/tool/packages/tavily/children/extract/DESIGN.md new file mode 100644 index 00000000..34a87772 --- /dev/null +++ b/modules/tool/packages/tavily/children/extract/DESIGN.md @@ -0,0 +1,387 @@ +# Tavily Extract + +> Extract web page content from one or more specified URLs using Tavily Extract. + +## OpenAPI + +````yaml POST /extract +paths: + path: /extract + method: post + servers: + - url: https://api.tavily.com/ + request: + security: + - title: bearerAuth + parameters: + query: {} + header: + Authorization: + type: http + scheme: bearer + description: >- + Bearer authentication header in the form Bearer <token>, where + <token> is your Tavily API key (e.g., Bearer tvly-YOUR_API_KEY). + cookie: {} + parameters: + path: {} + query: {} + header: {} + cookie: {} + body: + application/json: + schemaArray: + - type: object + properties: + urls: + allOf: + - oneOf: + - type: string + description: The URL to extract content from. + example: https://en.wikipedia.org/wiki/Artificial_intelligence + - type: array + items: + type: string + description: A list of URLs to extract content from. + example: + - >- + https://en.wikipedia.org/wiki/Artificial_intelligence + - https://en.wikipedia.org/wiki/Machine_learning + - https://en.wikipedia.org/wiki/Data_science + include_images: + allOf: + - type: boolean + description: >- + Include a list of images extracted from the URLs in the + response. Default is false. + default: false + include_favicon: + allOf: + - type: boolean + description: Whether to include the favicon URL for each result. + default: false + extract_depth: + allOf: + - type: string + description: >- + The depth of the extraction process. `advanced` extraction + retrieves more data, including tables and embedded + content, with higher success but may increase + latency. `basic` extraction costs 1 credit per 5 successful + URL extractions, while `advanced` extraction costs 2 + credits per 5 successful URL extractions.
+ enum: + - basic + - advanced + default: basic + format: + allOf: + - type: string + description: >- + The format of the extracted web page content. `markdown` + returns content in markdown format. `text` returns plain + text and may increase latency. + enum: + - markdown + - text + default: markdown + timeout: + allOf: + - type: number + format: float + description: >- + Maximum time in seconds to wait for the URL extraction + before timing out. Must be between 1.0 and 60.0 seconds. + If not specified, default timeouts are applied based on + extract_depth: 10 seconds for basic extraction and 30 + seconds for advanced extraction. + minimum: 1 + maximum: 60 + default: None + required: true + requiredProperties: + - urls + examples: + example: + value: + urls: https://en.wikipedia.org/wiki/Artificial_intelligence + include_images: false + include_favicon: false + extract_depth: basic + format: markdown + timeout: None + description: Parameters for the Tavily Extract request. + codeSamples: + - label: Python SDK + lang: python + source: >- + from tavily import TavilyClient + + + tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY") + + response = + tavily_client.extract("https://en.wikipedia.org/wiki/Artificial_intelligence") + + + print(response) + - label: JavaScript SDK + lang: javascript + source: >- + const { tavily } = require("@tavily/core"); + + + const tvly = tavily({ apiKey: "tvly-YOUR_API_KEY" }); + + const response = await + tvly.extract("https://en.wikipedia.org/wiki/Artificial_intelligence"); + + + console.log(response); + response: + '200': + application/json: + schemaArray: + - type: object + properties: + results: + allOf: + - type: array + description: A list of extracted content from the provided URLs. + items: + type: object + properties: + url: + type: string + description: The URL from which the content was extracted. 
+ example: >- + https://en.wikipedia.org/wiki/Artificial_intelligence + raw_content: + type: string + description: The full content extracted from the page. + example: >- + "Jump to content\nMain + menu\nSearch\nAppearance\nDonate\nCreate + account\nLog in\nPersonal tools\n Photograph + your local culture, help Wikipedia and win!\nToggle + the table of contents\nArtificial intelligence\n161 + languages\nArticle\nTalk\nRead\nView source\nView + history\nTools\nFrom Wikipedia, the free + encyclopedia\n\"AI\" redirects here. For other uses, + see AI (disambiguation) and Artificial intelligence + (disambiguation).\nPart of a series on\nArtificial + intelligence (AI)\nshow\nMajor + goals\nshow\nApproaches\nshow\nApplications\nshow\nPhilosophy\nshow\nHistory\nshow\nGlossary\nvte\nArtificial + intelligence (AI), in its broadest sense, is + intelligence exhibited by machines, particularly + computer systems. It is a field of research in + computer science that develops and studies methods + and software that enable machines to perceive their + environment and use learning and intelligence to + take actions that maximize their chances of + achieving defined goals.[1] Such machines may be + called AIs.\nHigh-profile applications of AI include + advanced web search engines (e.g., Google Search); + recommendation systems (used by YouTube, Amazon, and + Netflix); virtual assistants (e.g., Google + Assistant, Siri, and Alexa); autonomous vehicles + (e.g., Waymo); generative and creative tools (e.g., + ChatGPT and AI art); and superhuman play and + analysis in strategy games (e.g., chess and + Go)................... + images: + type: array + example: [] + description: >- + This is only available if `include_images` is set to + `true`. A list of image URLs extracted from the + page. + items: + type: string + favicon: + type: string + description: The favicon URL for the result. 
+ example: >- + https://en.wikipedia.org/static/favicon/wikipedia.ico + failed_results: + allOf: + - type: array + example: [] + description: A list of URLs that could not be processed. + items: + type: object + properties: + url: + type: string + description: The URL that failed to be processed. + error: + type: string + description: >- + An error message describing why the URL couldn't be + processed. + response_time: + allOf: + - type: number + format: float + description: Time in seconds it took to complete the request. + example: 0.02 + request_id: + allOf: + - type: string + description: >- + A unique request identifier you can share with customer + support to help resolve issues with specific requests. + example: 123e4567-e89b-12d3-a456-426614174111 + examples: + example: + value: + results: + - url: https://en.wikipedia.org/wiki/Artificial_intelligence + raw_content: >- + "Jump to content\nMain + menu\nSearch\nAppearance\nDonate\nCreate account\nLog + in\nPersonal tools\n Photograph your local culture, + help Wikipedia and win!\nToggle the table of + contents\nArtificial intelligence\n161 + languages\nArticle\nTalk\nRead\nView source\nView + history\nTools\nFrom Wikipedia, the free + encyclopedia\n\"AI\" redirects here. For other uses, see AI + (disambiguation) and Artificial intelligence + (disambiguation).\nPart of a series on\nArtificial + intelligence (AI)\nshow\nMajor + goals\nshow\nApproaches\nshow\nApplications\nshow\nPhilosophy\nshow\nHistory\nshow\nGlossary\nvte\nArtificial + intelligence (AI), in its broadest sense, is intelligence + exhibited by machines, particularly computer systems. 
It is + a field of research in computer science that develops and + studies methods and software that enable machines to + perceive their environment and use learning and intelligence + to take actions that maximize their chances of achieving + defined goals.[1] Such machines may be called + AIs.\nHigh-profile applications of AI include advanced web + search engines (e.g., Google Search); recommendation systems + (used by YouTube, Amazon, and Netflix); virtual assistants + (e.g., Google Assistant, Siri, and Alexa); autonomous + vehicles (e.g., Waymo); generative and creative tools (e.g., + ChatGPT and AI art); and superhuman play and analysis in + strategy games (e.g., chess and Go)................... + images: [] + favicon: https://en.wikipedia.org/static/favicon/wikipedia.ico + failed_results: [] + response_time: 0.02 + request_id: 123e4567-e89b-12d3-a456-426614174111 + description: Extraction results returned successfully + '400': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: <400 Bad Request, (e.g Max 20 URLs are allowed.)> + description: Bad Request + '401': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: 'Unauthorized: missing or invalid API key.' + description: Unauthorized - Your API key is wrong or missing. + '429': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: >- + Your request has been blocked due to excessive requests. + Please reduce rate of requests. 
+ description: Too many requests - Rate limit exceeded + '432': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: >- + <432 Custom Forbidden Error (e.g This request exceeds your + plan's set usage limit. Please upgrade your plan or contact + support@tavily.com)> + description: Key limit or Plan Limit exceeded + '433': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: >- + This request exceeds the pay-as-you-go limit. You can increase + your limit on the Tavily dashboard. + description: PayGo limit exceeded + '500': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: Internal Server Error + description: Internal Server Error - We had a problem with our server. 
+ deprecated: false + type: path +components: + schemas: {} + +```` diff --git a/modules/tool/packages/tavily/children/extract/config.ts b/modules/tool/packages/tavily/children/extract/config.ts index dfe46d20..2d4befe2 100644 --- a/modules/tool/packages/tavily/children/extract/config.ts +++ b/modules/tool/packages/tavily/children/extract/config.ts @@ -15,6 +15,94 @@ export default defineTool({ 'Supports batch extraction from multiple URLs.', versionList: [ + { + value: '0.1.1', + description: '支持高级配置', + inputs: [ + { + key: 'urls', + label: 'URL 地址', + description: '单个或多个 URL (多个用换行分隔)', + required: true, + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference], + toolDescription: 'Single URL or multiple URLs (one per line)' + }, + { + key: 'extract_depth', + label: '提取深度', + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.select], + defaultValue: 'basic', + list: [ + { + label: '基础', + value: 'basic' + }, + { + label: '高级', + value: 'advanced' + } + ] + }, + { + key: 'format', + label: '输出格式', + description: '内容输出格式', + valueType: WorkflowIOValueTypeEnum.string, + defaultValue: 'markdown', + renderTypeList: [FlowNodeInputTypeEnum.select], + list: [ + { label: 'Markdown', value: 'markdown' }, + { label: 'Text', value: 'text' } + ] + }, + { + key: 'include_images', + label: '包含图片', + valueType: WorkflowIOValueTypeEnum.boolean, + defaultValue: false, + renderTypeList: [FlowNodeInputTypeEnum.switch] + }, + { + key: 'include_favicon', + label: '包含 favicon', + valueType: WorkflowIOValueTypeEnum.boolean, + defaultValue: false, + renderTypeList: [FlowNodeInputTypeEnum.switch] + }, + { + key: 'timeout', + label: '超时时间(秒)', + description: '最大等待时间(1-60秒),根据提取深度设置默认值', + valueType: WorkflowIOValueTypeEnum.number, + min: 1, + max: 60, + renderTypeList: [FlowNodeInputTypeEnum.numberInput], + toolDescription: 'Maximum time in seconds to wait before timing out (1-60)' + } 
+ ], + outputs: [ + { + key: 'results', + label: '提取结果', + description: '成功提取的内容数组', + valueType: WorkflowIOValueTypeEnum.arrayObject + }, + { + key: 'successCount', + label: '成功数量', + description: '成功提取的 URL 数量', + valueType: WorkflowIOValueTypeEnum.number + }, + { + key: 'failedUrls', + label: '失败列表', + description: '提取失败的 URL 及原因', + valueType: WorkflowIOValueTypeEnum.arrayString + } + ] + }, { value: '0.1.0', description: 'Initial version with content extraction', diff --git a/modules/tool/packages/tavily/children/extract/src/index.ts b/modules/tool/packages/tavily/children/extract/src/index.ts index ff337eb7..54bfd18f 100644 --- a/modules/tool/packages/tavily/children/extract/src/index.ts +++ b/modules/tool/packages/tavily/children/extract/src/index.ts @@ -6,7 +6,11 @@ import type { ExtractRequest, ExtractResponse } from '../../../types'; export const InputType = z.object({ tavilyApiKey: z.string().min(1, 'Tavily API key is required'), urls: z.string().min(1, 'At least one URL is required'), - format: z.enum(['markdown', 'text']).default('markdown') + format: z.enum(['markdown', 'text']).default('markdown'), + extract_depth: z.enum(['basic', 'advanced']).default('basic'), + include_images: z.boolean().default(false), + include_favicon: z.boolean().default(false), + timeout: z.number().min(1).max(60).optional() }); // 输出类型 @@ -16,7 +20,8 @@ export const OutputType = z.object({ z.object({ url: z.string(), raw_content: z.string(), - images: z.array(z.string()).optional() + images: z.array(z.string()).optional(), + favicon: z.string().optional() }) ) .default([]), @@ -27,7 +32,11 @@ export const OutputType = z.object({ export async function tool({ tavilyApiKey, urls, - format + format, + extract_depth, + include_images, + include_favicon, + timeout }: z.infer): Promise> { try { // 1. 验证 API Key @@ -50,7 +59,11 @@ export async function tool({ const requestBody: ExtractRequest = { api_key: tavilyApiKey, urls: urlList.length === 1 ? 
urlList[0] : urlList, - format + format, + extract_depth, + include_images, + include_favicon, + timeout: timeout || undefined }; // 5. 发送请求 diff --git a/modules/tool/packages/tavily/children/extract/test/index.test.ts b/modules/tool/packages/tavily/children/extract/test/index.test.ts index 7a70b36a..54b2ee2f 100644 --- a/modules/tool/packages/tavily/children/extract/test/index.test.ts +++ b/modules/tool/packages/tavily/children/extract/test/index.test.ts @@ -33,7 +33,10 @@ describe('Tavily Extract Tool', () => { const result = await tool({ tavilyApiKey: testApiKey, urls: 'https://example.com', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); expect(result.results).toHaveLength(1); @@ -44,7 +47,10 @@ describe('Tavily Extract Tool', () => { expect(mockClient.post).toHaveBeenCalledWith('/extract', { api_key: testApiKey, urls: 'https://example.com', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); }); @@ -76,7 +82,10 @@ describe('Tavily Extract Tool', () => { const result = await tool({ tavilyApiKey: testApiKey, urls: 'https://example1.com\nhttps://example2.com', - format: 'text' + format: 'text', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); expect(result.results).toHaveLength(2); @@ -85,7 +94,10 @@ describe('Tavily Extract Tool', () => { expect(mockClient.post).toHaveBeenCalledWith('/extract', { api_key: testApiKey, urls: ['https://example1.com', 'https://example2.com'], - format: 'text' + format: 'text', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); }); @@ -117,14 +129,20 @@ describe('Tavily Extract Tool', () => { const result = await tool({ tavilyApiKey: testApiKey, urls: 'https://example1.com\n\n\nhttps://example2.com\n', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); 
expect(result.results).toHaveLength(2); expect(mockClient.post).toHaveBeenCalledWith('/extract', { api_key: testApiKey, urls: ['https://example1.com', 'https://example2.com'], - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); }); @@ -161,7 +179,10 @@ describe('Tavily Extract Tool', () => { const result = await tool({ tavilyApiKey: testApiKey, urls: 'https://example1.com\nhttps://invalid.com\nhttps://timeout.com', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); expect(result.results).toHaveLength(1); @@ -181,7 +202,10 @@ describe('Tavily Extract Tool', () => { tool({ tavilyApiKey: testApiKey, urls: '\n\n\n', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }) ).rejects.toMatch('No valid URLs provided'); }); @@ -195,7 +219,10 @@ describe('Tavily Extract Tool', () => { tool({ tavilyApiKey: 'invalid-key', urls: 'https://example.com', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }) ).rejects.toMatch('Invalid Tavily API key format'); }); @@ -221,7 +248,10 @@ describe('Tavily Extract Tool', () => { tool({ tavilyApiKey: testApiKey, urls: 'https://example.com', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }) ).rejects.toMatch('Authentication failed'); }); @@ -247,7 +277,10 @@ describe('Tavily Extract Tool', () => { tool({ tavilyApiKey: testApiKey, urls: 'https://example.com', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }) ).rejects.toMatch('Rate limit exceeded'); }); @@ -275,13 +308,19 @@ describe('Tavily Extract Tool', () => { await tool({ tavilyApiKey: testApiKey, urls: 'https://example.com', - format: 'markdown' + format: 'markdown', + 
extract_depth: 'basic', + include_images: false, + include_favicon: false }); expect(mockClient.post).toHaveBeenCalledWith('/extract', { api_key: testApiKey, urls: 'https://example.com', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); }); }); @@ -296,7 +335,10 @@ describe('Tavily Extract Tool', () => { const result = await tool({ tavilyApiKey: process.env.TEST_TAVLIY_KEY!, urls: 'https://example.com', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); expect(result.results.length).toBeGreaterThan(0); @@ -314,7 +356,10 @@ describe('Tavily Extract Tool', () => { const result = await tool({ tavilyApiKey: process.env.TEST_TAVLIY_KEY!, urls: 'https://example.com\nhttps://www.iana.org', - format: 'text' + format: 'text', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); expect(result.results.length).toBeGreaterThan(0); @@ -336,7 +381,10 @@ describe('Tavily Extract Tool', () => { const result = await tool({ tavilyApiKey: process.env.TEST_TAVLIY_KEY!, urls: 'https://this-url-does-not-exist-12345.invalid', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); // Should either fail completely or report in failedUrls diff --git a/modules/tool/packages/tavily/children/extract/test/simple.test.ts b/modules/tool/packages/tavily/children/extract/test/simple.test.ts index d1baa165..0d8af4a1 100644 --- a/modules/tool/packages/tavily/children/extract/test/simple.test.ts +++ b/modules/tool/packages/tavily/children/extract/test/simple.test.ts @@ -11,7 +11,10 @@ describe('Tavily Extract Simple Test', () => { const result = await tool({ tavilyApiKey: testApiKey!, urls: 'https://doc.fastgpt.io/docs/introduction', - format: 'markdown' + format: 'markdown', + extract_depth: 'basic', + include_images: false, + include_favicon: false }); // 
验证是否成功获取内容 diff --git a/modules/tool/packages/tavily/children/map/DESIGN.md b/modules/tool/packages/tavily/children/map/DESIGN.md new file mode 100644 index 00000000..e69de29b diff --git a/modules/tool/packages/tavily/children/map/config.ts b/modules/tool/packages/tavily/children/map/config.ts new file mode 100644 index 00000000..6a71a8ec --- /dev/null +++ b/modules/tool/packages/tavily/children/map/config.ts @@ -0,0 +1,151 @@ +import { defineTool } from '@tool/type'; +import { FlowNodeInputTypeEnum, WorkflowIOValueTypeEnum } from '@tool/type/fastgpt'; + +export default defineTool({ + name: { + 'zh-CN': '网站地图', + en: 'Site Map' + }, + description: { + 'zh-CN': '像图一样遍历网站,并行探索数百个路径以生成全面的站点地图', + en: 'Traverse websites like a graph and explore hundreds of paths in parallel to generate comprehensive site maps' + }, + toolDescription: + 'Map website structure by discovering and cataloging all accessible URLs. ' + + 'Perfect for understanding site architecture, content inventory, and planning crawls.', + + versionList: [ + { + value: '0.1.0', + description: 'Initial version with intelligent site mapping capabilities', + inputs: [ + { + key: 'url', + label: '起始 URL', + description: '开始映射的根 URL', + required: true, + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.reference, FlowNodeInputTypeEnum.input], + toolDescription: 'The root URL to begin the mapping' + }, + { + key: 'instructions', + label: '映射指令', + description: '自然语言指令,指导映射器查找特定内容(使用会增加成本)', + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference], + toolDescription: 'Natural language instructions for the crawler' + }, + { + key: 'maxDepth', + label: '最大深度', + description: '映射的最大深度(1-5)', + valueType: WorkflowIOValueTypeEnum.number, + defaultValue: 1, + min: 1, + max: 5, + renderTypeList: [FlowNodeInputTypeEnum.numberInput], + toolDescription: 'Max depth of the mapping' + }, + { + key: 'maxBreadth', + label: 
'最大广度', + description: '每层跟随的最大链接数', + valueType: WorkflowIOValueTypeEnum.number, + defaultValue: 20, + min: 1, + renderTypeList: [FlowNodeInputTypeEnum.numberInput], + toolDescription: 'Max number of links to follow per level' + }, + { + key: 'limit', + label: '总限制', + description: '处理的总链接数上限', + valueType: WorkflowIOValueTypeEnum.number, + defaultValue: 50, + min: 1, + renderTypeList: [FlowNodeInputTypeEnum.numberInput], + toolDescription: 'Total number of links to process' + }, + { + key: 'selectPaths', + label: '包含路径', + description: '正则表达式模式,选择特定路径(每行一个)', + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference], + toolDescription: 'Regex patterns to select specific path patterns' + }, + { + key: 'selectDomains', + label: '包含域名', + description: '正则表达式模式,选择特定域名或子域名(每行一个)', + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference], + toolDescription: 'Regex patterns to select specific domains or subdomains' + }, + { + key: 'excludePaths', + label: '排除路径', + description: '正则表达式模式,排除特定路径(每行一个)', + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference], + toolDescription: 'Regex patterns to exclude specific path patterns' + }, + { + key: 'excludeDomains', + label: '排除域名', + description: '正则表达式模式,排除特定域名或子域名(每行一个)', + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.textarea, FlowNodeInputTypeEnum.reference], + toolDescription: 'Regex patterns to exclude specific domains or subdomains' + }, + { + key: 'allowExternal', + label: '允许外部链接', + description: '是否在最终结果列表中包含外部域链接', + valueType: WorkflowIOValueTypeEnum.boolean, + defaultValue: true, + renderTypeList: [FlowNodeInputTypeEnum.switch], + toolDescription: 'Whether to include external domain links' + }, + { + key: 'timeout', + label: '超时时间(秒)', + description: 
'映射操作的最大等待时间(10-150秒)', + valueType: WorkflowIOValueTypeEnum.number, + defaultValue: 150, + min: 10, + max: 150, + renderTypeList: [FlowNodeInputTypeEnum.numberInput], + toolDescription: 'Maximum time in seconds to wait before timing out' + } + ], + outputs: [ + { + key: 'baseUrl', + label: '基础 URL', + description: '被映射的基础 URL', + valueType: WorkflowIOValueTypeEnum.string + }, + { + key: 'results', + label: '发现的 URL', + description: '映射过程中发现的 URL 列表', + valueType: WorkflowIOValueTypeEnum.arrayString + }, + { + key: 'urlCount', + label: 'URL 数量', + description: '发现的 URL 总数', + valueType: WorkflowIOValueTypeEnum.number + }, + { + key: 'responseTime', + label: '响应时间', + description: '完成请求所花费的时间(秒)', + valueType: WorkflowIOValueTypeEnum.number + } + ] + } + ] +}); diff --git a/modules/tool/packages/tavily/children/map/index.ts b/modules/tool/packages/tavily/children/map/index.ts new file mode 100644 index 00000000..d698ed48 --- /dev/null +++ b/modules/tool/packages/tavily/children/map/index.ts @@ -0,0 +1,10 @@ +import config from './config'; +import { InputType, OutputType, tool as toolCb } from './src'; +import { exportTool } from '@tool/utils/tool'; + +export default exportTool({ + toolCb, + InputType, + OutputType, + config +}); diff --git a/modules/tool/packages/tavily/children/map/src/index.ts b/modules/tool/packages/tavily/children/map/src/index.ts new file mode 100644 index 00000000..62a32de5 --- /dev/null +++ b/modules/tool/packages/tavily/children/map/src/index.ts @@ -0,0 +1,91 @@ +import { z } from 'zod'; +import { createTavilyClient, handleTavilyError, validateApiKey } from '../../../client'; +import type { MapRequest, MapResponse } from '../../../types'; + +// 输入类型 (包含父级密钥) +export const InputType = z.object({ + tavilyApiKey: z.string().min(1, 'Tavily API key is required'), + url: z.string().min(1, 'URL is required'), + instructions: z.string().optional(), + maxDepth: z.number().int().min(1).max(5).default(1), + maxBreadth: 
z.number().int().min(1).default(20), + limit: z.number().int().min(1).default(50), + selectPaths: z.string().optional(), + selectDomains: z.string().optional(), + excludePaths: z.string().optional(), + excludeDomains: z.string().optional(), + allowExternal: z.boolean().default(true), + timeout: z.number().min(10).max(150).default(150) +}); + +// Output type +export const OutputType = z.object({ + baseUrl: z.string(), + results: z.array(z.string()).default([]), + urlCount: z.number(), + responseTime: z.number() +}); + +export async function tool({ + tavilyApiKey, + url, + instructions, + maxDepth, + maxBreadth, + limit, + selectPaths, + selectDomains, + excludePaths, + excludeDomains, + allowExternal, + timeout +}: z.infer<typeof InputType>): Promise<z.infer<typeof OutputType>> { + try { + // 1. Validate the API key + validateApiKey(tavilyApiKey); + + // 2. Create the client + const client = createTavilyClient(tavilyApiKey); + + // 3. Parse newline-separated pattern lists into arrays (empty input -> undefined) + const parseRegexPatterns = (input?: string): string[] | undefined => { + if (!input) return undefined; + + return input + .split('\n') + .map((pattern) => pattern.trim()) + .filter((pattern) => pattern.length > 0); + }; + + // 4. Build the request + const requestBody: MapRequest = { + api_key: tavilyApiKey, + url, + instructions: instructions || undefined, + max_depth: maxDepth, + max_breadth: maxBreadth, + limit, + select_paths: parseRegexPatterns(selectPaths), + select_domains: parseRegexPatterns(selectDomains), + exclude_paths: parseRegexPatterns(excludePaths), + exclude_domains: parseRegexPatterns(excludeDomains), + allow_external: allowExternal, + timeout + }; + + // 5. Send the request + const response = await client.post('/map', requestBody); + + // NOTE: do not log response.data here; it carries every discovered URL and would flood the tool output. + + // 6.
格式化输出 + return { + baseUrl: response.data.base_url, + results: response.data.results || [], + urlCount: (response.data.results || []).length, + responseTime: response.data.response_time + }; + } catch (error) { + return Promise.reject(handleTavilyError(error)); + } +} diff --git a/modules/tool/packages/tavily/children/map/test/index.test.ts b/modules/tool/packages/tavily/children/map/test/index.test.ts new file mode 100644 index 00000000..b4deb572 --- /dev/null +++ b/modules/tool/packages/tavily/children/map/test/index.test.ts @@ -0,0 +1,491 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { tool } from '../src'; +import * as clientModule from '../../../client'; + +describe('Tavily Map Tool', () => { + const testApiKey = process.env.TEST_TAVLIY_KEY || 'tvly-test-key-1234567890abcdefgh'; + + beforeEach(() => { + vi.restoreAllMocks(); + }); + + describe('Unit Tests (Mocked)', () => { + it('should perform basic site mapping successfully', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'docs.tavily.com', + results: [ + 'https://docs.tavily.com/welcome', + 'https://docs.tavily.com/documentation/api-credits', + 'https://docs.tavily.com/documentation/about', + 'https://docs.tavily.com/sdk/python/quick-start' + ], + response_time: 3.45, + request_id: 'map-test-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'docs.tavily.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + timeout: 150 + }); + + expect(result.baseUrl).toBe('docs.tavily.com'); + expect(result.results).toHaveLength(4); + expect(result.urlCount).toBe(4); + expect(result.responseTime).toBe(3.45); + expect(result.results[0]).toBe('https://docs.tavily.com/welcome'); + 
expect(mockClient.post).toHaveBeenCalledWith('/map', { + api_key: testApiKey, + url: 'docs.tavily.com', + instructions: undefined, + max_depth: 1, + max_breadth: 20, + limit: 50, + select_paths: undefined, + select_domains: undefined, + exclude_paths: undefined, + exclude_domains: undefined, + allow_external: true, + timeout: 150 + }); + }); + + it('should perform mapping with instructions', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'docs.tavily.com', + results: [ + 'https://docs.tavily.com/sdk/python/quick-start', + 'https://docs.tavily.com/sdk/python/advanced', + 'https://docs.tavily.com/sdk/python/examples' + ], + response_time: 5.67, + request_id: 'map-instructions-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'docs.tavily.com', + instructions: 'Find all pages about the Python SDK', + maxDepth: 2, + maxBreadth: 20, + limit: 30, + allowExternal: true, + timeout: 150 + }); + + expect(result.results).toHaveLength(3); + expect(result.results[0]).toBe('https://docs.tavily.com/sdk/python/quick-start'); + expect(mockClient.post).toHaveBeenCalledWith( + '/map', + expect.objectContaining({ + api_key: testApiKey, + url: 'docs.tavily.com', + instructions: 'Find all pages about the Python SDK' + }) + ); + }); + + it('should handle select and exclude patterns correctly', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'example.com', + results: ['https://example.com/docs/api/v1', 'https://example.com/docs/api/v2'], + response_time: 2.89, + request_id: 'map-patterns-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await 
tool({ + tavilyApiKey: testApiKey, + url: 'example.com', + selectPaths: '/docs/.*\n/api/.*', + selectDomains: '^example\\.com$', + excludePaths: '/private/.*', + excludeDomains: '^admin\\.example\\.com$', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + timeout: 150 + }); + + expect(result.results).toHaveLength(2); + expect(mockClient.post).toHaveBeenCalledWith( + '/map', + expect.objectContaining({ + select_paths: ['/docs/.*', '/api/.*'], + select_domains: ['^example\\.com$'], + exclude_paths: ['/private/.*'], + exclude_domains: ['^admin\\.example\\.com$'] + }) + ); + }); + + it('should handle empty pattern inputs', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'simple-site.com', + results: ['https://simple-site.com/page1', 'https://simple-site.com/page2'], + response_time: 1.23, + request_id: 'map-simple-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'simple-site.com', + selectPaths: '', + selectDomains: '', + excludePaths: '', + excludeDomains: '', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + timeout: 150 + }); + + expect(result.results).toHaveLength(2); + expect(mockClient.post).toHaveBeenCalledWith( + '/map', + expect.objectContaining({ + select_paths: undefined, + select_domains: undefined, + exclude_paths: undefined, + exclude_domains: undefined + }) + ); + }); + + it('should handle external link settings', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'internal-site.com', + results: ['https://internal-site.com/about', 'https://internal-site.com/contact'], + response_time: 2.1, + request_id: 'map-external-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + 
vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'internal-site.com', + allowExternal: false, + maxDepth: 1, + maxBreadth: 20, + limit: 50, + timeout: 150 + }); + + expect(result.results).toHaveLength(2); + expect(mockClient.post).toHaveBeenCalledWith( + '/map', + expect.objectContaining({ + allow_external: false + }) + ); + }); + + it('should handle maximum depth and breadth limits', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'deep-site.com', + results: [ + 'https://deep-site.com/level1/page1', + 'https://deep-site.com/level1/page2', + 'https://deep-site.com/level1/level2/page3' + ], + response_time: 8.76, + request_id: 'map-limits-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'deep-site.com', + maxDepth: 3, + maxBreadth: 10, + limit: 25, + allowExternal: true, + timeout: 150 + }); + + expect(result.results).toHaveLength(3); + expect(mockClient.post).toHaveBeenCalledWith( + '/map', + expect.objectContaining({ + max_depth: 3, + max_breadth: 10, + limit: 25 + }) + ); + }); + + it('should handle timeout settings', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'slow-site.com', + results: ['https://slow-site.com/page1'], + response_time: 45.32, + request_id: 'map-timeout-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'slow-site.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + timeout: 60 + }); + + expect(result.results).toHaveLength(1); + 
expect(mockClient.post).toHaveBeenCalledWith( + '/map', + expect.objectContaining({ + timeout: 60 + }) + ); + }); + + it('should validate API key format', async () => { + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => { + throw new Error('Invalid Tavily API key format. Key should start with "tvly-"'); + }); + + await expect( + tool({ + tavilyApiKey: 'invalid-key', + url: 'example.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + timeout: 150 + }) + ).rejects.toMatch('Invalid Tavily API key format'); + }); + + it('should handle authentication error', async () => { + const mockClient = { + post: vi.fn().mockRejectedValue({ + isAxiosError: true, + response: { + status: 401, + data: { detail: { error: 'Unauthorized: missing or invalid API key' } } + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + vi.spyOn(clientModule, 'handleTavilyError').mockReturnValue( + 'Authentication failed: Invalid Tavily API key' + ); + + await expect( + tool({ + tavilyApiKey: testApiKey, + url: 'example.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + timeout: 150 + }) + ).rejects.toMatch('Authentication failed'); + }); + + it('should handle rate limit error', async () => { + const mockClient = { + post: vi.fn().mockRejectedValue({ + isAxiosError: true, + response: { + status: 429, + data: { detail: { error: 'Rate limit exceeded' } } + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + vi.spyOn(clientModule, 'handleTavilyError').mockReturnValue( + 'Rate limit exceeded. Please wait before making more requests.' 
+ ); + + await expect( + tool({ + tavilyApiKey: testApiKey, + url: 'example.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + timeout: 150 + }) + ).rejects.toMatch('Rate limit exceeded'); + }); + + it('should handle forbidden URL error', async () => { + const mockClient = { + post: vi.fn().mockRejectedValue({ + isAxiosError: true, + response: { + status: 403, + data: { detail: { error: 'URL is not supported' } } + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + vi.spyOn(clientModule, 'handleTavilyError').mockReturnValue( + 'Forbidden: URL is not supported' + ); + + await expect( + tool({ + tavilyApiKey: testApiKey, + url: 'forbidden-site.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + timeout: 150 + }) + ).rejects.toMatch('Forbidden'); + }); + + it('should handle empty results', async () => { + const mockClient = { + post: vi.fn().mockResolvedValue({ + data: { + base_url: 'empty-site.com', + results: [], + response_time: 0.5, + request_id: 'map-empty-request-id' + } + }) + }; + + vi.spyOn(clientModule, 'validateApiKey').mockImplementation(() => {}); + vi.spyOn(clientModule, 'createTavilyClient').mockReturnValue(mockClient as any); + + const result = await tool({ + tavilyApiKey: testApiKey, + url: 'empty-site.com', + maxDepth: 1, + maxBreadth: 20, + limit: 50, + allowExternal: true, + timeout: 150 + }); + + expect(result.results).toEqual([]); + expect(result.urlCount).toBe(0); + expect(result.responseTime).toBe(0.5); + }); + }); + + describe('Integration Tests (Real API)', () => { + const skipIntegration = !process.env.TEST_TAVLIY_KEY; + + it.skipIf(skipIntegration)( + 'should perform real site mapping', + async () => { + const result = await tool({ + tavilyApiKey: process.env.TEST_TAVLIY_KEY!, + url: 'docs.tavily.com', + maxDepth: 1, + maxBreadth: 20, + limit: 20, + allowExternal: true, + 
timeout: 150 + }); + + expect(result.baseUrl).toBe('docs.tavily.com'); + expect(result.results.length).toBeGreaterThan(0); + expect(result.urlCount).toBe(result.results.length); + expect(result.responseTime).toBeGreaterThan(0); + + // Validate URL format + result.results.forEach((url) => { + expect(typeof url).toBe('string'); + expect(url.length).toBeGreaterThan(0); + expect(url).toMatch(/^https?:\/\/.+/); + }); + }, + 120000 // 2 minutes timeout + ); + + it.skipIf(skipIntegration)( + 'should perform real mapping with path filtering', + async () => { + const result = await tool({ + tavilyApiKey: process.env.TEST_TAVLIY_KEY!, + url: 'docs.tavily.com', + selectPaths: '/documentation/.*', + maxDepth: 1, + maxBreadth: 20, + limit: 10, + allowExternal: true, + timeout: 150 + }); + + expect(result.baseUrl).toBe('docs.tavily.com'); + expect(result.results.length).toBeGreaterThan(0); + + // All URLs should match the select_paths pattern + result.results.forEach((url) => { + expect(url).toMatch(/\/documentation\/.*/); + }); + }, + 120000 + ); + }); +}); diff --git a/modules/tool/packages/tavily/children/search/DESIGN.md b/modules/tool/packages/tavily/children/search/DESIGN.md new file mode 100644 index 00000000..e74ef481 --- /dev/null +++ b/modules/tool/packages/tavily/children/search/DESIGN.md @@ -0,0 +1,693 @@ +# Tavily Search + +> Execute a search query using Tavily Search. + +## OpenAPI + +````yaml POST /search +paths: + path: /search + method: post + servers: + - url: https://api.tavily.com/ + request: + security: + - title: bearerAuth + parameters: + query: {} + header: + Authorization: + type: http + scheme: bearer + description: >- + Bearer authentication header in the form Bearer <token>, where + <token> is your Tavily API key (e.g., Bearer tvly-YOUR_API_KEY). 
+ cookie: {} + parameters: + path: {} + query: {} + header: {} + cookie: {} + body: + application/json: + schemaArray: + - type: object + properties: + query: + allOf: + - type: string + description: The search query to execute with Tavily. + example: who is Leo Messi? + auto_parameters: + allOf: + - type: boolean + description: >- + When `auto_parameters` is enabled, Tavily automatically + configures search parameters based on your query's content + and intent. You can still set other parameters manually, + and your explicit values will override the automatic ones. + The parameters `include_answer`, `include_raw_content`, + and `max_results` must always be set manually, as they + directly affect response size. Note: `search_depth` may be + automatically set to advanced when it's likely to improve + results. This uses 2 API credits per request. To avoid the + extra cost, you can explicitly set `search_depth` to + `basic`. Currently in beta. + default: false + topic: + allOf: + - type: string + description: >- + The category of the search.`news` is useful for retrieving + real-time updates, particularly about politics, sports, + and major current events covered by mainstream media + sources. `general` is for broader, more general-purpose + searches that may include a wide range of sources. + default: general + enum: + - general + - news + - finance + search_depth: + allOf: + - type: string + description: >- + The depth of the search. `advanced` search is tailored to + retrieve the most relevant sources and `content` snippets + for your query, while `basic` search provides generic + content snippets from each source. A `basic` search costs + 1 API Credit, while an `advanced` search costs 2 API + Credits. + enum: + - basic + - advanced + default: basic + chunks_per_source: + allOf: + - type: integer + description: >- + Chunks are short content snippets (maximum 500 characters + each) pulled directly from the source. 
Use + `chunks_per_source` to define the maximum number of + relevant chunks returned per source and to control the + `content` length. Chunks will appear in the `content` + field as: `<chunk 1> [...] <chunk 2> [...] <chunk 3>`. + Available only when `search_depth` is `advanced`. + default: 3 + minimum: 1 + maximum: 3 + max_results: + allOf: + - type: integer + example: 1 + description: The maximum number of search results to return. + default: 5 + minimum: 0 + maximum: 20 + time_range: + allOf: + - type: string + description: >- + The time range back from the current date to filter + results based on publish date or last updated date. Useful + when looking for sources that have published or updated + data. + enum: + - day + - week + - month + - year + - d + - w + - m + - 'y' + default: null + start_date: + allOf: + - type: string + description: >- + Will return all results after the specified start date + based on publish date or last updated date. Required to be + written in the format YYYY-MM-DD + example: '2025-02-09' + default: null + end_date: + allOf: + - type: string + description: >- + Will return all results before the specified end date + based on publish date or last updated date. Required to be + written in the format YYYY-MM-DD + example: '2025-12-29' + default: null + include_answer: + allOf: + - oneOf: + - type: boolean + - type: string + enum: + - basic + - advanced + description: >- + Include an LLM-generated answer to the provided query. + `basic` or `true` returns a quick answer. `advanced` + returns a more detailed answer. + default: false + include_raw_content: + allOf: + - oneOf: + - type: boolean + - type: string + enum: + - markdown + - text + description: >- + Include the cleaned and parsed HTML content of each search + result. `markdown` or `true` returns search result content + in markdown format. `text` returns the plain text from the + results and may increase latency. 
+ default: false + include_images: + allOf: + - type: boolean + description: >- + Also perform an image search and include the results in + the response. + default: false + include_image_descriptions: + allOf: + - type: boolean + description: >- + When `include_images` is `true`, also add a descriptive + text for each image. + default: false + include_favicon: + allOf: + - type: boolean + description: Whether to include the favicon URL for each result. + default: false + include_domains: + allOf: + - type: array + description: >- + A list of domains to specifically include in the search + results. Maximum 300 domains. + items: + type: string + default: [] + exclude_domains: + allOf: + - type: array + description: >- + A list of domains to specifically exclude from the search + results. Maximum 150 domains. + items: + type: string + default: [] + country: + allOf: + - type: string + description: >- + Boost search results from a specific country. This will + prioritize content from the selected country in the search + results. Available only if topic is `general`. 
+ enum: + - afghanistan + - albania + - algeria + - andorra + - angola + - argentina + - armenia + - australia + - austria + - azerbaijan + - bahamas + - bahrain + - bangladesh + - barbados + - belarus + - belgium + - belize + - benin + - bhutan + - bolivia + - bosnia and herzegovina + - botswana + - brazil + - brunei + - bulgaria + - burkina faso + - burundi + - cambodia + - cameroon + - canada + - cape verde + - central african republic + - chad + - chile + - china + - colombia + - comoros + - congo + - costa rica + - croatia + - cuba + - cyprus + - czech republic + - denmark + - djibouti + - dominican republic + - ecuador + - egypt + - el salvador + - equatorial guinea + - eritrea + - estonia + - ethiopia + - fiji + - finland + - france + - gabon + - gambia + - georgia + - germany + - ghana + - greece + - guatemala + - guinea + - haiti + - honduras + - hungary + - iceland + - india + - indonesia + - iran + - iraq + - ireland + - israel + - italy + - jamaica + - japan + - jordan + - kazakhstan + - kenya + - kuwait + - kyrgyzstan + - latvia + - lebanon + - lesotho + - liberia + - libya + - liechtenstein + - lithuania + - luxembourg + - madagascar + - malawi + - malaysia + - maldives + - mali + - malta + - mauritania + - mauritius + - mexico + - moldova + - monaco + - mongolia + - montenegro + - morocco + - mozambique + - myanmar + - namibia + - nepal + - netherlands + - new zealand + - nicaragua + - niger + - nigeria + - north korea + - north macedonia + - norway + - oman + - pakistan + - panama + - papua new guinea + - paraguay + - peru + - philippines + - poland + - portugal + - qatar + - romania + - russia + - rwanda + - saudi arabia + - senegal + - serbia + - singapore + - slovakia + - slovenia + - somalia + - south africa + - south korea + - south sudan + - spain + - sri lanka + - sudan + - sweden + - switzerland + - syria + - taiwan + - tajikistan + - tanzania + - thailand + - togo + - trinidad and tobago + - tunisia + - turkey + - turkmenistan + - uganda + 
- ukraine + - united arab emirates + - united kingdom + - united states + - uruguay + - uzbekistan + - venezuela + - vietnam + - yemen + - zambia + - zimbabwe + default: null + required: true + requiredProperties: + - query + examples: + example: + value: + query: who is Leo Messi? + auto_parameters: false + topic: general + search_depth: basic + chunks_per_source: 3 + max_results: 1 + time_range: null + start_date: '2025-02-09' + end_date: '2025-12-29' + include_answer: true + include_raw_content: true + include_images: false + include_image_descriptions: false + include_favicon: false + include_domains: [] + exclude_domains: [] + country: null + description: Parameters for the Tavily Search request. + codeSamples: + - label: Python SDK + lang: python + source: |- + from tavily import TavilyClient + + tavily_client = TavilyClient(api_key="tvly-YOUR_API_KEY") + response = tavily_client.search("Who is Leo Messi?") + + print(response) + - label: JavaScript SDK + lang: javascript + source: |- + const { tavily } = require("@tavily/core"); + + const tvly = tavily({ apiKey: "tvly-YOUR_API_KEY" }); + const response = await tvly.search("Who is Leo Messi?"); + + console.log(response); + response: + '200': + application/json: + schemaArray: + - type: object + properties: + query: + allOf: + - type: string + description: The search query that was executed. + example: Who is Leo Messi? + answer: + allOf: + - type: string + description: >- + A short answer to the user's query, generated by an LLM. + Included in the response only if `include_answer` is + requested (i.e., set to `true`, `basic`, or `advanced`) + example: >- + Lionel Messi, born in 1987, is an Argentine footballer + widely regarded as one of the greatest players of his + generation. He spent the majority of his career playing + for FC Barcelona, where he won numerous domestic league + titles and UEFA Champions League titles. Messi is known + for his exceptional dribbling skills, vision, and + goal-scoring ability. 
He has won multiple FIFA Ballon d'Or + awards, numerous La Liga titles with Barcelona, and holds + the record for most goals scored in a calendar year. In + 2014, he led Argentina to the World Cup final, and in + 2015, he helped Barcelona capture another treble. Despite + turning 36 in June, Messi remains highly influential in + the sport. + images: + allOf: + - type: array + description: >- + List of query-related images. If + `include_image_descriptions` is true, each item will have + `url` and `description`. + example: [] + items: + type: object + properties: + url: + type: string + description: + type: string + results: + allOf: + - type: array + description: A list of sorted search results, ranked by relevancy. + items: + type: object + properties: + title: + type: string + description: The title of the search result. + example: Lionel Messi Facts | Britannica + url: + type: string + description: The URL of the search result. + example: https://www.britannica.com/facts/Lionel-Messi + content: + type: string + description: A short description of the search result. + example: >- + Lionel Messi, an Argentine footballer, is widely + regarded as one of the greatest football players of + his generation. Born in 1987, Messi spent the + majority of his career playing for Barcelona, where + he won numerous domestic league titles and UEFA + Champions League titles. Messi is known for his + exceptional dribbling skills, vision, and goal + score: + type: number + format: float + description: The relevance score of the search result. + example: 0.81025416 + raw_content: + type: string + description: >- + The cleaned and parsed HTML content of the search + result. Only if `include_raw_content` is true. + example: null + favicon: + type: string + description: The favicon URL for the result. 
+ example: https://britannica.com/favicon.png + auto_parameters: + allOf: + - type: object + description: >- + A dictionary of the selected auto_parameters, only shown + when `auto_parameters` is true. + example: + topic: general + search_depth: basic + response_time: + allOf: + - type: number + format: float + description: Time in seconds it took to complete the request. + example: '1.67' + request_id: + allOf: + - type: string + description: >- + A unique request identifier you can share with customer + support to help resolve issues with specific requests. + example: 123e4567-e89b-12d3-a456-426614174111 + requiredProperties: + - query + - results + - images + - response_time + - answer + examples: + example: + value: + query: Who is Leo Messi? + answer: >- + Lionel Messi, born in 1987, is an Argentine footballer widely + regarded as one of the greatest players of his generation. He + spent the majority of his career playing for FC Barcelona, where + he won numerous domestic league titles and UEFA Champions League + titles. Messi is known for his exceptional dribbling skills, + vision, and goal-scoring ability. He has won multiple FIFA + Ballon d'Or awards, numerous La Liga titles with Barcelona, and + holds the record for most goals scored in a calendar year. In + 2014, he led Argentina to the World Cup final, and in 2015, he + helped Barcelona capture another treble. Despite turning 36 in + June, Messi remains highly influential in the sport. + images: [] + results: + - title: Lionel Messi Facts | Britannica + url: https://www.britannica.com/facts/Lionel-Messi + content: >- + Lionel Messi, an Argentine footballer, is widely regarded as + one of the greatest football players of his generation. Born + in 1987, Messi spent the majority of his career playing for + Barcelona, where he won numerous domestic league titles and + UEFA Champions League titles. 
Messi is known for his + exceptional dribbling skills, vision, and goal + score: 0.81025416 + raw_content: null + favicon: https://britannica.com/favicon.png + auto_parameters: + topic: general + search_depth: basic + response_time: '1.67' + request_id: 123e4567-e89b-12d3-a456-426614174111 + description: Search results returned successfully + '400': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: >- + <400 Bad Request, (e.g Invalid topic. Must be 'general' or + 'news'.)> + description: Bad Request - Your request is invalid. + '401': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: 'Unauthorized: missing or invalid API key.' + description: Unauthorized - Your API key is wrong or missing. + '429': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: >- + Your request has been blocked due to excessive requests. + Please reduce rate of requests. + description: Too many requests - Rate limit exceeded + '432': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: >- + <432 Custom Forbidden Error (e.g This request exceeds your + plan's set usage limit. Please upgrade your plan or contact + support@tavily.com)> + description: Key limit or Plan Limit exceeded + '433': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: >- + This request exceeds the pay-as-you-go limit. 
You can increase + your limit on the Tavily dashboard. + description: PayGo limit exceeded + '500': + application/json: + schemaArray: + - type: object + properties: + detail: + allOf: + - type: object + properties: + error: + type: string + examples: + example: + value: + detail: + error: Internal Server Error + description: Internal Server Error - We had a problem with our server. + deprecated: false + type: path +components: + schemas: {} + +```` diff --git a/modules/tool/packages/tavily/children/search/config.ts b/modules/tool/packages/tavily/children/search/config.ts index 978cfe33..9686d51d 100644 --- a/modules/tool/packages/tavily/children/search/config.ts +++ b/modules/tool/packages/tavily/children/search/config.ts @@ -13,6 +13,146 @@ export default defineTool({ toolDescription: 'Search the web with AI-powered relevance ranking and answer generation.', versionList: [ + { + value: '0.1.1', + description: '支持高级配置', + inputs: [ + { + key: 'query', + label: '搜索内容', + description: '要搜索的内容', + required: true, + valueType: WorkflowIOValueTypeEnum.string, + renderTypeList: [FlowNodeInputTypeEnum.reference, FlowNodeInputTypeEnum.input], + toolDescription: 'The search query string' + }, + { + key: 'searchDepth', + label: '搜索深度', + description: '基础搜索 (1 credit) | 高级搜索 (2 credits)', + valueType: WorkflowIOValueTypeEnum.string, + defaultValue: 'basic', + renderTypeList: [FlowNodeInputTypeEnum.select], + list: [ + { label: '基础', value: 'basic' }, + { label: '高级', value: 'advanced' } + ] + }, + { + key: 'maxResults', + label: '最大结果数', + description: '返回的最大搜索结果数量 (1-20)', + valueType: WorkflowIOValueTypeEnum.number, + defaultValue: 10, + min: 1, + max: 20, + renderTypeList: [FlowNodeInputTypeEnum.numberInput] + }, + { + key: 'includeAnswer', + label: '生成 AI 摘要', + description: '是否生成 AI 摘要答案', + valueType: WorkflowIOValueTypeEnum.boolean, + defaultValue: false, + renderTypeList: [FlowNodeInputTypeEnum.switch] + }, + { + key: 'searchTopic', + label: '搜索主题', + description: 
'搜索主题', + valueType: WorkflowIOValueTypeEnum.string, + defaultValue: 'general', + renderTypeList: [FlowNodeInputTypeEnum.select], + list: [ + { label: '通用', value: 'general' }, + { label: '新闻', value: 'news' }, + { label: '经济', value: 'finance' } + ] + }, + { + key: 'includeRawContent', + label: '包含原始内容', + description: '是否包含原始内容', + valueType: WorkflowIOValueTypeEnum.string, + defaultValue: 'none', /* must be one of the select options below; the tool's zod schema only accepts 'none' | 'text' | 'markdown' */ + renderTypeList: [FlowNodeInputTypeEnum.select], + list: [ + { label: '不包含', value: 'none' }, + { label: 'Text 格式', value: 'text' }, + { label: 'Markdown 格式', value: 'markdown' } + ] + }, + { + key: 'timeRange', + label: '时间范围', + description: '搜索时间范围', + valueType: WorkflowIOValueTypeEnum.string, + defaultValue: 'none', + renderTypeList: [FlowNodeInputTypeEnum.select], + list: [ + { label: '不限', value: 'none' }, + { label: '最近一天', value: 'day' }, + { label: '最近一周', value: 'week' }, + { label: '最近一月', value: 'month' }, + { label: '最近一年', value: 'year' } + ] + }, + { + key: 'includeImages', + label: '包含图片', + description: '是否包含图片', + valueType: WorkflowIOValueTypeEnum.boolean, + defaultValue: false, + renderTypeList: [FlowNodeInputTypeEnum.switch] + }, + { + key: 'includeImageDescriptions', + label: '包含图片描述', + description: '是否包含图片描述', + valueType: WorkflowIOValueTypeEnum.boolean, + defaultValue: false, + renderTypeList: [FlowNodeInputTypeEnum.switch] + }, + { + key: 'includeFavicon', + label: '包含 favicon', + description: '是否包含 favicon', + valueType: WorkflowIOValueTypeEnum.boolean, + defaultValue: false, + renderTypeList: [FlowNodeInputTypeEnum.switch] + }, + { + key: 'includeDomains', + label: '包含的域名', + description: '搜索结果中包含的域名', + valueType: WorkflowIOValueTypeEnum.arrayString, + defaultValue: [], + renderTypeList: [FlowNodeInputTypeEnum.input, FlowNodeInputTypeEnum.reference] + }, + { + key: 'excludeDomains', + label: '排除的域名', + description: 
'搜索结果中排除的域名', + valueType: WorkflowIOValueTypeEnum.arrayString, + defaultValue: [], + renderTypeList: 
[FlowNodeInputTypeEnum.input, FlowNodeInputTypeEnum.reference] + } + ], + outputs: [ + { + key: 'answer', + label: 'AI 摘要', + description: 'AI 生成的答案摘要', + valueType: WorkflowIOValueTypeEnum.string + }, + { + key: 'results', + label: '搜索结果', + description: '结构化的搜索结果数组', + valueType: WorkflowIOValueTypeEnum.arrayObject + } + ] + }, { value: '0.1.0', description: 'Initial version with basic and advanced search', diff --git a/modules/tool/packages/tavily/children/search/src/index.ts b/modules/tool/packages/tavily/children/search/src/index.ts index 3b03eec6..b8000e22 100644 --- a/modules/tool/packages/tavily/children/search/src/index.ts +++ b/modules/tool/packages/tavily/children/search/src/index.ts @@ -8,7 +8,15 @@ export const InputType = z.object({ query: z.string().min(1, 'Search query cannot be empty'), searchDepth: z.enum(['basic', 'advanced']).default('basic'), maxResults: z.number().int().min(1).max(20).default(5), - includeAnswer: z.boolean().default(false) + includeAnswer: z.boolean().default(false), + searchTopic: z.enum(['general', 'news', 'finance']).default('general'), + includeRawContent: z.enum(['none', 'text', 'markdown']).default('none'), + timeRange: z.enum(['none', 'day', 'week', 'month', 'year']).default('none'), + includeImages: z.boolean().default(false), + includeImageDescriptions: z.boolean().default(false), + includeFavicon: z.boolean().default(false), + includeDomains: z.array(z.string()).default([]), + excludeDomains: z.array(z.string()).default([]) }); // 输出类型 @@ -31,7 +39,15 @@ export async function tool({ query, searchDepth, maxResults, - includeAnswer + includeAnswer, + searchTopic, + includeRawContent, + timeRange, + includeImages, + includeImageDescriptions, + includeFavicon, + includeDomains, + excludeDomains }: z.infer): Promise> { try { // 1. 
验证 API Key @@ -46,7 +62,15 @@ export async function tool({ query, search_depth: searchDepth, max_results: maxResults, - include_answer: includeAnswer + include_answer: includeAnswer, + include_domains: includeDomains, + exclude_domains: excludeDomains, + include_images: includeImages, + include_image_descriptions: includeImageDescriptions, + include_favicon: includeFavicon, + include_raw_content: includeRawContent === 'none' ? false : includeRawContent, + time_range: timeRange === 'none' ? undefined : timeRange, + topic: searchTopic }; // 4. 发送请求 diff --git a/modules/tool/packages/tavily/children/search/test/index.test.ts b/modules/tool/packages/tavily/children/search/test/index.test.ts index 11fc7354..ccf26aa2 100644 --- a/modules/tool/packages/tavily/children/search/test/index.test.ts +++ b/modules/tool/packages/tavily/children/search/test/index.test.ts @@ -44,7 +44,15 @@ describe('Tavily Search Tool', () => { query: 'test query', searchDepth: 'basic', maxResults: 5, - includeAnswer: true + includeAnswer: true, + searchTopic: 'general', + includeRawContent: 'none', + timeRange: 'none', + includeImages: false, + includeImageDescriptions: false, + includeFavicon: false, + includeDomains: [], + excludeDomains: [] }); expect(result.answer).toBe('Test answer'); @@ -56,7 +64,15 @@ describe('Tavily Search Tool', () => { query: 'test query', search_depth: 'basic', max_results: 5, - include_answer: true + include_answer: true, + topic: 'general', + include_raw_content: false, + time_range: undefined, + include_images: false, + include_image_descriptions: false, + include_favicon: false, + include_domains: [], + exclude_domains: [] }); }); @@ -88,7 +104,15 @@ describe('Tavily Search Tool', () => { query: 'advanced query', searchDepth: 'advanced', maxResults: 10, - includeAnswer: false + includeAnswer: false, + searchTopic: 'general', + includeRawContent: 'none', + timeRange: 'none', + includeImages: false, + includeImageDescriptions: false, + includeFavicon: false, + 
includeDomains: [], + excludeDomains: [] }); expect(result.results).toHaveLength(1); @@ -98,7 +122,15 @@ describe('Tavily Search Tool', () => { query: 'advanced query', search_depth: 'advanced', max_results: 10, - include_answer: false + include_answer: false, + topic: 'general', + include_raw_content: false, + time_range: undefined, + include_images: false, + include_image_descriptions: false, + include_favicon: false, + include_domains: [], + exclude_domains: [] }); }); @@ -122,7 +154,15 @@ describe('Tavily Search Tool', () => { query: 'no results query', searchDepth: 'basic', maxResults: 5, - includeAnswer: false + includeAnswer: false, + searchTopic: 'general', + includeRawContent: 'none', + timeRange: 'none', + includeImages: false, + includeImageDescriptions: false, + includeFavicon: false, + includeDomains: [], + excludeDomains: [] }); expect(result.results).toEqual([]); @@ -140,7 +180,15 @@ describe('Tavily Search Tool', () => { query: 'test', searchDepth: 'basic', maxResults: 5, - includeAnswer: false + includeAnswer: false, + searchTopic: 'general', + includeRawContent: 'none', + timeRange: 'none', + includeImages: false, + includeImageDescriptions: false, + includeFavicon: false, + includeDomains: [], + excludeDomains: [] }) ).rejects.toMatch('Invalid Tavily API key format'); }); @@ -168,7 +216,15 @@ describe('Tavily Search Tool', () => { query: 'test', searchDepth: 'basic', maxResults: 5, - includeAnswer: false + includeAnswer: false, + searchTopic: 'general', + includeRawContent: 'none', + timeRange: 'none', + includeImages: false, + includeImageDescriptions: false, + includeFavicon: false, + includeDomains: [], + excludeDomains: [] }) ).rejects.toMatch('Authentication failed'); }); @@ -196,7 +252,15 @@ describe('Tavily Search Tool', () => { query: 'test', searchDepth: 'basic', maxResults: 5, - includeAnswer: false + includeAnswer: false, + searchTopic: 'general', + includeRawContent: 'none', + timeRange: 'none', + includeImages: false, + 
includeImageDescriptions: false, + includeFavicon: false, + includeDomains: [], + excludeDomains: [] }) ).rejects.toMatch('Rate limit exceeded'); }); @@ -222,7 +286,15 @@ describe('Tavily Search Tool', () => { query: 'test', searchDepth: 'basic', maxResults: 5, - includeAnswer: false + includeAnswer: false, + searchTopic: 'general', + includeRawContent: 'none', + timeRange: 'none', + includeImages: false, + includeImageDescriptions: false, + includeFavicon: false, + includeDomains: [], + excludeDomains: [] }) ).rejects.toMatch('Request timeout'); }); @@ -240,7 +312,15 @@ describe('Tavily Search Tool', () => { query: 'TypeScript programming language', searchDepth: 'basic', maxResults: 3, - includeAnswer: false + includeAnswer: false, + searchTopic: 'general', + includeRawContent: 'none', + timeRange: 'none', + includeImages: false, + includeImageDescriptions: false, + includeFavicon: false, + includeDomains: [], + excludeDomains: [] }); expect(result.results.length).toBeGreaterThan(0); @@ -260,7 +340,15 @@ describe('Tavily Search Tool', () => { query: 'What is artificial intelligence?', searchDepth: 'basic', maxResults: 5, - includeAnswer: true + includeAnswer: true, + searchTopic: 'general', + includeRawContent: 'none', + timeRange: 'none', + includeImages: false, + includeImageDescriptions: false, + includeFavicon: false, + includeDomains: [], + excludeDomains: [] }); expect(result.answer).toBeDefined(); @@ -278,7 +366,15 @@ describe('Tavily Search Tool', () => { query: 'latest developments in quantum computing', searchDepth: 'advanced', maxResults: 5, - includeAnswer: true + includeAnswer: true, + searchTopic: 'general', + includeRawContent: 'none', + timeRange: 'none', + includeImages: false, + includeImageDescriptions: false, + includeFavicon: false, + includeDomains: [], + excludeDomains: [] }); expect(result.results.length).toBeGreaterThan(0); diff --git a/modules/tool/packages/tavily/config.ts b/modules/tool/packages/tavily/config.ts index 95ae4e77..c193a5f5 
100644 --- a/modules/tool/packages/tavily/config.ts +++ b/modules/tool/packages/tavily/config.ts @@ -14,13 +14,14 @@ export default defineToolSet({ toolDescription: `A Tavily AI search toolset with SEARCH and EXTRACT operations. Use these tools to perform AI-powered web searches with advanced filtering and extract structured content from web pages.`, - + courseUrl: 'https://app.tavily.com', // 共享密钥配置 - 所有子工具共享 secretInputConfig: [ { key: 'tavilyApiKey', label: 'Tavily API Key', - description: 'Tavily API 密钥 (格式: tvly-xxxxxxxxxxxxxxxxxxxxxxxx)', + description: + 'Tavily API 密钥 (格式: tvly-xxxxxxxxxxxxxxxxxxxxxxxx), 在 https://app.tavily.com 获取', required: true, inputType: 'secret' } diff --git a/modules/tool/packages/tavily/types.ts b/modules/tool/packages/tavily/types.ts index 8a936e5f..9601f7a6 100644 --- a/modules/tool/packages/tavily/types.ts +++ b/modules/tool/packages/tavily/types.ts @@ -4,9 +4,22 @@ export interface SearchRequest { api_key: string; query: string; + auto_parameters?: boolean; + topic?: 'general' | 'news' | 'finance'; search_depth?: 'basic' | 'advanced'; + chunks_per_source?: number; max_results?: number; - include_answer?: boolean; + time_range?: 'day' | 'week' | 'month' | 'year' | 'd' | 'w' | 'm' | 'y'; + start_date?: string; + end_date?: string; + include_answer?: boolean | 'basic' | 'advanced'; + include_raw_content?: boolean | 'markdown' | 'text'; + include_images?: boolean; + include_image_descriptions?: boolean; + include_favicon?: boolean; + include_domains?: string[]; + exclude_domains?: string[]; + country?: string; } /** @@ -42,6 +55,10 @@ export interface ExtractRequest { api_key: string; urls: string | string[]; format?: 'markdown' | 'text'; + extract_depth?: 'basic' | 'advanced'; + include_images?: boolean; + include_favicon?: boolean; + timeout?: number; } /** @@ -65,3 +82,72 @@ export interface ExtractResponse { response_time: number; request_id: string; } + +/** + * Crawl request parameters + */ +export interface CrawlRequest { + api_key: string; + 
url: string; + instructions?: string; + max_depth?: number; + max_breadth?: number; + limit?: number; + select_paths?: string[]; + select_domains?: string[]; + exclude_paths?: string[]; + exclude_domains?: string[]; + allow_external?: boolean; + include_images?: boolean; + extract_depth?: 'basic' | 'advanced'; + format?: 'markdown' | 'text'; + include_favicon?: boolean; + timeout?: number; +} + +/** + * Crawl result item (one entry of CrawlResponse.results) + */ +export interface CrawlResult { + url: string; + raw_content: string; + favicon?: string; +} + +/** + * Crawl response + */ +export interface CrawlResponse { + base_url: string; + results: CrawlResult[]; + response_time: number; + request_id: string; +} + +/** + * Map request parameters + */ +export interface MapRequest { + api_key: string; + url: string; + instructions?: string; + max_depth?: number; + max_breadth?: number; + limit?: number; + select_paths?: string[]; + select_domains?: string[]; + exclude_paths?: string[]; + exclude_domains?: string[]; + allow_external?: boolean; + timeout?: number; +} + +/** + * Map response (results holds plain strings, not result objects) + */ +export interface MapResponse { + base_url: string; + results: string[]; + response_time: number; + request_id: string; +}